Commit d6205ef5 authored by omid's avatar omid
Browse files

Some correction in reporting the final tree (based on the chosen

hypothesis)
parent 78a52c80
......@@ -62,821 +62,855 @@ const char *version = "1.3.0";
int main(int aRgc, char **aRgv) {
Timer timer_app;
Timer timer_app;
timer_app.start();
timer_app.start();
try {
try {
#ifdef USE_MKL_VML
// If used, intitialize the MKL VML library
vmlSetMode(VML_HA | VML_DOUBLE_CONSISTENT);
// If used, intitialize the MKL VML library
vmlSetMode(VML_HA | VML_DOUBLE_CONSISTENT);
#endif
#ifdef USE_MPI
// Start the high level parallel executor (based on MPI)
HighLevelCoordinator hlc(&aRgc, &aRgv);
// Start the high level parallel executor (based on MPI)
HighLevelCoordinator hlc(&aRgc, &aRgv);
#endif
// Parse the command line
CmdLine cmd;
cmd.parseCmdLine(aRgc, aRgv);
// Parse the command line
CmdLine cmd;
cmd.parseCmdLine(aRgc, aRgv);
// Adjust and report the number of threads that will be used
#ifdef _OPENMP
int num_threads = omp_get_max_threads();
// std::cout<<"max num of thr: "<< num_threads <<std::endl;
if ((cmd.mNumThreads >= 1) &&
(cmd.mNumThreads <= (unsigned int)num_threads))
num_threads = cmd.mNumThreads;
// std::cout<<"num of thr: "<< num_threads <<std::endl;
omp_set_num_threads(num_threads);
/*if (num_threads < 2)
cmd.mForceSerial = true;
else
cmd.mForceSerial = false;*/
int num_threads = omp_get_max_threads();
// std::cout<<"max num of thr: "<< num_threads <<std::endl;
if ((cmd.mNumThreads >= 1) &&
(cmd.mNumThreads <= (unsigned int)num_threads))
num_threads = cmd.mNumThreads;
// std::cout<<"num of thr: "<< num_threads <<std::endl;
omp_set_num_threads(num_threads);
/*if (num_threads < 2)
cmd.mForceSerial = true;
else
cmd.mForceSerial = false;*/
#else
cmd.mNumThreads = 1;
int num_threads = 1;
cmd.mForceSerial = true;
cmd.mNumThreads = 1;
int num_threads = 1;
cmd.mForceSerial = true;
#endif
/*#ifdef _OPENMP
int num_threads = omp_get_max_threads();
if(num_threads < 2 || cmd.mForceSerial)
{
cmd.mForceSerial = true;
num_threads = 1;
omp_set_num_threads(1);
}
#else
cmd.mForceSerial = true;
int num_threads = 1;
#endif*/
/*#ifdef _OPENMP
int num_threads = omp_get_max_threads();
if(num_threads < 2 || cmd.mForceSerial)
{
cmd.mForceSerial = true;
num_threads = 1;
omp_set_num_threads(1);
}
#else
cmd.mForceSerial = true;
int num_threads = 1;
#endif*/
#ifdef USE_MPI
// Shutdown messages from all MPI processes except the master
if (!hlc.isMaster())
cmd.mVerboseLevel = VERBOSE_NONE;
// Shutdown messages from all MPI processes except the master
if (!hlc.isMaster())
cmd.mVerboseLevel = VERBOSE_NONE;
#endif
// std::cout <<std::endl<<"------------------"<< std::endl<<"FastCodeML
//V"<<version<<std::endl<<"------------------"<<std::endl;
// Write out command line parameters (if not quiet i.e. if verbose level >
// 0)
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT) {
std::cout << "------------------------------------" << std::endl;
std::cout << "FastCodeML V" << version << std::endl;
std::cout << "------------------------------------" << std::endl;
std::cout << std::endl;
std::cout << "Tree file: " << cmd.mTreeFile << std::endl;
std::cout << "Gene file: " << cmd.mGeneFile << std::endl;
std::cout << "Verbose level: " << cmd.mVerboseLevel << " ("
<< decodeVerboseLevel(cmd.mVerboseLevel) << ')' << std::endl;
if (cmd.mSeed)
std::cout << "Seed: " << cmd.mSeed << std::endl;
if (cmd.mBranchFromFile)
std::cout << "Branch: From tree file" << std::endl;
else if (cmd.mBranchAll)
std::cout << "FG Branches: All (internals + leaves) "
<< std::endl;
// else if(cmd.mBranchStart != UINT_MAX && cmd.mBranchStart ==
// cmd.mBranchEnd)
// std::cout
//<< "Branch: " << cmd.mBranchStart << std::endl;
// else if(cmd.mBranchStart != UINT_MAX && cmd.mBranchEnd == UINT_MAX)
// std::cout
//<< "Branches: " << cmd.mBranchStart << "-end" << std::endl;
// else if(cmd.mBranchStart != UINT_MAX && cmd.mBranchEnd != UINT_MAX)
// std::cout
//<< "Branches: " << cmd.mBranchStart << '-' <<
//cmd.mBranchEnd << std::endl;
if (!cmd.mStopIfNotLRT)
std::cout << "H0 pre stop: No" << std::endl;
if (cmd.mIgnoreFreq)
std::cout << "Codon freq.: Ignore" << std::endl;
if (cmd.mDoNotReduceForest)
std::cout << "Reduce forest: Do not reduce" << std::endl;
else
std::cout << "Reduce forest: Aggressive" << std::endl;
if (cmd.mInitH0fromH1)
std::cout << "Starting val.: From H1" << std::endl;
else if (cmd.mInitFromParams && cmd.mBranchLengthsFromFile)
std::cout << "Starting val.: Times from tree file and params from "
"const (see below)"
<< std::endl;
else if (cmd.mInitFromParams)
std::cout << "Starting val.: Params from const (see below)"
<< std::endl;
else if (cmd.mBranchLengthsFromFile)
std::cout << "Starting val.: Times from tree file" << std::endl;
if (cmd.mNoMaximization)
std::cout << "Maximization: No" << std::endl;
if (cmd.mTrace)
std::cout << "Trace: On" << std::endl;
if (cmd.mCleanData)
std::cout << "Clean data: On" << std::endl;
else
std::cout << "Clean data: Off" << std::endl;
if (cmd.mGraphFile)
std::cout << "Graph file: " << cmd.mGraphFile << std::endl;
if (cmd.mGraphFile && cmd.mExportComputedTimes != UINT_MAX)
std::cout << "Graph times: From H" << cmd.mExportComputedTimes
<< std::endl;
if (!cmd.mNoMaximization)
std::cout << "Optimizer: " << cmd.mOptimizationAlgo << std::endl;
if (cmd.mMaxIterations != MAX_ITERATIONS)
std::cout << "Max iterations: " << cmd.mMaxIterations << std::endl;
if (cmd.mDeltaValueForGradient > 0.0)
std::cout << "Delta value: " << cmd.mDeltaValueForGradient
<< std::endl;
std::cout << "Relative error: " << cmd.mRelativeError << std::endl;
if (cmd.mResultsFile)
std::cout << "Results file: " << cmd.mResultsFile << std::endl;
if (cmd.mNumThreads)
std::cout << "Number of threads: " << cmd.mNumThreads << std::endl;
if (cmd.mFixedBranchLength)
std::cout << "Branch lengths are fixed" << std::endl;
// std::cout <<std::endl<<"------------------"<< std::endl<<"FastCodeML
//V"<<version<<std::endl<<"------------------"<<std::endl;
// Write out command line parameters (if not quiet i.e. if verbose level >
// 0)
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT) {
std::cout << "------------------------------------" << std::endl;
std::cout << "FastCodeML V" << version << std::endl;
std::cout << "------------------------------------" << std::endl;
std::cout << std::endl;
std::cout << "Tree file: " << cmd.mTreeFile << std::endl;
std::cout << "Gene file: " << cmd.mGeneFile << std::endl;
std::cout << "Verbose level: " << cmd.mVerboseLevel << " ("
<< decodeVerboseLevel(cmd.mVerboseLevel) << ')'
<< std::endl;
if (cmd.mSeed)
std::cout << "Seed: " << cmd.mSeed << std::endl;
if (cmd.mBranchFromFile)
std::cout << "Branch: From tree file" << std::endl;
else if (cmd.mBranchAll)
std::cout << "FG Branches: All (internals + leaves) "
<< std::endl;
// else if(cmd.mBranchStart != UINT_MAX && cmd.mBranchStart ==
// cmd.mBranchEnd)
// std::cout
//<< "Branch: " << cmd.mBranchStart << std::endl;
// else if(cmd.mBranchStart != UINT_MAX && cmd.mBranchEnd == UINT_MAX)
// std::cout
//<< "Branches: " << cmd.mBranchStart << "-end" << std::endl;
// else if(cmd.mBranchStart != UINT_MAX && cmd.mBranchEnd != UINT_MAX)
// std::cout
//<< "Branches: " << cmd.mBranchStart << '-' <<
//cmd.mBranchEnd << std::endl;
if (!cmd.mStopIfNotLRT)
std::cout << "H0 pre stop: No" << std::endl;
if (cmd.mIgnoreFreq)
std::cout << "Codon freq.: Ignore" << std::endl;
if (cmd.mDoNotReduceForest)
std::cout << "Reduce forest: Do not reduce" << std::endl;
else
std::cout << "Reduce forest: Aggressive" << std::endl;
if (cmd.mInitH0fromH1)
std::cout << "Starting val.: From H1" << std::endl;
else if (cmd.mInitFromParams && cmd.mBranchLengthsFromFile)
std::cout
<< "Starting val.: Times from tree file and params from "
"const (see below)" << std::endl;
else if (cmd.mInitFromParams)
std::cout << "Starting val.: Params from const (see below)"
<< std::endl;
else if (cmd.mBranchLengthsFromFile)
std::cout << "Starting val.: Times from tree file"
<< std::endl;
if (cmd.mNoMaximization)
std::cout << "Maximization: No" << std::endl;
if (cmd.mTrace)
std::cout << "Trace: On" << std::endl;
if (cmd.mCleanData)
std::cout << "Clean data: On" << std::endl;
else
std::cout << "Clean data: Off" << std::endl;
if (cmd.mGraphFile)
std::cout << "Graph file: " << cmd.mGraphFile << std::endl;
if (cmd.mGraphFile && cmd.mExportComputedTimes != UINT_MAX)
std::cout << "Graph times: From H" << cmd.mExportComputedTimes
<< std::endl;
if (!cmd.mNoMaximization)
std::cout << "Optimizer: " << cmd.mOptimizationAlgo
<< std::endl;
if (cmd.mMaxIterations != MAX_ITERATIONS)
std::cout << "Max iterations: " << cmd.mMaxIterations
<< std::endl;
if (cmd.mDeltaValueForGradient > 0.0)
std::cout << "Delta value: " << cmd.mDeltaValueForGradient
<< std::endl;
std::cout << "Relative error: " << cmd.mRelativeError << std::endl;
if (cmd.mResultsFile)
std::cout << "Results file: " << cmd.mResultsFile
<< std::endl;
if (cmd.mNumThreads)
std::cout << "Number of threads: " << cmd.mNumThreads
<< std::endl;
if (cmd.mFixedBranchLength)
std::cout << "Branch lengths are fixed" << std::endl;
#ifdef _OPENMP
if (num_threads > 1) {
std::cout << "Num. threads: " << num_threads << std::endl
<< "Num. cores: " << omp_get_num_procs() << std::endl;
} else
if (num_threads > 1) {
std::cout << "Num. threads: " << num_threads << std::endl
<< "Num. cores: " << omp_get_num_procs() << std::endl;
} else
#endif
{
std::cout << "Num. threads: 1 serial" << std::endl
<< "Num. cores: 1" << std::endl;
}
{
std::cout << "Num. threads: 1 serial" << std::endl
<< "Num. cores: 1" << std::endl;
}
#ifdef USE_MPI
if (hlc.numJobs() > 2)
std::cout << "Num. MPI proc: 1 (master) + " << hlc.numJobs() - 1
<< " (workers)" << std::endl;
else
std::cout << "Num. MPI proc: Insufficient, single task execution"
<< std::endl;
if (hlc.numJobs() > 2)
std::cout << "Num. MPI proc: 1 (master) + " << hlc.numJobs() - 1
<< " (workers)" << std::endl;
else
std::cout << "Num. MPI proc: Insufficient, single task execution"
<< std::endl;
#endif
std::cout << "Compiled with: ";
std::cout << "Compiled with: ";
#ifdef _OPENMP
std::cout << "USE_OPENMP ";
std::cout << "USE_OPENMP ";
#endif
#ifdef USE_MPI
std::cout << "USE_MPI ";
std::cout << "USE_MPI ";
#endif
#ifdef USE_CPV_SCALING
std::cout << "USE_CPV_SCALING ";
std::cout << "USE_CPV_SCALING ";
#endif
#ifdef NEW_LIKELIHOOD
std::cout << "NEW_LIKELIHOOD ";
std::cout << "NEW_LIKELIHOOD ";
#endif
#ifdef NON_RECURSIVE_VISIT
std::cout << "NON_RECURSIVE_VISIT ";
std::cout << "NON_RECURSIVE_VISIT ";
#endif
#ifdef USE_DAG
std::cout << "USE_DAG ";
std::cout << "USE_DAG ";
#endif
#ifdef USE_ORIGINAL_PROPORTIONS
std::cout << "USE_ORIGINAL_PROPORTIONS ";
std::cout << "USE_ORIGINAL_PROPORTIONS ";
#endif
#ifdef USE_LAPACK
std::cout << "USE_LAPACK ";
std::cout << "USE_LAPACK ";
#endif
#ifdef USE_MKL_VML
std::cout << "USE_MKL_VML";
std::cout << "USE_MKL_VML";
#endif
std::cout << std::endl << std::endl;
if (cmd.mInitFromParams) {
std::cout << "Param initial values:" << std::endl
<< std::endl
<< ParseParameters::getInstance();
}
}
std::cout << std::endl << std::endl;
if (cmd.mInitFromParams) {
std::cout << "Param initial values:" << std::endl << std::endl
<< ParseParameters::getInstance();
}
}
// Initialize the random number generator (0 means it is not set on the command
// line)
#ifdef USE_MPI
// Insure that each MPI process starts with a different seed
if (cmd.mSeed == 0)
cmd.mSeed = static_cast<unsigned int>(time(NULL)) +
static_cast<unsigned int>(hlc.getRank()) * 1000;
// Insure that each MPI process starts with a different seed
if (cmd.mSeed == 0)
cmd.mSeed = static_cast<unsigned int>(time(NULL)) +
static_cast<unsigned int>(hlc.getRank()) * 1000;
#else
if (cmd.mSeed == 0)
cmd.mSeed = static_cast<unsigned int>(time(NULL));
if (cmd.mSeed == 0)
cmd.mSeed = static_cast<unsigned int>(time(NULL));
#endif
// srand(cmd.mSeed); // fastcodeml seed
SetSeedCodeml(cmd.mSeed, 0); // codeml seed is 1
// Verify the optimizer algorithm selected on the command line
if (!cmd.mNoMaximization)
BranchSiteModel::verifyOptimizerAlgo(cmd.mOptimizationAlgo);
// Start a timer (to measure serial part over parallel one)
Timer timer;
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT)
timer.start();
// Create the forest
Forest forest(cmd.mVerboseLevel);
// Enclose file loading into a block so temporary structures could be
// deleted when no more needed
//{
// Load the multiple sequence alignment (MSA)
Phylip msa(cmd.mVerboseLevel);
msa.readFile(cmd.mGeneFile, cmd.mCleanData);
// Load the phylogenetic tree
Newick tree(cmd.mVerboseLevel);
tree.readFile(cmd.mTreeFile);
// Check coherence between the two files
msa.checkNameCoherence(tree.getSpecies());
// Check root and unrooting if tree is rooted
tree.checkRootBranches();
// If times from file then check for null branch lengths for any leaf
if (cmd.mBranchLengthsFromFile) {
int zero_on_leaf_cnt = 0;
int zero_on_int_cnt = 0;
tree.countNullBranchLengths(zero_on_leaf_cnt, zero_on_int_cnt);
if (zero_on_leaf_cnt > 0 || zero_on_int_cnt > 0) {
if (cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS) {
std::cout << "Found null or missing branch length in tree file: on "
<< zero_on_leaf_cnt << " leave(s) and on "
<< zero_on_int_cnt << " internal branch(es)." << std::endl;
}
}
}
// Print the tree with the numbering of internal branches
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT)
tree.printTreeAnnotated(std::cout);
// Load the forest
forest.loadTreeAndGenes(
tree, msa, cmd.mIgnoreFreq ? CodonFrequencies::CODON_FREQ_MODEL_UNIF
: CodonFrequencies::CODON_FREQ_MODEL_F3X4);
// Reduce the forest merging common subtrees. Add also more reduction, then
// clean the no more useful data.
if (!cmd.mDoNotReduceForest) {
// bool sts = forest.reduceSubtrees(cmd.mNumReductionBlocks);
forest.reduceSubtrees();
// srand(cmd.mSeed); // fastcodeml seed
SetSeedCodeml(cmd.mSeed, 0); // codeml seed is 1
// Verify the optimizer algorithm selected on the command line
if (!cmd.mNoMaximization)
BranchSiteModel::verifyOptimizerAlgo(cmd.mOptimizationAlgo);
// Start a timer (to measure serial part over parallel one)
Timer timer;
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT)
timer.start();
// Create the forest
Forest forest(cmd.mVerboseLevel);
// Enclose file loading into a block so temporary structures could be
// deleted when no more needed
//{
// Load the multiple sequence alignment (MSA)
Phylip msa(cmd.mVerboseLevel);
msa.readFile(cmd.mGeneFile, cmd.mCleanData);
// Load the phylogenetic tree
Newick tree(cmd.mVerboseLevel);
tree.readFile(cmd.mTreeFile);
// Check coherence between the two files
msa.checkNameCoherence(tree.getSpecies());
// Check root and unrooting if tree is rooted
tree.checkRootBranches();
// If times from file then check for null branch lengths for any leaf
if (cmd.mBranchLengthsFromFile) {
int zero_on_leaf_cnt = 0;
int zero_on_int_cnt = 0;
tree.countNullBranchLengths(zero_on_leaf_cnt, zero_on_int_cnt);
if (zero_on_leaf_cnt > 0 || zero_on_int_cnt > 0) {
if (cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS) {
std::cout
<< "Found null or missing branch length in tree file: on "
<< zero_on_leaf_cnt << " leave(s) and on "
<< zero_on_int_cnt << " internal branch(es)."
<< std::endl;
}
}
}
// Print the tree with the numbering of internal branches
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT)
tree.printTreeAnnotated(std::cout);
// Load the forest
forest.loadTreeAndGenes(tree, msa,
cmd.mIgnoreFreq ?
CodonFrequencies::CODON_FREQ_MODEL_UNIF :
CodonFrequencies::CODON_FREQ_MODEL_F3X4);
// Reduce the forest merging common subtrees. Add also more reduction, then
// clean the no more useful data.
if (!cmd.mDoNotReduceForest) {
// bool sts = forest.reduceSubtrees(cmd.mNumReductionBlocks);
forest.reduceSubtrees();
#ifndef NEW_LIKELIHOOD
forest.addAggressiveReduction();
forest.addAggressiveReduction();
#endif
forest.cleanReductionWorkingData();
forest.cleanReductionWorkingData();
#ifdef NEW_LIKELIHOOD
forest.prepareNewReduction();
forest.prepareNewReduction();
#endif
}
}
#ifdef NEW_LIKELIHOOD
else {
forest.prepareNewReductionNoReuse();
}
else {
forest.prepareNewReductionNoReuse();
}
#endif
#ifdef NON_RECURSIVE_VISIT
// Prepare the pointers to visit the trees without recursion
forest.prepareNonRecursiveVisit();
// Prepare the pointers to visit the trees without recursion
forest.prepareNonRecursiveVisit();
#endif
// Subdivide the trees in groups based on dependencies
// forest.prepareDependencies(cmd.mForceSerial || cmd.mDoNotReduceForest);
#ifdef USE_DAG
// Load the forest into a DAG
forest.loadForestIntoDAG(Nt);
// Load the forest into a DAG
forest.loadForestIntoDAG(Nt);
#endif
// Get the time needed by data preprocessing
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT) {
timer.stop();
std::cout << std::endl
<< "TIMER (preprocessing) ncores: " << std::setw(2)
<< num_threads << " time: " << timer.get() << std::endl;
}
// Get the time needed by data preprocessing
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT) {
timer.stop();
std::cout << std::endl << "TIMER (preprocessing) ncores: "
<< std::setw(2) << num_threads << " time: " << timer.get()
<< std::endl;
}
// Print few statistics
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT)
std::cout << forest;
// Print few statistics
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT)
std::cout << forest;
#ifdef USE_MPI
// Distribute the work. If run under MPI then finish, else return to the
// standard execution flow
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT)
timer.start();
bool has_run_under_MPI = hlc.startWork(forest, cmd);
// If executed under MPI report the time spent, otherwise stop the timer so
// it can be restarted around the serial execution
if (has_run_under_MPI) {
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT) {
timer.stop();
std::cout << std::endl
<< "TIMER (processing) ncores: " << std::setw(2)
<< num_threads * (hlc.numJobs() - 1) + 1
<< " time: " << timer.get() << std::endl;
}
return 0;
} else {
timer.stop();
}
// Distribute the work. If run under MPI then finish, else return to the
// standard execution flow
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT)
timer.start();
bool has_run_under_MPI = hlc.startWork(forest, cmd);
// If executed under MPI report the time spent, otherwise stop the timer so
// it can be restarted around the serial execution
if (has_run_under_MPI) {
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT) {
timer.stop();
std::cout << std::endl
<< "TIMER (processing) ncores: " << std::setw(2)
<< num_threads * (hlc.numJobs() - 1) + 1
<< " time: " << timer.get() << std::endl;
}
return 0;
} else {
timer.stop();
}
#endif
// Compute the range of branches to mark as foreground
size_t branch_start, branch_end;
std::set<int> fg_set; // to save a list of fg branches from the
// getBranchRange function
std::set<int> ib_set; // to save a list of internal branches from the
// getBranchRange function
std::vector<double> mVar; // to save optimization variables
forest.getBranchRange(
cmd, branch_start, branch_end, fg_set,
ib_set); // fgset is added to save a list of fg branches
// for (std::set<int>::iterator it=ib_set.begin(); it!=ib_set.end(); ++it)
// std::cout << " " << *it << ",";
// Start timing parallel part
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT)
timer.start();
double lnl0, lnl1 = 0.;
if (!fg_set.empty()) // in case of marked fg branches (one or multiple fg)