Commit d6205ef5 authored by omid's avatar omid
Browse files

Some corrections in reporting the final tree (based on the chosen hypothesis)
parent 78a52c80
...@@ -62,821 +62,855 @@ const char *version = "1.3.0"; ...@@ -62,821 +62,855 @@ const char *version = "1.3.0";
int main(int aRgc, char **aRgv) { int main(int aRgc, char **aRgv) {
Timer timer_app; Timer timer_app;
timer_app.start(); timer_app.start();
try { try {
#ifdef USE_MKL_VML #ifdef USE_MKL_VML
// If used, initialize the MKL VML library // If used, initialize the MKL VML library
vmlSetMode(VML_HA | VML_DOUBLE_CONSISTENT); vmlSetMode(VML_HA | VML_DOUBLE_CONSISTENT);
#endif #endif
#ifdef USE_MPI #ifdef USE_MPI
// Start the high level parallel executor (based on MPI) // Start the high level parallel executor (based on MPI)
HighLevelCoordinator hlc(&aRgc, &aRgv); HighLevelCoordinator hlc(&aRgc, &aRgv);
#endif #endif
// Parse the command line // Parse the command line
CmdLine cmd; CmdLine cmd;
cmd.parseCmdLine(aRgc, aRgv); cmd.parseCmdLine(aRgc, aRgv);
// Adjust and report the number of threads that will be used // Adjust and report the number of threads that will be used
#ifdef _OPENMP #ifdef _OPENMP
int num_threads = omp_get_max_threads(); int num_threads = omp_get_max_threads();
// std::cout<<"max num of thr: "<< num_threads <<std::endl; // std::cout<<"max num of thr: "<< num_threads <<std::endl;
if ((cmd.mNumThreads >= 1) && if ((cmd.mNumThreads >= 1) &&
(cmd.mNumThreads <= (unsigned int)num_threads)) (cmd.mNumThreads <= (unsigned int)num_threads))
num_threads = cmd.mNumThreads; num_threads = cmd.mNumThreads;
// std::cout<<"num of thr: "<< num_threads <<std::endl; // std::cout<<"num of thr: "<< num_threads <<std::endl;
omp_set_num_threads(num_threads); omp_set_num_threads(num_threads);
/*if (num_threads < 2) /*if (num_threads < 2)
cmd.mForceSerial = true; cmd.mForceSerial = true;
else else
cmd.mForceSerial = false;*/ cmd.mForceSerial = false;*/
#else #else
cmd.mNumThreads = 1; cmd.mNumThreads = 1;
int num_threads = 1; int num_threads = 1;
cmd.mForceSerial = true; cmd.mForceSerial = true;
#endif #endif
/*#ifdef _OPENMP /*#ifdef _OPENMP
int num_threads = omp_get_max_threads(); int num_threads = omp_get_max_threads();
if(num_threads < 2 || cmd.mForceSerial) if(num_threads < 2 || cmd.mForceSerial)
{ {
cmd.mForceSerial = true; cmd.mForceSerial = true;
num_threads = 1; num_threads = 1;
omp_set_num_threads(1); omp_set_num_threads(1);
} }
#else #else
cmd.mForceSerial = true; cmd.mForceSerial = true;
int num_threads = 1; int num_threads = 1;
#endif*/ #endif*/
#ifdef USE_MPI #ifdef USE_MPI
// Shutdown messages from all MPI processes except the master // Shutdown messages from all MPI processes except the master
if (!hlc.isMaster()) if (!hlc.isMaster())
cmd.mVerboseLevel = VERBOSE_NONE; cmd.mVerboseLevel = VERBOSE_NONE;
#endif #endif
// std::cout <<std::endl<<"------------------"<< std::endl<<"FastCodeML // std::cout <<std::endl<<"------------------"<< std::endl<<"FastCodeML
//V"<<version<<std::endl<<"------------------"<<std::endl; //V"<<version<<std::endl<<"------------------"<<std::endl;
// Write out command line parameters (if not quiet i.e. if verbose level > // Write out command line parameters (if not quiet i.e. if verbose level >
// 0) // 0)
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT) { if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT) {
std::cout << "------------------------------------" << std::endl; std::cout << "------------------------------------" << std::endl;
std::cout << "FastCodeML V" << version << std::endl; std::cout << "FastCodeML V" << version << std::endl;
std::cout << "------------------------------------" << std::endl; std::cout << "------------------------------------" << std::endl;
std::cout << std::endl; std::cout << std::endl;
std::cout << "Tree file: " << cmd.mTreeFile << std::endl; std::cout << "Tree file: " << cmd.mTreeFile << std::endl;
std::cout << "Gene file: " << cmd.mGeneFile << std::endl; std::cout << "Gene file: " << cmd.mGeneFile << std::endl;
std::cout << "Verbose level: " << cmd.mVerboseLevel << " (" std::cout << "Verbose level: " << cmd.mVerboseLevel << " ("
<< decodeVerboseLevel(cmd.mVerboseLevel) << ')' << std::endl; << decodeVerboseLevel(cmd.mVerboseLevel) << ')'
if (cmd.mSeed) << std::endl;
std::cout << "Seed: " << cmd.mSeed << std::endl; if (cmd.mSeed)
if (cmd.mBranchFromFile) std::cout << "Seed: " << cmd.mSeed << std::endl;
std::cout << "Branch: From tree file" << std::endl; if (cmd.mBranchFromFile)
else if (cmd.mBranchAll) std::cout << "Branch: From tree file" << std::endl;
std::cout << "FG Branches: All (internals + leaves) " else if (cmd.mBranchAll)
<< std::endl; std::cout << "FG Branches: All (internals + leaves) "
// else if(cmd.mBranchStart != UINT_MAX && cmd.mBranchStart == << std::endl;
// cmd.mBranchEnd) // else if(cmd.mBranchStart != UINT_MAX && cmd.mBranchStart ==
// std::cout // cmd.mBranchEnd)
//<< "Branch: " << cmd.mBranchStart << std::endl; // std::cout
// else if(cmd.mBranchStart != UINT_MAX && cmd.mBranchEnd == UINT_MAX) //<< "Branch: " << cmd.mBranchStart << std::endl;
// std::cout // else if(cmd.mBranchStart != UINT_MAX && cmd.mBranchEnd == UINT_MAX)
//<< "Branches: " << cmd.mBranchStart << "-end" << std::endl; // std::cout
// else if(cmd.mBranchStart != UINT_MAX && cmd.mBranchEnd != UINT_MAX) //<< "Branches: " << cmd.mBranchStart << "-end" << std::endl;
// std::cout // else if(cmd.mBranchStart != UINT_MAX && cmd.mBranchEnd != UINT_MAX)
//<< "Branches: " << cmd.mBranchStart << '-' << // std::cout
//cmd.mBranchEnd << std::endl; //<< "Branches: " << cmd.mBranchStart << '-' <<
if (!cmd.mStopIfNotLRT) //cmd.mBranchEnd << std::endl;
std::cout << "H0 pre stop: No" << std::endl; if (!cmd.mStopIfNotLRT)
if (cmd.mIgnoreFreq) std::cout << "H0 pre stop: No" << std::endl;
std::cout << "Codon freq.: Ignore" << std::endl; if (cmd.mIgnoreFreq)
if (cmd.mDoNotReduceForest) std::cout << "Codon freq.: Ignore" << std::endl;
std::cout << "Reduce forest: Do not reduce" << std::endl; if (cmd.mDoNotReduceForest)
else std::cout << "Reduce forest: Do not reduce" << std::endl;
std::cout << "Reduce forest: Aggressive" << std::endl; else
if (cmd.mInitH0fromH1) std::cout << "Reduce forest: Aggressive" << std::endl;
std::cout << "Starting val.: From H1" << std::endl; if (cmd.mInitH0fromH1)
else if (cmd.mInitFromParams && cmd.mBranchLengthsFromFile) std::cout << "Starting val.: From H1" << std::endl;
std::cout << "Starting val.: Times from tree file and params from " else if (cmd.mInitFromParams && cmd.mBranchLengthsFromFile)
"const (see below)" std::cout
<< std::endl; << "Starting val.: Times from tree file and params from "
else if (cmd.mInitFromParams) "const (see below)" << std::endl;
std::cout << "Starting val.: Params from const (see below)" else if (cmd.mInitFromParams)
<< std::endl; std::cout << "Starting val.: Params from const (see below)"
else if (cmd.mBranchLengthsFromFile) << std::endl;
std::cout << "Starting val.: Times from tree file" << std::endl; else if (cmd.mBranchLengthsFromFile)
if (cmd.mNoMaximization) std::cout << "Starting val.: Times from tree file"
std::cout << "Maximization: No" << std::endl; << std::endl;
if (cmd.mTrace) if (cmd.mNoMaximization)
std::cout << "Trace: On" << std::endl; std::cout << "Maximization: No" << std::endl;
if (cmd.mCleanData) if (cmd.mTrace)
std::cout << "Clean data: On" << std::endl; std::cout << "Trace: On" << std::endl;
else if (cmd.mCleanData)
std::cout << "Clean data: Off" << std::endl; std::cout << "Clean data: On" << std::endl;
if (cmd.mGraphFile) else
std::cout << "Graph file: " << cmd.mGraphFile << std::endl; std::cout << "Clean data: Off" << std::endl;
if (cmd.mGraphFile && cmd.mExportComputedTimes != UINT_MAX) if (cmd.mGraphFile)
std::cout << "Graph times: From H" << cmd.mExportComputedTimes std::cout << "Graph file: " << cmd.mGraphFile << std::endl;
<< std::endl; if (cmd.mGraphFile && cmd.mExportComputedTimes != UINT_MAX)
if (!cmd.mNoMaximization) std::cout << "Graph times: From H" << cmd.mExportComputedTimes
std::cout << "Optimizer: " << cmd.mOptimizationAlgo << std::endl; << std::endl;
if (cmd.mMaxIterations != MAX_ITERATIONS) if (!cmd.mNoMaximization)
std::cout << "Max iterations: " << cmd.mMaxIterations << std::endl; std::cout << "Optimizer: " << cmd.mOptimizationAlgo
if (cmd.mDeltaValueForGradient > 0.0) << std::endl;
std::cout << "Delta value: " << cmd.mDeltaValueForGradient if (cmd.mMaxIterations != MAX_ITERATIONS)
<< std::endl; std::cout << "Max iterations: " << cmd.mMaxIterations
std::cout << "Relative error: " << cmd.mRelativeError << std::endl; << std::endl;
if (cmd.mResultsFile) if (cmd.mDeltaValueForGradient > 0.0)
std::cout << "Results file: " << cmd.mResultsFile << std::endl; std::cout << "Delta value: " << cmd.mDeltaValueForGradient
if (cmd.mNumThreads) << std::endl;
std::cout << "Number of threads: " << cmd.mNumThreads << std::endl; std::cout << "Relative error: " << cmd.mRelativeError << std::endl;
if (cmd.mFixedBranchLength) if (cmd.mResultsFile)
std::cout << "Branch lengths are fixed" << std::endl; std::cout << "Results file: " << cmd.mResultsFile
<< std::endl;
if (cmd.mNumThreads)
std::cout << "Number of threads: " << cmd.mNumThreads
<< std::endl;
if (cmd.mFixedBranchLength)
std::cout << "Branch lengths are fixed" << std::endl;
#ifdef _OPENMP #ifdef _OPENMP
if (num_threads > 1) { if (num_threads > 1) {
std::cout << "Num. threads: " << num_threads << std::endl std::cout << "Num. threads: " << num_threads << std::endl
<< "Num. cores: " << omp_get_num_procs() << std::endl; << "Num. cores: " << omp_get_num_procs() << std::endl;
} else } else
#endif #endif
{ {
std::cout << "Num. threads: 1 serial" << std::endl std::cout << "Num. threads: 1 serial" << std::endl
<< "Num. cores: 1" << std::endl; << "Num. cores: 1" << std::endl;
} }
#ifdef USE_MPI #ifdef USE_MPI
if (hlc.numJobs() > 2) if (hlc.numJobs() > 2)
std::cout << "Num. MPI proc: 1 (master) + " << hlc.numJobs() - 1 std::cout << "Num. MPI proc: 1 (master) + " << hlc.numJobs() - 1
<< " (workers)" << std::endl; << " (workers)" << std::endl;
else else
std::cout << "Num. MPI proc: Insufficient, single task execution" std::cout << "Num. MPI proc: Insufficient, single task execution"
<< std::endl; << std::endl;
#endif #endif
std::cout << "Compiled with: "; std::cout << "Compiled with: ";
#ifdef _OPENMP #ifdef _OPENMP
std::cout << "USE_OPENMP "; std::cout << "USE_OPENMP ";
#endif #endif
#ifdef USE_MPI #ifdef USE_MPI
std::cout << "USE_MPI "; std::cout << "USE_MPI ";
#endif #endif
#ifdef USE_CPV_SCALING #ifdef USE_CPV_SCALING
std::cout << "USE_CPV_SCALING "; std::cout << "USE_CPV_SCALING ";
#endif #endif
#ifdef NEW_LIKELIHOOD #ifdef NEW_LIKELIHOOD
std::cout << "NEW_LIKELIHOOD "; std::cout << "NEW_LIKELIHOOD ";
#endif #endif
#ifdef NON_RECURSIVE_VISIT #ifdef NON_RECURSIVE_VISIT
std::cout << "NON_RECURSIVE_VISIT "; std::cout << "NON_RECURSIVE_VISIT ";
#endif #endif
#ifdef USE_DAG #ifdef USE_DAG
std::cout << "USE_DAG "; std::cout << "USE_DAG ";
#endif #endif
#ifdef USE_ORIGINAL_PROPORTIONS #ifdef USE_ORIGINAL_PROPORTIONS
std::cout << "USE_ORIGINAL_PROPORTIONS "; std::cout << "USE_ORIGINAL_PROPORTIONS ";
#endif #endif
#ifdef USE_LAPACK #ifdef USE_LAPACK
std::cout << "USE_LAPACK "; std::cout << "USE_LAPACK ";
#endif #endif
#ifdef USE_MKL_VML #ifdef USE_MKL_VML
std::cout << "USE_MKL_VML"; std::cout << "USE_MKL_VML";
#endif #endif
std::cout << std::endl << std::endl; std::cout << std::endl << std::endl;
if (cmd.mInitFromParams) { if (cmd.mInitFromParams) {
std::cout << "Param initial values:" << std::endl std::cout << "Param initial values:" << std::endl << std::endl
<< std::endl << ParseParameters::getInstance();
<< ParseParameters::getInstance(); }
} }
}
// Initialize the random number generator (0 means it is not set on the command // Initialize the random number generator (0 means it is not set on the command
// line) // line)
#ifdef USE_MPI #ifdef USE_MPI
// Ensure that each MPI process starts with a different seed // Ensure that each MPI process starts with a different seed
if (cmd.mSeed == 0) if (cmd.mSeed == 0)
cmd.mSeed = static_cast<unsigned int>(time(NULL)) + cmd.mSeed = static_cast<unsigned int>(time(NULL)) +
static_cast<unsigned int>(hlc.getRank()) * 1000; static_cast<unsigned int>(hlc.getRank()) * 1000;
#else #else
if (cmd.mSeed == 0) if (cmd.mSeed == 0)
cmd.mSeed = static_cast<unsigned int>(time(NULL)); cmd.mSeed = static_cast<unsigned int>(time(NULL));
#endif #endif
// srand(cmd.mSeed); // fastcodeml seed // srand(cmd.mSeed); // fastcodeml seed
SetSeedCodeml(cmd.mSeed, 0); // codeml seed is 1 SetSeedCodeml(cmd.mSeed, 0); // codeml seed is 1
// Verify the optimizer algorithm selected on the command line // Verify the optimizer algorithm selected on the command line
if (!cmd.mNoMaximization) if (!cmd.mNoMaximization)
BranchSiteModel::verifyOptimizerAlgo(cmd.mOptimizationAlgo); BranchSiteModel::verifyOptimizerAlgo(cmd.mOptimizationAlgo);
// Start a timer (to measure serial part over parallel one) // Start a timer (to measure serial part over parallel one)
Timer timer; Timer timer;
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT) if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT)
timer.start(); timer.start();
// Create the forest // Create the forest
Forest forest(cmd.mVerboseLevel); Forest forest(cmd.mVerboseLevel);
// Enclose file loading into a block so temporary structures could be // Enclose file loading into a block so temporary structures could be
// deleted when no more needed // deleted when no more needed
//{ //{
// Load the multiple sequence alignment (MSA) // Load the multiple sequence alignment (MSA)
Phylip msa(cmd.mVerboseLevel); Phylip msa(cmd.mVerboseLevel);
msa.readFile(cmd.mGeneFile, cmd.mCleanData); msa.readFile(cmd.mGeneFile, cmd.mCleanData);
// Load the phylogenetic tree // Load the phylogenetic tree
Newick tree(cmd.mVerboseLevel); Newick tree(cmd.mVerboseLevel);
tree.readFile(cmd.mTreeFile); tree.readFile(cmd.mTreeFile);
// Check coherence between the two files // Check coherence between the two files
msa.checkNameCoherence(tree.getSpecies()); msa.checkNameCoherence(tree.getSpecies());
// Check root and unrooting if tree is rooted // Check root and unrooting if tree is rooted
tree.checkRootBranches(); tree.checkRootBranches();
// If times from file then check for null branch lengths for any leaf // If times from file then check for null branch lengths for any leaf
if (cmd.mBranchLengthsFromFile) { if (cmd.mBranchLengthsFromFile) {
int zero_on_leaf_cnt = 0; int zero_on_leaf_cnt = 0;
int zero_on_int_cnt = 0; int zero_on_int_cnt = 0;
tree.countNullBranchLengths(zero_on_leaf_cnt, zero_on_int_cnt); tree.countNullBranchLengths(zero_on_leaf_cnt, zero_on_int_cnt);
if (zero_on_leaf_cnt > 0 || zero_on_int_cnt > 0) { if (zero_on_leaf_cnt > 0 || zero_on_int_cnt > 0) {
if (cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS) { if (cmd.mVerboseLevel >= VERBOSE_ONLY_RESULTS) {
std::cout << "Found null or missing branch length in tree file: on " std::cout
<< zero_on_leaf_cnt << " leave(s) and on " << "Found null or missing branch length in tree file: on "
<< zero_on_int_cnt << " internal branch(es)." << std::endl; << zero_on_leaf_cnt << " leave(s) and on "
} << zero_on_int_cnt << " internal branch(es)."
} << std::endl;
} }
}
// Print the tree with the numbering of internal branches }
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT)
tree.printTreeAnnotated(std::cout); // Print the tree with the numbering of internal branches
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT)
// Load the forest tree.printTreeAnnotated(std::cout);
forest.loadTreeAndGenes(
tree, msa, cmd.mIgnoreFreq ? CodonFrequencies::CODON_FREQ_MODEL_UNIF // Load the forest
: CodonFrequencies::CODON_FREQ_MODEL_F3X4); forest.loadTreeAndGenes(tree, msa,
cmd.mIgnoreFreq ?
// Reduce the forest merging common subtrees. Add also more reduction, then CodonFrequencies::CODON_FREQ_MODEL_UNIF :
// clean the no more useful data. CodonFrequencies::CODON_FREQ_MODEL_F3X4);
if (!cmd.mDoNotReduceForest) {
// bool sts = forest.reduceSubtrees(cmd.mNumReductionBlocks); // Reduce the forest merging common subtrees. Add also more reduction, then
forest.reduceSubtrees(); // clean the no more useful data.
if (!cmd.mDoNotReduceForest) {
// bool sts = forest.reduceSubtrees(cmd.mNumReductionBlocks);
forest.reduceSubtrees();
#ifndef NEW_LIKELIHOOD #ifndef NEW_LIKELIHOOD
forest.addAggressiveReduction(); forest.addAggressiveReduction();
#endif #endif
forest.cleanReductionWorkingData(); forest.cleanReductionWorkingData();
#ifdef NEW_LIKELIHOOD #ifdef NEW_LIKELIHOOD
forest.prepareNewReduction(); forest.prepareNewReduction();
#endif #endif
} }
#ifdef NEW_LIKELIHOOD #ifdef NEW_LIKELIHOOD
else { else {
forest.prepareNewReductionNoReuse(); forest.prepareNewReductionNoReuse();
} }
#endif #endif
#ifdef NON_RECURSIVE_VISIT #ifdef NON_RECURSIVE_VISIT
// Prepare the pointers to visit the trees without recursion // Prepare the pointers to visit the trees without recursion
forest.prepareNonRecursiveVisit(); forest.prepareNonRecursiveVisit();
#endif #endif
// Subdivide the trees in groups based on dependencies // Subdivide the trees in groups based on dependencies
// forest.prepareDependencies(cmd.mForceSerial || cmd.mDoNotReduceForest); // forest.prepareDependencies(cmd.mForceSerial || cmd.mDoNotReduceForest);
#ifdef USE_DAG #ifdef USE_DAG
// Load the forest into a DAG // Load the forest into a DAG
forest.loadForestIntoDAG(Nt); forest.loadForestIntoDAG(Nt);
#endif #endif
// Get the time needed by data preprocessing // Get the time needed by data preprocessing
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT) { if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT) {
timer.stop(); timer.stop();
std::cout << std::endl std::cout << std::endl << "TIMER (preprocessing) ncores: "
<< "TIMER (preprocessing) ncores: " << std::setw(2) << std::setw(2) << num_threads << " time: " << timer.get()
<< num_threads << " time: " << timer.get() << std::endl; << std::endl;
} }
// Print few statistics // Print few statistics
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT) if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT)
std::cout << forest; std::cout << forest;
#ifdef USE_MPI #ifdef USE_MPI
// Distribute the work. If run under MPI then finish, else return to the // Distribute the work. If run under MPI then finish, else return to the
// standard execution flow // standard execution flow
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT) if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT)
timer.start(); timer.start();
bool has_run_under_MPI = hlc.startWork(forest, cmd); bool has_run_under_MPI = hlc.startWork(forest, cmd);
// If executed under MPI report the time spent, otherwise stop the timer so // If executed under MPI report the time spent, otherwise stop the timer so
// it can be restarted around the serial execution // it can be restarted around the serial execution
if (has_run_under_MPI) { if (has_run_under_MPI) {
if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT) { if (cmd.mVerboseLevel >= VERBOSE_INFO_OUTPUT) {
timer.stop(); timer.stop();
std::cout << std::endl std::cout << std::endl
<< "TIMER (processing) ncores: " << std::setw(2) << "TIMER (processing) ncores: " << std::setw(2)
<< num_threads * (hlc.numJobs() - 1) + 1 << num_threads * (hlc.numJobs() - 1) + 1
<< " time: " << timer.get() << std::endl; << " time: " << timer.get() << std::endl;
} }
return 0; return 0;
} else { } else {
timer.stop(); timer.stop();
} }
#endif #endif
// Compute the range of branches to mark as foreground // Compute the range of branches to mark as foreground
size_t branch_start, branch_end; size_t branch_start, branch_end;
std::set<int> fg_set; // to save a list of fg branches from the std::set<int> fg_set; // to save a list of fg branches from the
// getBranchRange function // getBranchRange function
std::set<int> ib_set; // to save a list of internal branches from the std::set<int> ib_set; // to save a list of internal branches from the
// getBranchRange function // getBranchRange function
std::vector<double> mVar; // to save optimization variables std::vector<double> mVar; // to save optimization variables
forest.getBranchRange( forest.getBranchRange(cmd, branch_start