Commit 1e9fe46b authored by oshahmir's avatar oshahmir
Browse files

new merge from master-dev branch

parents 8ab98271 51c4dc5b
......@@ -547,14 +547,14 @@ double MfgBayesTest::getGridParams(const std::vector<double> &aVars,
}
// Fill the matrices and compute their eigendecomposition.
#ifdef _MSC_VER
#pragma omp parallel sections default(none) shared( \
//#ifdef _MSC_VER
//#pragma omp parallel sections default(none) shared( \
omega_fg, omega_bg, kappa, q_fg, q_bg, omega_fg_is_one, omega_bg_is_one)
#else
#pragma omp parallel sections default(shared)
#endif
//#else
//#pragma omp parallel sections default(shared)
//#endif
{
#pragma omp section
//#pragma omp section
{
if (omega_fg_is_one)
q_fg.fillMatrix(kappa);
......@@ -563,7 +563,7 @@ double MfgBayesTest::getGridParams(const std::vector<double> &aVars,
q_fg.eigenQREV();
}
#pragma omp section
//#pragma omp section
{
if (omega_bg_is_one)
q_bg.fillMatrix(kappa);
......@@ -580,6 +580,7 @@ double MfgBayesTest::getGridParams(const std::vector<double> &aVars,
// Compute likelihoods
CacheAlignedDoubleVector likelihoods(mNumSites);
//probably main issue for beb non-determinism
mForest.computeLikelihoods(mBEBset, likelihoods,
mDependencies.getDependencies());
for (size_t site = 0; site < mNumSites; ++site) {
......@@ -587,6 +588,15 @@ double MfgBayesTest::getGridParams(const std::vector<double> &aVars,
mPriors[iw * mNumSites + site] =
(p > 0) ? log(p) : -184.2068074395237; // If p < 0 then the value in
// codeml.c is: log(1e-80);
// debug
//if (site==982)
//std::cout<<"before normalization mPriors[site] for cat " << iw << " is "<< mPriors[iw * mNumSites + site] << std::endl;
// debug
// if (site==982)
// std::cout<<"likeliohoods[site] for cat " << iw << " is " << likelihoods[site] << std::endl;
}
}
......@@ -604,10 +614,17 @@ double MfgBayesTest::getGridParams(const std::vector<double> &aVars,
// remove the previous log.
for (size_t k = 0; k < BEB_NUM_CAT; ++k) {
mPriors[k * mNumSites + site] = exp(mPriors[k * mNumSites + site] - fh);
// debug
// if (site==982)
// std::cout<<"mPriors[site] for cat " << k << " is "<< mPriors[k * mNumSites + site] << std::endl;
}
scale += fh * aSiteMultiplicity[site];
}
return scale;
}
......@@ -821,6 +838,8 @@ void MfgBayesTest::computeBEB(const std::vector<double> &aVars,
std::cout << std::endl
<< "Calculating f(w|X), posterior probs of site classes."
<< std::endl;
//debug
//std::cout << "mNumSites: " << mNumSites << std::endl;
for (unsigned int site = 0; site < mNumSites; ++site) {
scale1 = -1e300;
......@@ -857,6 +876,14 @@ void MfgBayesTest::computeBEB(const std::vector<double> &aVars,
}
for (unsigned int j = 0; j < BEB_DIMS; ++j)
mSiteClassProb[j * mNumSites + site] *= exp(scale1 - fX);
//debug
// if (site==982)
// {
// std::cout << "scale (t=log(fhk[codon_class]) + lnfXs[igrid]): " << scale1 << " fX: " << fX << " site multiplicity " << site_multiplicity[site] <<" prob: " << mSiteClassProb[2 * mNumSites + site] + mSiteClassProb[3 * mNumSites + site] << std::endl;
// }
}
}
......@@ -903,6 +930,8 @@ void MfgBayesTest::printPositiveSelSites(std::set<int> aFgBranchSet) const {
ordered_map.insert(std::pair<size_t, size_t>(it->second, current_idx));
probs.push_back(prob);
++current_idx;
//debug
//std::cout<< "idx: " << current_idx-1 << " prob: " << prob << " site: " << site << std::endl;
}
}
}
......@@ -925,6 +954,11 @@ void MfgBayesTest::printPositiveSelSites(std::set<int> aFgBranchSet) const {
std::cout << std::setw(6) << im->first + 1 << ' ' << std::fixed
<< std::setprecision(6) << prob << sig << std::endl;
//debug
// if (im->first+1 == 1030)
// std::cout<<"reduced site num for 1030: "<<im->second<<std::endl;
}
}
......
......@@ -375,6 +375,9 @@ void BranchSiteModel::initFromResult(const std::vector<double> &aPreviousResult,
if (aValidLen == 0)
aValidLen = static_cast<unsigned int>(aPreviousResult.size());
//debug
//std::cout <<"aValidLen: " << aValidLen <<" mNumVariables "<<mNumVariables<< std::endl;
if (mFixedBranchLength) {
// Too long, cut. Too short, ignore. Remember H0 has 4 variables.
if (aValidLen > mNumVariables)
......@@ -385,11 +388,25 @@ void BranchSiteModel::initFromResult(const std::vector<double> &aPreviousResult,
} else if (aValidLen < 4)
aValidLen = 0;
//debug
//for (std::vector<double>::const_iterator i = aPreviousResult.begin(); i != aPreviousResult.end(); ++i)
//std::cout << *i << ' ';
// Copy the requested values
mVar.assign(aPreviousResult.begin(),
aPreviousResult.begin() + static_cast<size_t>(aValidLen));
//debug
//for (std::vector<double>::const_iterator i = mVar.begin(); i != mVar.end(); ++i)
//std::cout << *i << ' ';
mVar.resize(static_cast<size_t>(mNumVariables));
} else {
//debug
//for (std::vector<double>::const_iterator i = mVar.begin(); i != mVar.end(); ++i)
//std::cout << *i << ' ';
}
else {
// Too long, cut. Too short, ignore. Remember H0 has 4 variables.
if (aValidLen > mNumTimes + mNumVariables)
aValidLen = mNumTimes + mNumVariables;
......
......@@ -40,16 +40,19 @@ else(Boost_FOUND)
endif(Boost_FOUND)
# Get the configuration switches
# Get the *basic* configuration switches
OPTION(USE_LAPACK "Use BLAS/LAPACK" OFF)
OPTION(USE_MKL_VML "Use Intel MKL vectorized routines" OFF)
OPTION(USE_OPENMP "Compile with OpenMP support" ON)
OPTION(USE_MPI "Use MPI for high level parallelization" ON)
# Get the *advanced* configuration switches (change default values with care, some options are not fully functional)
OPTION(USE_MKL_VML "Use Intel MKL vectorized routines" OFF)
OPTION(USE_MPI "Use MPI for high level parallelization" OFF)
if(NOT WIN32)
OPTION(BUILD_NOT_SHARED "Build FastCodeML not shared" OFF)
endif(NOT WIN32)
OPTION(BUILD_SEARCH_MPI "Search for MPI installation?" OFF)
OPTION(USE_ORIGINAL_PROPORTIONS "Use the original CodeML proportion definition" OFF)
OPTION(USE_ORIGINAL_PROPORTIONS "Use the original CodeML proportion definition" ON)
SET(USE_LIKELIHOOD_METHOD "Original" CACHE STRING "Select the type of likelihood computation method: Original, NonRecursive, FatVector, DAG")
SET_PROPERTY(CACHE USE_LIKELIHOOD_METHOD PROPERTY STRINGS Original NonRecursive FatVector DAG)
OPTION(USE_IDENTITY_MATRIX "Force identity matrix when time is zero" OFF)
......@@ -194,3 +197,5 @@ endif(MPI_LIBRARY)
add_subdirectory(bad_cases)
\ No newline at end of file
......@@ -156,6 +156,11 @@ int Ming2::ming2(FILE *fout, double *f, double x[], const double xl[],
}
// double f0 = *f = (*fun) (x, n); ++mNumFunCall;
//debug
//for (int i=0;i < 100; i++)
//printf("x[%d]=%f - ", i,x[i]);
double f0 = *f = -mModel->computeLikelihood(x, n, mTraceFun);
xtoy(x, x0, n);
double sizep = 99.;
......
......@@ -906,16 +906,16 @@ void Forest::computeLikelihoods(const mfgProbabilityMatrixSet &aSet,
const int len = static_cast<int>(ivs->size());
const unsigned int *tmp_ivs = &(*ivs)[0];
#ifdef _MSC_VER
#pragma omp parallel for default(none) shared(aSet, len, tmp_ivs, tmp_roots, \
//#ifdef _MSC_VER
//#pragma omp parallel for default(none) shared(aSet, len, tmp_ivs, tmp_roots, \
likelihoods) schedule(static)
#else
#pragma omp parallel for default(shared)
#endif
//#else
//#pragma omp parallel for default(shared)
//#endif
for (int i = 0; i < len; ++i) {
#ifndef _MSC_VER
#pragma omp task untied
#endif
//#ifndef _MSC_VER
//#pragma omp task untied
//#endif
{
// Compute likelihood array at the root of one tree (the access order is
// the fastest)
......
......@@ -25,6 +25,8 @@
#include "BayesTest.h"
#include "VerbosityLevels.h"
static const int INVALID_RANK = -1;
enum JobStatus {
......
......@@ -30,11 +30,11 @@ INSTALLATION:
In the make.inc file set BLASLIB to the full path to the generated BLAS
e.g. BLASLIB = path/to/installation/blas/lib/libopenblas.a
Run $make
Two libraries will be created: lapack.a and tmglib.a.
Two libraries will be created: liblapack.a and libtmglib.a.
Put them inside a folder and merge them:
unpack in a new directory
ar x liblapack.a
ar x tmglib.a
ar x libtmglib.a
merge all produced object files into a new LAPACK library
ar rcs liblapack.a *.o
copy the liblapack.a into the library folder
......
......@@ -6,7 +6,7 @@
#include "WriteResults.h"
#include <string>
void WriteResults::outputResults(void) {
void WriteResults::outputResults(std::set<int> aIbSet, bool aBranchAll) {
// If no file set, then do nothing
if (!mFilename)
return;
......@@ -47,6 +47,9 @@ void WriteResults::outputResults(void) {
// Write the log-likelihood values (if a value is not present, write NA)
for (size_t branch = min_branch; branch <= max_branch; ++branch) {
if (aBranchAll || (aIbSet.find(branch) != aIbSet.end())) {
out << "Branch: " << std::setw(4) << branch << std::endl
<< std::endl
<< " LnL0: ";
......@@ -79,6 +82,7 @@ void WriteResults::outputResults(void) {
}
out << std::endl;
}
}
// Write the positive selection sites (adding one to the site number because
// they start from 1 and not zero)
......@@ -95,7 +99,7 @@ void WriteResults::outputResults(void) {
size_t ns = site.size();
for (size_t s = 0; s < ns; ++s) {
size_t i = idx[s];
out << "PositiveSelectionSite for branch: " << std::setw(4) << branch;
out << "Positive Selection Sites for branch: " << std::setw(4) << branch;
out << " Site: " << std::setw(6) << site[i] + 1
<< " Prob: " << std::setw(9) << std::setprecision(6) << std::fixed
<< prob[i] << std::endl;
......@@ -164,3 +168,179 @@ void WriteResults::saveParameters(size_t aFgBranch, std::string &aParamStr,
// Save the likelihood for later printing
mParamStr[aHypothesis][aFgBranch] = aParamStr;
}
// reporting methods where fg branches are marked in the nwk file
void WriteResultsMfg::outputResults(std::set<int> aFgSet) {
// If no file set, then do nothing
if (!mFilename)
return;
// Range of branches to be output (for H0 and H1)
std::map<size_t, double>::const_iterator im;
std::map<size_t, std::string>::const_iterator ims;
//size_t min_branch = std::numeric_limits<size_t>::max();
//size_t max_branch = 0;
//for (im = mLnL[0].begin(); im != mLnL[0].end(); ++im) {
//size_t v = im->first;
//if (v < min_branch)
// min_branch = v;
//if (v > max_branch)
// max_branch = v;
// }
//for (im = mLnL[1].begin(); im != mLnL[1].end(); ++im) {
// size_t v = im->first;
// if (v < min_branch)
// min_branch = v;
// if (v > max_branch)
// max_branch = v;
// }
// No entries, so do nothing
// if (min_branch == std::numeric_limits<size_t>::max())
// return;
// Open the output file
std::ofstream out(mFilename, std::ios_base::trunc | std::ios_base::out);
if (!out.good()) {
std::cout << "Cannot create results file <" << mFilename << ">"
<< std::endl;
return;
}
// Write the log-likelihood values (if a value is not present, write NA)
// for (size_t branch = min_branch; branch <= max_branch; ++branch) {
// if (aBranchAll || (aIbSet.find(branch) != aIbSet.end())) {
out << "Branch(es): " << std::setw(4) ;
for (std::set<int>::iterator it = aFgSet.begin();it != aFgSet.end(); ++it)
{out << " " << *it << " ";}
out << std::endl
<< std::endl
<< " LnL0: ";
// Prints LnL for H0 if present
im = mLnL[0].find(0);
if (im == mLnL[0].end()) {
out << std::setw(22) << "NA" << std::endl
<< std::endl;
} else {
out << std::setw(22) << std::setprecision(15) << std::fixed << im->second;
ims = mParamStr[0].find(0);
// This output does not work on BG/Q.
out << std::endl
<< std::fixed << "Branch lengths:" << ims->second << std::endl;
}
out << " LnL1: ";
// Prints LnL for H1 if present
im = mLnL[1].find(0);
if (im == mLnL[1].end()) {
out << std::setw(22) << "NA";
} else {
out << std::setw(22) << std::setprecision(15) << std::fixed << im->second;
ims = mParamStr[1].find(0);
out << std::endl
<< std::fixed << "Branch lengths:" << ims->second << std::endl;
}
out << std::endl;
//}
// }
// Write the positive selection sites (adding one to the site number because
// they start from 1 and not zero)
// for (size_t branch = min_branch; branch <= max_branch; ++branch) {
std::map<size_t, std::pair<std::vector<unsigned int>,
std::vector<double> > >::const_iterator ipss;
ipss = mPositiveSelSites.find(0);
if (ipss != mPositiveSelSites.end()) {
const std::vector<unsigned int> &site = ipss->second.first;
const std::vector<double> &prob = ipss->second.second;
const std::vector<size_t> &idx = orderSites(site);
size_t ns = site.size();
for (size_t s = 0; s < ns; ++s) {
size_t i = idx[s];
out << "Positive Selection Sites for branch(es): " << std::setw(4) ;
for (std::set<int>::iterator it = aFgSet.begin();it != aFgSet.end(); ++it)
{out << " " << *it << " ";}
out << " Site: " << std::setw(6) << site[i] + 1
<< " Prob: " << std::setw(9) << std::setprecision(6) << std::fixed
<< prob[i] << std::endl;
}
}
// }
out.close();
}
const std::vector<size_t> &
WriteResultsMfg::orderSites(const std::vector<unsigned int> &aSites) const {
// Fill the map with pairs(site, its index)
std::multimap<unsigned int, size_t> ordered_map;
size_t end(aSites.size());
for (size_t i = 0; i < end; ++i)
ordered_map.insert(std::pair<unsigned int, size_t>(aSites[i], i));
// Take the list of indices after ordering the list of sites
mSiteOrder.clear();
std::multimap<unsigned int, size_t>::const_iterator im(ordered_map.begin());
std::multimap<unsigned int, size_t>::const_iterator endm(ordered_map.end());
for (; im != endm; ++im)
mSiteOrder.push_back(im->second);
return mSiteOrder;
}
void WriteResultsMfg::saveLnL(std::set<int> aFgBranchSet, double aLnL,
unsigned int aHypothesis) {
// If no file set, then do nothing
if (!mFilename)
return;
// Sanity check
if (aHypothesis > 1)
return;
// Save the likelihood for later printing
mLnL[aHypothesis][0] = aLnL;
}
void WriteResultsMfg::savePositiveSelSites(
std::set<int> aFgBranchSet, const std::vector<unsigned int> &aPositiveSelSites,
const std::vector<double> &aPositiveSelSitesProb) {
// If no file set, then do nothing
if (!mFilename)
return;
// Save the positive selection sites and corresponding probabilities for later
// printing
mPositiveSelSites[0] =
std::make_pair(aPositiveSelSites, aPositiveSelSitesProb);
}
void WriteResultsMfg::saveParameters(std::set<int> aFgBranchSet, std::string &aParamStr,
unsigned int aHypothesis) {
// If no file set, then do nothing
if (!mFilename)
return;
// Sanity check
if (aHypothesis > 1)
return;
// Save the likelihood for later printing
mParamStr[aHypothesis][0] = aParamStr;
}
......@@ -3,6 +3,7 @@
#define WRITERESULTS_H
#include <map>
#include <set>
#include <vector>
#include <utility>
......@@ -31,8 +32,10 @@ public:
}
/// Output the results to the file given
/// @param[in] aIbSet Set of internal branches
/// @param[in] aBranchAll Whether fg branches are just internal or internals+leaves
///
void outputResults(void);
void outputResults(std::set<int> aIbSet, bool aBranchAll);
/// Save the likelihood for later printing.
///
......@@ -44,22 +47,9 @@ public:
///
void saveLnL(size_t aFgBranch, double aLnL, unsigned int aHypothesis);
/// Save the likelihood for later printing (multiple fg).
///
/// @param[in] aFgBranchSet The foreground branches to which the
/// log-likelihood refers
/// @param[in] aLnL The log-likelihood
/// @param[in] aHypothesis The hypothesis (0 for H0 and 1 for H1) for which
/// the log-likelihood has been computed
///
///
void savePositiveSelSites(size_t aFgBranch,
const std::vector<unsigned int> &aPositiveSelSites,
const std::vector<double> &aPositiveSelSitesProb);
/// Save the positive selection sites and corresponding probabilities for
/// later printing. (multiple fg)
/// later printing.
///
/// @param[in] aFgBranch The foreground branch to which the sites refers
/// @param[in] aPositiveSelSites The site in which the positive selection
......@@ -68,7 +58,15 @@ public:
/// by the BEB test
///
void savePositiveSelSites(size_t aFgBranch,
const std::vector<unsigned int> &aPositiveSelSites,
const std::vector<double> &aPositiveSelSitesProb);
/// Check if anything will be output at the end.
///
/// @return True if the output to file is enabled.
///
bool isWriteResultsEnabled(void) const { return mFilename != NULL; }
/// Returns the correct index order to have the sites printed in the correct
......@@ -108,4 +106,100 @@ private:
/// hypothesis
};
/// Write all results to a given file for the case of fg branches marked in nwk file.
class WriteResultsMfg {
public:
/// Constructor
///
/// @param[in] aFilename The filename to which the output should go. If null
/// no printing will happen
///
explicit WriteResultsMfg(const char *aFilename) : mFilename(aFilename) {}
/// Destructor.
///
~WriteResultsMfg() {
mLnL[0].clear();
mLnL[1].clear();
mPositiveSelSites.clear();
}
/// Output the results to the file given
/// @param[in] aFgSet The set of marked fg branches
///
void outputResults(std::set<int> aFgSet);
/// Save the likelihood for later printing.
///
/// @param[in] aFgBranchSet The marked foreground branches to which the log-likelihood
/// refers
/// @param[in] aLnL The log-likelihood
/// @param[in] aHypothesis The hypothesis (0 for H0 and 1 for H1) for which
/// the log-likelihood has been computed
///
void saveLnL(std::set<int> aFgBranchSet, double aLnL, unsigned int aHypothesis);
/// Save the positive selection sites and corresponding probabilities for
/// later printing.
///
/// @param[in] aFgBranchSet The marked foreground branches to which the sites refers
/// @param[in] aPositiveSelSites The site in which the positive selection
/// appears as computed by the BEB test
/// @param[in] aPositiveSelSitesProb The corresponding probability as computed
/// by the BEB test
///
void savePositiveSelSites(std::set<int> aFgBranchSet,
const std::vector<unsigned int> &aPositiveSelSites,
const std::vector<double> &aPositiveSelSitesProb);
/// Check if anything will be output at the end.
///
/// @return True if the output to file is enabled.
///
bool isWriteResultsEnabled(void) const { return mFilename != NULL; }
/// Returns the correct index order to have the sites printed in the correct
/// order.
///
/// @param[in] aSites The vector of site numbers
///
/// @return The list of indices that gives the sites in the correct order.
///
const std::vector<size_t> &
orderSites(const std::vector<unsigned int> &aSites) const;
/// Save the parameters string for later printing.
///
/// @param[in] aFgBranchSet The marked foreground branches to which the log-likelihood
/// refers
/// @param[in] aParamStr The parameters string
/// @param[in] aHypothesis The hypothesis (0 for H0 and 1 for H1) for which
/// the log-likelihood has been computed
///
void saveParameters(std::set<int> aFgBranchSet, std::string &aParamStr,
unsigned int aHypothesis);
private:
const char *mFilename; ///< The file to which the results should be written.
/// If null, no printing appear
std::map<size_t, double> mLnL[2]; ///< The log-likelihood for the given marekd fg
/// branches and for the two hypothesis
std::map<size_t, std::pair<std::vector<unsigned int>, std::vector<double> > >
mPositiveSelSites; ///< The sites under positive selection and the
/// corresponding probabilities for a given marked fg branches
mutable std::vector<size_t>
mSiteOrder; ///< The new site+prob order computed by orderSites routine
std::map<size_t, std::string> mParamStr[2]; ///< The parameters string for the
/// given marked fg branches and for the two
/// hypothesis
};
#endif
/// test
/// @mainpage FastCodeML
///
/// @section intro_sect Introduction
......@@ -204,20 +205,20 @@ int main(int aRgc, char **aRgv) {
if (cmd.mResultsFile)
std::cout << "Results file: " << cmd.mResultsFile
<< std::endl;
if (cmd.mNumThreads)
std::cout << "Number of threads: " << cmd.mNumThreads