Commit f52bff01 authored by oshahmir's avatar oshahmir
Browse files

Corrected issue 35 (output file not found)

parent 3a492bbe
[Buildset]
BuildItems=@Variant(\x00\x00\x00\t\x00\x00\x00\x00\x01\x00\x00\x00\x0b\x00\x00\x00\x00\x01\x00\x00\x00\x1c\x00f\x00a\x00s\x00t\x00c\x00o\x00d\x00e\x00m\x00l\x00.\x00g\x00i\x00t)
[CMake]
Build Directory Count=1
Current Build Directory Index=0
ProjectRootRelative=./
[CMake][CMake Build Directory 0]
Build Directory Path=file:///home/omid/Projects/fastcodeml.git/build
Build Type=Debug
CMake Binary=file:///usr/bin/cmake
Environment Profile=
Extra Arguments=
Install Directory=file:///usr/local
[Defines And Includes][Compiler]
Name=GCC
Path=gcc
Type=GCC
[Launch]
Launch Configurations=Launch Configuration 0
[Launch][Launch Configuration 0]
Configured Launch Modes=execute
Configured Launchers=nativeAppLauncher
Name=fast
Type=Native Application
[Launch][Launch Configuration 0][Data]
Arguments=-m 22 -bl -s 0 -nt 1 ../bad_cases/HBG010916-1.M0.009.nwk ../bad_cases/HBG010916-1.phy
Dependencies=@Variant(\x00\x00\x00\t\x00\x00\x00\x00\x01\x00\x00\x00\x0b\x00\x00\x00\x00\x02\x00\x00\x00\x1c\x00f\x00a\x00s\x00t\x00c\x00o\x00d\x00e\x00m\x00l\x00.\x00g\x00i\x00t\x00\x00\x00\x08\x00f\x00a\x00s\x00t)
Dependency Action=Build
EnvironmentGroup=
Executable=file:///home/omid/Projects/fastcodeml.git/
External Terminal=konsole --noclose --workdir %workdir -e %exe
Project Target=fastcodeml.git,fast
Use External Terminal=false
Working Directory=
isExecutable=false
[Project]
VersionControlSupport=kdevgit
......@@ -547,14 +547,14 @@ double MfgBayesTest::getGridParams(const std::vector<double> &aVars,
}
// Fill the matrices and compute their eigendecomposition.
#ifdef _MSC_VER
#pragma omp parallel sections default(none) shared( \
//#ifdef _MSC_VER
//#pragma omp parallel sections default(none) shared( \
omega_fg, omega_bg, kappa, q_fg, q_bg, omega_fg_is_one, omega_bg_is_one)
#else
#pragma omp parallel sections default(shared)
#endif
//#else
//#pragma omp parallel sections default(shared)
//#endif
{
#pragma omp section
//#pragma omp section
{
if (omega_fg_is_one)
q_fg.fillMatrix(kappa);
......@@ -563,7 +563,7 @@ double MfgBayesTest::getGridParams(const std::vector<double> &aVars,
q_fg.eigenQREV();
}
#pragma omp section
//#pragma omp section
{
if (omega_bg_is_one)
q_bg.fillMatrix(kappa);
......@@ -580,6 +580,7 @@ double MfgBayesTest::getGridParams(const std::vector<double> &aVars,
// Compute likelihoods
CacheAlignedDoubleVector likelihoods(mNumSites);
//probably main issue for beb non-determinism
mForest.computeLikelihoods(mBEBset, likelihoods,
mDependencies.getDependencies());
for (size_t site = 0; site < mNumSites; ++site) {
......@@ -587,6 +588,15 @@ double MfgBayesTest::getGridParams(const std::vector<double> &aVars,
mPriors[iw * mNumSites + site] =
(p > 0) ? log(p) : -184.2068074395237; // If p < 0 then the value in
// codeml.c is: log(1e-80);
// debug
if (site==982)
std::cout<<"before normalization mPriors[site] for cat " << iw << " is "<< mPriors[iw * mNumSites + site] << std::endl;
// debug
if (site==982)
std::cout<<"likeliohoods[site] for cat " << iw << " is " << likelihoods[site] << std::endl;
}
}
......@@ -604,9 +614,16 @@ double MfgBayesTest::getGridParams(const std::vector<double> &aVars,
// remove the previous log.
for (size_t k = 0; k < BEB_NUM_CAT; ++k) {
mPriors[k * mNumSites + site] = exp(mPriors[k * mNumSites + site] - fh);
// debug
if (site==982)
std::cout<<"mPriors[site] for cat " << k << " is "<< mPriors[k * mNumSites + site] << std::endl;
}
scale += fh * aSiteMultiplicity[site];
}
return scale;
}
......@@ -821,7 +838,9 @@ void MfgBayesTest::computeBEB(const std::vector<double> &aVars,
std::cout << std::endl
<< "Calculating f(w|X), posterior probs of site classes."
<< std::endl;
//debug
std::cout << "mNumSites: " << mNumSites << std::endl;
for (unsigned int site = 0; site < mNumSites; ++site) {
scale1 = -1e300;
......@@ -857,6 +876,14 @@ void MfgBayesTest::computeBEB(const std::vector<double> &aVars,
}
for (unsigned int j = 0; j < BEB_DIMS; ++j)
mSiteClassProb[j * mNumSites + site] *= exp(scale1 - fX);
//debug
if (site==982)
{
std::cout << "scale (t=log(fhk[codon_class]) + lnfXs[igrid]): " << scale1 << " fX: " << fX << " site multiplicity " << site_multiplicity[site] <<" prob: " << mSiteClassProb[2 * mNumSites + site] + mSiteClassProb[3 * mNumSites + site] << std::endl;
}
}
}
......@@ -903,6 +930,8 @@ void MfgBayesTest::printPositiveSelSites(std::set<int> aFgBranchSet) const {
ordered_map.insert(std::pair<size_t, size_t>(it->second, current_idx));
probs.push_back(prob);
++current_idx;
//debug
std::cout<< "idx: " << current_idx-1 << " prob: " << prob << " site: " << site << std::endl;
}
}
}
......@@ -925,6 +954,11 @@ void MfgBayesTest::printPositiveSelSites(std::set<int> aFgBranchSet) const {
std::cout << std::setw(6) << im->first + 1 << ' ' << std::fixed
<< std::setprecision(6) << prob << sig << std::endl;
//debug
if (im->first+1 == 1030)
std::cout<<"reduced site num for 1030: "<<im->second<<std::endl;
}
}
......
......@@ -194,3 +194,5 @@ endif(MPI_LIBRARY)
add_subdirectory(bad_cases)
\ No newline at end of file
......@@ -906,16 +906,16 @@ void Forest::computeLikelihoods(const mfgProbabilityMatrixSet &aSet,
const int len = static_cast<int>(ivs->size());
const unsigned int *tmp_ivs = &(*ivs)[0];
#ifdef _MSC_VER
#pragma omp parallel for default(none) shared(aSet, len, tmp_ivs, tmp_roots, \
//#ifdef _MSC_VER
//#pragma omp parallel for default(none) shared(aSet, len, tmp_ivs, tmp_roots, \
likelihoods) schedule(static)
#else
#pragma omp parallel for default(shared)
#endif
//#else
//#pragma omp parallel for default(shared)
//#endif
for (int i = 0; i < len; ++i) {
#ifndef _MSC_VER
#pragma omp task untied
#endif
//#ifndef _MSC_VER
//#pragma omp task untied
//#endif
{
// Compute likelihood array at the root of one tree (the access order is
// the fastest)
......
......@@ -25,6 +25,8 @@
#include "BayesTest.h"
#include "VerbosityLevels.h"
static const int INVALID_RANK = -1;
enum JobStatus {
......
......@@ -6,7 +6,7 @@
#include "WriteResults.h"
#include <string>
void WriteResults::outputResults(void) {
void WriteResults::outputResults(std::set<int> aIbSet, bool aBranchAll) {
// If no file set, then do nothing
if (!mFilename)
return;
......@@ -47,6 +47,9 @@ void WriteResults::outputResults(void) {
// Write the log-likelihood values (if a value is not present, write NA)
for (size_t branch = min_branch; branch <= max_branch; ++branch) {
if (aBranchAll || (aIbSet.find(branch) != aIbSet.end())) {
out << "Branch: " << std::setw(4) << branch << std::endl
<< std::endl
<< " LnL0: ";
......@@ -78,6 +81,7 @@ void WriteResults::outputResults(void) {
<< std::fixed << "Branch lengths:" << ims->second << std::endl;
}
out << std::endl;
}
}
// Write the positive selection sites (adding one to the site number because
......@@ -95,7 +99,7 @@ void WriteResults::outputResults(void) {
size_t ns = site.size();
for (size_t s = 0; s < ns; ++s) {
size_t i = idx[s];
out << "PositiveSelectionSite for branch: " << std::setw(4) << branch;
out << "Positive Selection Sites for branch: " << std::setw(4) << branch;
out << " Site: " << std::setw(6) << site[i] + 1
<< " Prob: " << std::setw(9) << std::setprecision(6) << std::fixed
<< prob[i] << std::endl;
......@@ -164,3 +168,179 @@ void WriteResults::saveParameters(size_t aFgBranch, std::string &aParamStr,
// Save the likelihood for later printing
mParamStr[aHypothesis][aFgBranch] = aParamStr;
}
// reporting methods where fg branches are marked in the nwk file
void WriteResultsMfg::outputResults(std::set<int> aFgSet) {
// If no file set, then do nothing
if (!mFilename)
return;
// Range of branches to be output (for H0 and H1)
std::map<size_t, double>::const_iterator im;
std::map<size_t, std::string>::const_iterator ims;
//size_t min_branch = std::numeric_limits<size_t>::max();
//size_t max_branch = 0;
//for (im = mLnL[0].begin(); im != mLnL[0].end(); ++im) {
//size_t v = im->first;
//if (v < min_branch)
// min_branch = v;
//if (v > max_branch)
// max_branch = v;
// }
//for (im = mLnL[1].begin(); im != mLnL[1].end(); ++im) {
// size_t v = im->first;
// if (v < min_branch)
// min_branch = v;
// if (v > max_branch)
// max_branch = v;
// }
// No entries, so do nothing
// if (min_branch == std::numeric_limits<size_t>::max())
// return;
// Open the output file
std::ofstream out(mFilename, std::ios_base::trunc | std::ios_base::out);
if (!out.good()) {
std::cout << "Cannot create results file <" << mFilename << ">"
<< std::endl;
return;
}
// Write the log-likelihood values (if a value is not present, write NA)
// for (size_t branch = min_branch; branch <= max_branch; ++branch) {
// if (aBranchAll || (aIbSet.find(branch) != aIbSet.end())) {
out << "Branch(es): " << std::setw(4) ;
for (std::set<int>::iterator it = aFgSet.begin();it != aFgSet.end(); ++it)
{out << " " << *it << " ";}
out << std::endl
<< std::endl
<< " LnL0: ";
// Prints LnL for H0 if present
im = mLnL[0].find(0);
if (im == mLnL[0].end()) {
out << std::setw(22) << "NA" << std::endl
<< std::endl;
} else {
out << std::setw(22) << std::setprecision(15) << std::fixed << im->second;
ims = mParamStr[0].find(0);
// This output does not work on BG/Q.
out << std::endl
<< std::fixed << "Branch lengths:" << ims->second << std::endl;
}
out << " LnL1: ";
// Prints LnL for H1 if present
im = mLnL[1].find(0);
if (im == mLnL[1].end()) {
out << std::setw(22) << "NA";
} else {
out << std::setw(22) << std::setprecision(15) << std::fixed << im->second;
ims = mParamStr[1].find(0);
out << std::endl
<< std::fixed << "Branch lengths:" << ims->second << std::endl;
}
out << std::endl;
//}
// }
// Write the positive selection sites (adding one to the site number because
// they start from 1 and not zero)
// for (size_t branch = min_branch; branch <= max_branch; ++branch) {
std::map<size_t, std::pair<std::vector<unsigned int>,
std::vector<double> > >::const_iterator ipss;
ipss = mPositiveSelSites.find(0);
if (ipss != mPositiveSelSites.end()) {
const std::vector<unsigned int> &site = ipss->second.first;
const std::vector<double> &prob = ipss->second.second;
const std::vector<size_t> &idx = orderSites(site);
size_t ns = site.size();
for (size_t s = 0; s < ns; ++s) {
size_t i = idx[s];
out << "Positive Selection Sites for branch(es): " << std::setw(4) ;
for (std::set<int>::iterator it = aFgSet.begin();it != aFgSet.end(); ++it)
{out << " " << *it << " ";}
out << " Site: " << std::setw(6) << site[i] + 1
<< " Prob: " << std::setw(9) << std::setprecision(6) << std::fixed
<< prob[i] << std::endl;
}
}
// }
out.close();
}
const std::vector<size_t> &
WriteResultsMfg::orderSites(const std::vector<unsigned int> &aSites) const {
// Fill the map with pairs(site, its index)
std::multimap<unsigned int, size_t> ordered_map;
size_t end(aSites.size());
for (size_t i = 0; i < end; ++i)
ordered_map.insert(std::pair<unsigned int, size_t>(aSites[i], i));
// Take the list of indices after ordering the list of sites
mSiteOrder.clear();
std::multimap<unsigned int, size_t>::const_iterator im(ordered_map.begin());
std::multimap<unsigned int, size_t>::const_iterator endm(ordered_map.end());
for (; im != endm; ++im)
mSiteOrder.push_back(im->second);
return mSiteOrder;
}
void WriteResultsMfg::saveLnL(std::set<int> aFgBranchSet, double aLnL,
unsigned int aHypothesis) {
// If no file set, then do nothing
if (!mFilename)
return;
// Sanity check
if (aHypothesis > 1)
return;
// Save the likelihood for later printing
mLnL[aHypothesis][0] = aLnL;
}
void WriteResultsMfg::savePositiveSelSites(
std::set<int> aFgBranchSet, const std::vector<unsigned int> &aPositiveSelSites,
const std::vector<double> &aPositiveSelSitesProb) {
// If no file set, then do nothing
if (!mFilename)
return;
// Save the positive selection sites and corresponding probabilities for later
// printing
mPositiveSelSites[0] =
std::make_pair(aPositiveSelSites, aPositiveSelSitesProb);
}
void WriteResultsMfg::saveParameters(std::set<int> aFgBranchSet, std::string &aParamStr,
unsigned int aHypothesis) {
// If no file set, then do nothing
if (!mFilename)
return;
// Sanity check
if (aHypothesis > 1)
return;
// Save the likelihood for later printing
mParamStr[aHypothesis][0] = aParamStr;
}
......@@ -3,6 +3,7 @@
#define WRITERESULTS_H
#include <map>
#include <set>
#include <vector>
#include <utility>
......@@ -31,8 +32,10 @@ public:
}
/// Output the results to the file given
/// @param[in] aIbSet Set of internal branches
/// @param[in] aBranchAll Whether fg branches are just internal or internals+leaves
///
void outputResults(void);
void outputResults(std::set<int> aIbSet, bool aBranchAll);
/// Save the likelihood for later printing.
///
......@@ -44,22 +47,9 @@ public:
///
void saveLnL(size_t aFgBranch, double aLnL, unsigned int aHypothesis);
/// Save the likelihood for later printing (multiple fg).
///
/// @param[in] aFgBranchSet The foreground branches to which the
/// log-likelihood refers
/// @param[in] aLnL The log-likelihood
/// @param[in] aHypothesis The hypothesis (0 for H0 and 1 for H1) for which
/// the log-likelihood has been computed
///
///
void savePositiveSelSites(size_t aFgBranch,
const std::vector<unsigned int> &aPositiveSelSites,
const std::vector<double> &aPositiveSelSitesProb);
/// Save the positive selection sites and corresponding probabilities for
/// later printing. (multiple fg)
/// later printing.
///
/// @param[in] aFgBranch The foreground branch to which the sites refers
/// @param[in] aPositiveSelSites The site in which the positive selection
......@@ -67,8 +57,16 @@ public:
/// @param[in] aPositiveSelSitesProb The corresponding probability as computed
/// by the BEB test
///
void savePositiveSelSites(size_t aFgBranch,
const std::vector<unsigned int> &aPositiveSelSites,
const std::vector<double> &aPositiveSelSitesProb);
/// Check if anything will be output at the end.
///
/// @return True if the output to file is enabled.
///
bool isWriteResultsEnabled(void) const { return mFilename != NULL; }
/// Returns the correct index order to have the sites printed in the correct
......@@ -108,4 +106,100 @@ private:
/// hypothesis
};
/// Write all results to a given file for the case of fg branches marked in nwk file.
class WriteResultsMfg {
public:
/// Constructor
///
/// @param[in] aFilename The filename to which the output should go. If null
/// no printing will happen
///
explicit WriteResultsMfg(const char *aFilename) : mFilename(aFilename) {}
/// Destructor.
///
~WriteResultsMfg() {
mLnL[0].clear();
mLnL[1].clear();
mPositiveSelSites.clear();
}
/// Output the results to the file given
/// @param[in] aFgSet The set of marked fg branches
///
void outputResults(std::set<int> aFgSet);
/// Save the likelihood for later printing.
///
/// @param[in] aFgBranchSet The marked foreground branches to which the log-likelihood
/// refers
/// @param[in] aLnL The log-likelihood
/// @param[in] aHypothesis The hypothesis (0 for H0 and 1 for H1) for which
/// the log-likelihood has been computed
///
void saveLnL(std::set<int> aFgBranchSet, double aLnL, unsigned int aHypothesis);
/// Save the positive selection sites and corresponding probabilities for
/// later printing.
///
/// @param[in] aFgBranchSet The marked foreground branches to which the sites refers
/// @param[in] aPositiveSelSites The site in which the positive selection
/// appears as computed by the BEB test
/// @param[in] aPositiveSelSitesProb The corresponding probability as computed
/// by the BEB test
///
void savePositiveSelSites(std::set<int> aFgBranchSet,
const std::vector<unsigned int> &aPositiveSelSites,
const std::vector<double> &aPositiveSelSitesProb);
/// Check if anything will be output at the end.
///
/// @return True if the output to file is enabled.
///
bool isWriteResultsEnabled(void) const { return mFilename != NULL; }
/// Returns the correct index order to have the sites printed in the correct
/// order.
///
/// @param[in] aSites The vector of site numbers
///
/// @return The list of indices that gives the sites in the correct order.
///
const std::vector<size_t> &
orderSites(const std::vector<unsigned int> &aSites) const;
/// Save the parameters string for later printing.
///
/// @param[in] aFgBranchSet The marked foreground branches to which the log-likelihood
/// refers
/// @param[in] aParamStr The parameters string
/// @param[in] aHypothesis The hypothesis (0 for H0 and 1 for H1) for which
/// the log-likelihood has been computed
///
void saveParameters(std::set<int> aFgBranchSet, std::string &aParamStr,
unsigned int aHypothesis);
private:
const char *mFilename; ///< The file to which the results should be written.
/// If null, no printing appear
std::map<size_t, double> mLnL[2]; ///< The log-likelihood for the given marekd fg
/// branches and for the two hypothesis
std::map<size_t, std::pair<std::vector<unsigned int>, std::vector<double> > >
mPositiveSelSites; ///< The sites under positive selection and the
/// corresponding probabilities for a given marked fg branches
mutable std::vector<size_t>
mSiteOrder; ///< The new site+prob order computed by orderSites routine
std::map<size_t, std::string> mParamStr[2]; ///< The parameters string for the
/// given marked fg branches and for the two
/// hypothesis
};
#endif
(((((((((a0009:0.005122,a0005:0.005553):0.021588,(a0000:0.09922,a0003:0.056644):0.214725):0.010754,a0010:0.015175):0.074881,a0001:0.06595):0.010737,a0014:0.314409):0.007601,a0013:0.083805):0.264158,a0007:0.430493):0.340044,a0002:1.259416):0.66615,(((a0008:0.090909,a0011:0.107215):0.183011,(a0012:0.283332,a0004:0.851209):0.052773):0.459232,a0006:0.540195)#1:0.694423);
This diff is collapsed.
(((((((a0000:0.150779,a0011:0.366606):0.020186,(a0008:0.189257,a0012:0.064304):0.491414):0.003135,(a0005:0.027918,(a0014:0.008119,a0002:0.002763):0.019253):0.07771):0.185033,a0009:0.275464):0.185673,a0003:0.29294):0.394691,a0013:1.474877)#1:0.414072,(((a0006:0.64944,a0001:0.369005):0.077098,(a0004:0.342883,a0010:0.114781):0.128282):0.442747,a0007:3.987982):0.466218);
This diff is collapsed.
(((a0003:0.940287,(a0006:0.225433,((a0008:0.070547,(a0013:0.069575,((a0014:0.003074,a0001:0.01845):4e-06,a0005:0.005609):0.079689):0.000897):0.011044,(a0007:0.048967,a0009:0.066015):0.166874):0.139738)#1:0.1328):0.2447,a0012:0.90415):0.965229,((((a0010:0.146055,a0004:0.095622):0.272589,a0011:0.127978):0.109596,a0002:0.382271):0.498846,a0000:0.498494):1.032012);
This diff is collapsed.
(((((((a0001:0.075468,a0005:0.085411):0.207804,(a0010:0.720769,a0006:0.020905):0.105001):0.012384,(a0000:0.142497,a0012:0.282057)#1:0.018899):0.178792,a0009:0.443433)#1:0.714179,a0008:0.474661):0.143467,a0011:1.098262):0.162709,((a0007:0.348344,(a0003:0.324275,(a0013:0.17518,a0002:0.376901):0.198321):0.112915):0.242393,a0004:0.617143):0.19161);
This diff is collapsed.
CODONML (in paml version 4.8a, July 2014) ./bad_cases/HBG010916-1.phy
Model: several dN/dS ratios for branches for branches, omega = 1.000 fixed
Codon frequency model: F3x4
Site-class models: PositiveSelection
ns = 14 ls = 1038
Codon usage in sequences
--------------------------------------------------------------------------------------------------------------------------------------
Phe TTT 34 22 18 11 16 21 | Ser TCT 13 19 16 5 19 16 | Tyr TAT 20 20 22 4 11 21 | Cys TGT 12 11 9 2 10 8
TTC 16 15 20 29 19 18 | TCC 17 15 15 29 22 20 | TAC 13 14 17 30 22 19 | TGC 9 8 8 15 9 13
Leu TTA 10 1 1 0 2 2 | TCA 10 6 11 6 10 7 | *** TAA 0 0 0 0 0 0 | *** TGA 0 0 0 0 0 0
TTG 13 13 17 5 15 15 | TCG 3 2 4 7 1 7 | TAG 0 0 0 0 0 0 | Trp TGG 13 9 8 7 9 8
--------------------------------------------------------------------------------------------------------------------------------------
Leu CTT 12 7 13 4 8 7 | Pro CCT 12 14 13 6 12 6 | His CAT 12 11 8 1 9 10 | Arg CGT 9 11 6 4 10 7
CTC 22 21 15 20 22 17 | CCC 13 13 8 25 16 20 | CAC 8 6 9 16 9 9 | CGC 14 15 11 32 20 16
CTA 13 15 2 1 11 7 | CCA 19 16 15 1 15 7 | Gln CAA 5 6 10 0 5 3 | CGA 7 9 6 2 5 7
CTG 42 49 38 67 47 53 | CCG 1 2 5 11 3 8 | CAG 39 38 28 38 39 39 | CGG 13 13 4 13 13 6
--------------------------------------------------------------------------------------------------------------------------------------
Ile ATT 21 22 25 8 20 24 | Thr ACT 12 10 19 5 10 14 | Asn AAT 15 11 15 4 7 15 | Ser AGT 8 4 13 2 9 13
ATC 32 31 31 54 33 30 | ACC 21 22 14 29 23 20 | AAC 16 20 29 35 23 26 | AGC 19 23 13 22 15 12
ATA 12 3 9 0 3 8 | ACA 22 21 18 6 18 11 | Lys AAA 19 10 28 3 7 16 | Arg AGA 4 2 12 1 3 2
Met ATG 26 20 34 24 21 29 | ACG 3 6 5 18 7 11 | AAG 33 36 35 51 38 34 | AGG 4 6 7 2 7 5
--------------------------------------------------------------------------------------------------------------------------------------
Val GTT 13 9 29 4 7 18 | Ala GCT 22 26 27 12 28 20 | Asp GAT 18 16 22 8 16 19 | Gly GGT 18 20 18 12 15 10
GTC 18 19 18 23 24 22 | GCC 32 46 30 47 41 42 | GAC 26 28 26 37 28 25 | GGC 28 30 14 42 30 26