Commit 74cadc80 authored by mvalle's avatar mvalle
Browse files

Implemented CPV scaling

Rationalized include files
Fixed CMakeLists for MPI on eiger


git-svn-id: https://svn.vital-it.ch/svn/hp2c/trunk/Codeml_Variants/Fastcodeml@5881 95c5a404-1f88-0410-a6b0-c3f062b6f34a
parent 53d56791
......@@ -46,11 +46,11 @@ OPTION(USE_MKL_VML "Use Intel MKL vectorized routines" OFF)
OPTION(USE_OPENMP "Compile with OpenMP support" ON)
OPTION(USE_MPI "Use MPI for high level parallelization" OFF)
if(NOT WIN32)
OPTION(BUILD_NOT_SHARED "Build FastCodeML not shared" OFF)
OPTION(BUILD_NOT_SHARED "Build FastCodeML not shared" OFF)
endif(NOT WIN32)
OPTION(BUILD_SEARCH_MPI "Search for MPI installation?" OFF)
OPTION(USE_ORIGINAL_PROPORTIONS "Use the original CodeML proportion definition" OFF)
SET( USE_LIKELIHOOD_METHOD "Original" CACHE STRING "Select the type of likelihood computation method: Original, NonRecursive, FatVector, DAG")
SET(USE_LIKELIHOOD_METHOD "Original" CACHE STRING "Select the type of likelihood computation method: Original, NonRecursive, FatVector, DAG")
SET_PROPERTY(CACHE USE_LIKELIHOOD_METHOD PROPERTY STRINGS Original NonRecursive FatVector DAG)
OPTION(USE_IDENTITY_MATRIX "Force identity matrix when time is zero" OFF)
OPTION(USE_CPV_SCALING "Scale conditional probability vectors to avoid under/overflow" OFF)
......@@ -124,28 +124,35 @@ if(USE_IDENTITY_MATRIX)
add_definitions(-DFORCE_IDENTITY_MATRIX)
endif(USE_IDENTITY_MATRIX)
if(USE_CPV_SCALING)
add_definitions(-DUSE_CPV_SCALING)
endif(USE_CPV_SCALING)
# Select on method to compute likelihood
if(USE_LIKELIHOOD_METHOD MATCHES "NonRecursive")
add_definitions(-DNON_RECURSIVE_VISIT)
add_definitions(-DNON_RECURSIVE_VISIT)
unset(USE_CPV_SCALING)
endif(USE_LIKELIHOOD_METHOD MATCHES "NonRecursive")
if(USE_LIKELIHOOD_METHOD MATCHES "FatVector")
add_definitions(-DNEW_LIKELIHOOD)
unset(USE_CPV_SCALING)
endif(USE_LIKELIHOOD_METHOD MATCHES "FatVector")
if(USE_LIKELIHOOD_METHOD MATCHES "DAG")
add_definitions(-DUSE_DAG)
unset(USE_CPV_SCALING)
endif(USE_LIKELIHOOD_METHOD MATCHES "DAG")
# CPV scaling is valid only for the Original likelihood method
if(USE_CPV_SCALING)
add_definitions(-DUSE_CPV_SCALING)
endif(USE_CPV_SCALING)
# Set paths
if(USE_MPI)
if(MPI_C_INCLUDE_PATH)
include_directories(${MPI_C_INCLUDE_PATH})
endif(MPI_C_INCLUDE_PATH)
if(MPI_INCLUDE_PATH)
include_directories(${MPI_INCLUDE_PATH})
endif(MPI_INCLUDE_PATH)
if(MPI_LIBRARY)
link_directories(${MPI_LIBRARY})
endif(MPI_LIBRARY)
......@@ -220,8 +227,6 @@ if($ENV{HOST} MATCHES "eiger")
endif($ENV{HOST} MATCHES "eiger")
#set(CMAKE_CXX_FLAGS_DEBUG "-g -O -Wall -std=c++98 -Wextra -Wno-unused-parameter -Wunused" CACHE "Debug mode options" STRING)
###########################################################
......
......@@ -91,7 +91,6 @@ void Forest::loadTreeAndGenes(const PhyloTree& aTree, const Genes& aGenes, Codon
#else
for(int set=0; set < Nt; ++set) aGenes.setLeaveProb((*il)->mProb[set]);
#endif
// Count codons
aGenes.updateCodonCount(codon_count, mult[site]);
}
......@@ -851,7 +850,11 @@ void Forest::computeLikelihoods(const ProbabilityMatrixSet& aSet, CacheAlignedDo
const double* g = computeLikelihoodsWalkerTC(tmp_roots+site, aSet, set_idx);
#ifdef USE_CPV_SCALING
likelihoods[set_idx*mNumSites+site] = dot(mCodonFreq, g)*g[N];
#else
likelihoods[set_idx*mNumSites+site] = dot(mCodonFreq, g);
#endif
}
}
}
......@@ -883,14 +886,27 @@ double* Forest::computeLikelihoodsWalkerTC(const ForestNode* aNode, const Probab
if(first)
{
aSet.doTransitionAtLeaf(aSetIdx, branch_id, leaf_codon, anode_prob);
#ifdef USE_CPV_SCALING
anode_prob[N] = normalizeVector(anode_prob);
if(anode_other_tree_prob) memcpy(anode_other_tree_prob+VECTOR_SLOT*aSetIdx, anode_prob, (N+1)*sizeof(double));
#else
if(anode_other_tree_prob) memcpy(anode_other_tree_prob+VECTOR_SLOT*aSetIdx, anode_prob, N*sizeof(double));
#endif
first = false;
}
else
{
#ifdef USE_CPV_SCALING
double ALIGN64 temp[N+1];
#else
double ALIGN64 temp[N];
#endif
double* x = anode_other_tree_prob ? anode_other_tree_prob+VECTOR_SLOT*aSetIdx : temp;
aSet.doTransitionAtLeaf(aSetIdx, branch_id, leaf_codon, x);
#ifdef USE_CPV_SCALING
x[N] = normalizeVector(x);
anode_prob[N] *= x[N];
#endif
elementWiseMult(anode_prob, x);
}
}
......@@ -898,15 +914,30 @@ double* Forest::computeLikelihoodsWalkerTC(const ForestNode* aNode, const Probab
{
if(first)
{
aSet.doTransition(aSetIdx, branch_id, computeLikelihoodsWalkerTC(m, aSet, aSetIdx), anode_prob);
double* g = computeLikelihoodsWalkerTC(m, aSet, aSetIdx);
aSet.doTransition(aSetIdx, branch_id, g, anode_prob);
#ifdef USE_CPV_SCALING
anode_prob[N] = normalizeVector(anode_prob)*g[N];
if(anode_other_tree_prob) memcpy(anode_other_tree_prob+VECTOR_SLOT*aSetIdx, anode_prob, (N+1)*sizeof(double));
#else
if(anode_other_tree_prob) memcpy(anode_other_tree_prob+VECTOR_SLOT*aSetIdx, anode_prob, N*sizeof(double));
#endif
first = false;
}
else
{
#ifdef USE_CPV_SCALING
double ALIGN64 temp[N+1];
#else
double ALIGN64 temp[N];
#endif
double* x = anode_other_tree_prob ? anode_other_tree_prob+VECTOR_SLOT*aSetIdx : temp;
aSet.doTransition(aSetIdx, branch_id, computeLikelihoodsWalkerTC(m, aSet, aSetIdx), x);
double* g = computeLikelihoodsWalkerTC(m, aSet, aSetIdx);
aSet.doTransition(aSetIdx, branch_id, g, x);
#ifdef USE_CPV_SCALING
x[N] = (normalizeVector(x)*g[N]);
anode_prob[N] *= x[N];
#endif
elementWiseMult(anode_prob, x);
}
}
......@@ -915,11 +946,18 @@ double* Forest::computeLikelihoodsWalkerTC(const ForestNode* aNode, const Probab
{
if(first)
{
#ifdef USE_CPV_SCALING
memcpy(anode_prob, anode_other_tree_prob+VECTOR_SLOT*aSetIdx, (N+1)*sizeof(double));
#else
memcpy(anode_prob, anode_other_tree_prob+VECTOR_SLOT*aSetIdx, N*sizeof(double));
#endif
first = false;
}
else
{
#ifdef USE_CPV_SCALING
anode_prob[N] *= anode_other_tree_prob[VECTOR_SLOT*aSetIdx+N];
#endif
elementWiseMult(anode_prob, anode_other_tree_prob+VECTOR_SLOT*aSetIdx);
}
}
......@@ -945,13 +983,6 @@ double* Forest::computeLikelihoodsWalkerTC(const ForestNode* aNode, const Probab
for(unsigned int idx=0; idx < nc; ++idx) elementWiseMult(anode_prob, mInvCodonFreq);
break;
}
#ifdef USE_CPV_SCALING
double w[N];
memcpy(w, anode_prob, N*sizeof(double));
double len = normalizeVector(w);
std::cout << "*** " << std::setw(3) << aNode->mBranchId+1 << ' ' << std::scientific << std::setprecision(4) << len << std::endl;
#endif
#endif
return anode_prob;
......
......@@ -2,9 +2,7 @@
#ifndef FOREST_H
#define FOREST_H
#include <fstream>
#include <vector>
#include <utility>
#include "PhyloTree.h"
#include "Genes.h"
#include "ForestNode.h"
......
......@@ -64,7 +64,7 @@ struct ForestNode
///
ForestNode() : mChildrenSameTreeFlags(ALL_CHILDREN_SAME_TREE), mChildrenCount(0), mLeafCodon(-1), mBranchId(0), mOwnTree(0), mParent(NULL), mInternalNodeId(0)
#ifdef NON_RECURSIVE_VISIT
, mFirstChild(false), mChildIdx(0),
, mFirstChild(false), mChildIdx(0)
#endif
{
#ifndef NEW_LIKELIHOOD
......
......@@ -62,7 +62,7 @@ long long Genes::getCodonIdx(std::string aSpecie, size_t aSite) const
return -code;
}
void Genes::setLeaveProb(double* aLeaveProbVect, double aProb) const
void Genes::setLeaveProb(double* aLeaveProbVect) const
{
if(mCurrentPositions.empty())
{
......@@ -70,13 +70,14 @@ void Genes::setLeaveProb(double* aLeaveProbVect, double aProb) const
}
else if(mCurrentPositions.size() == 1)
{
aLeaveProbVect[mCurrentPositions[0]] = aProb;
aLeaveProbVect[mCurrentPositions[0]] = 1.;
}
else
{
aProb /= static_cast<double>(mCurrentPositions.size());
for(size_t i=0; i < mCurrentPositions.size(); ++i) aLeaveProbVect[mCurrentPositions[i]] = aProb;
double prob = 1./static_cast<double>(mCurrentPositions.size());
for(size_t i=0; i < mCurrentPositions.size(); ++i) aLeaveProbVect[mCurrentPositions[i]] = prob;
}
#ifdef USE_CPV_SCALING
// This extra location will be used to carry the CPV norm to revert normalization at the end of the likelihood computation
aLeaveProbVect[N] = 1.0;
......
......@@ -60,14 +60,13 @@ public:
///
long long getCodonIdx(std::string aSpecie, size_t aSite) const;
/// Set the correct positions in the leave probability vector to one.
/// Set the correct positions in the leave probability vector to 1/num_positions.
///
/// @param[out] aLeaveProbVect The leave probability vector to be set.
/// @param[in] aProb The probability to be set at the codon positions inside aLeaveProbVect.
///
/// @exception FastCodeMLFatal If saved codon is invalid.
///
void setLeaveProb(double* aLeaveProbVect, double aProb=1.0) const;
void setLeaveProb(double* aLeaveProbVect) const;
/// Update codon count in a given array.
///
......
......@@ -14,6 +14,8 @@
#ifdef USE_LAPACK
#include "blas.h"
#else
#include <cmath>
#endif
//#ifdef USE_MKL_VML
......@@ -118,6 +120,7 @@ inline bool isDifferent(double aFirst, double aSecond)
#ifdef USE_CPV_SCALING
/// Normalize a vector (length 61).
///
/// @param[in,out] aVector The vector to be scaled
......@@ -126,13 +129,21 @@ inline bool isDifferent(double aFirst, double aSecond)
///
inline double normalizeVector(double* RESTRICT aVector)
{
#ifdef USE_LAPACK
double norm = dnrm2_(&N, aVector, &I1);
double inv_norm = 1./norm;
dscal_(&N, &inv_norm, aVector, &I1);
#else
double norm = 0.;
for(int i=0; i < N; ++i) norm += aVector[i]*aVector[i];
norm = sqrt(norm);
for(int i=0; i < N; ++i) aVector[i] /= norm;
#endif
return norm;
}
#endif
......
......@@ -19,13 +19,16 @@ static const int Nt = 4;
///
static const int MATRIX_SLOT = N*N+3;
/// Slot size for a vector. It should be equal or larger than N+1. Value identified by measurement.
/// Slot size for a vector. It should be equal or larger than N+1. The slot size value has been identified by measurement.
/// In the vectors used as CPV the location after the end will contain the CVP norm.
///
#ifdef NEW_LIKELIHOOD
static const int VECTOR_SLOT = 62;
#ifdef USE_CPV_SCALING
#error "Cannot use CPV scaling with new likelihood!"
#endif
static const int VECTOR_SLOT = N;
#else
static const int VECTOR_SLOT = 66;
static const int VECTOR_SLOT = N+1+4;
#endif
/// Alignment to avoid cache line false sharing.
......
......@@ -4,6 +4,7 @@
#include <string>
#include <vector>
#include <fstream>
#include "TreeNode.h"
#include "ForestNode.h"
#include "Types.h"
......
......@@ -146,6 +146,9 @@ int main(int aRgc, char **aRgv)
#ifdef USE_MPI
std::cout << "USE_MPI ";
#endif
#ifdef USE_CPV_SCALING
std::cout << "USE_CPV_SCALING ";
#endif
#ifdef NEW_LIKELIHOOD
std::cout << "NEW_LIKELIHOOD ";
#endif
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment