Commit e2047e61 authored by Nicolas Salamin's avatar Nicolas Salamin
Browse files

adding some function to compute the likelihood of a continuous character under...

adding some function to compute the likelihood of a continuous character under BM using the pruning algorithm
parents
#################################
## likelihood calculation in R ##
#################################
require(ape);
condLike.old<-function(sigma=1.0, blen, condvec, is.list=TRUE) {
# We assume in this function that the tree is fully dichotomous and that 2 descendants are provided
# for each internal node.
# The parameter sigma is the standard deviation of the rate of evolution of the trait.
# The branch lengths for internal nodes should be extended a bit because the variance of the internal
# nodes is a bit larger than what is observed on the tree
# see eqn 23.46 on page 405 in Joe's book.
# However, condvec[,3] stores the standard deviation of S, which is sigma^2 * (v1v2) / (v1 + v2) if
# you consider two descendants with branch lengths v1 and v2. We want here to recover only
# (v1v2) / (v1 + v2), so we need to divide it by sigma^2 and take the square of it.
# The value in condvec[,3] is 0 for terminal taxa, where the branch lengths are "correct".
mod.bl <- blen + (condvec[,3] * condvec[,3]) / (sigma * sigma);
# The ancestral value is simply the weighted mean of the values, using the modified branch lengths
# as the variance (modified because for internal nodes the variance is a bit larger).
# see eqn 23.43 on page 405 in Joe's book
anc <- sum(condvec[,2] * mod.bl) / sum(mod.bl);
# Extra variance due to the composite construction of the ancestor based on the two descendants
# given here is sigma^2 (v1 v2) / (v1 + v2)
# see eqn 23.46 on page 405 in Joe's book
# Here, we store the standard deviation and we thus take the square-root of the variance.
S <- sqrt((sigma * sigma) * prod(mod.bl) / sum(mod.bl));
# The likelihood is then simply the density of the normal distribution for the contrast of the two values
lnL <- (-(condvec[1,2] - condvec[2,2])^2 / (2 * sigma * sigma * sum(mod.bl)) - log((2 * pi * sigma * sigma * sum(mod.bl))^2)) + sum(condvec[,1]);
if(is.list) {
return(list(anc=lnL, lnl=anc, S=S));
}
else {
return(c(lnL, anc, S));
}
}
fN<-function(xa, xb, vpa, vpb, sigma) {
#the normal density. Sigma is the standard deviation.
return((-(xa - xb)^2 / (2 * sigma * sigma * (vpa + vpb)) - log(sqrt(2 * pi * sigma * sigma * (vpa + vpb)))));
#return(log(dnorm(xa-xb, sqrt((vpa + vpb) * sigma * sigma))));
}
condLike<-function(sigma=1.0, condvec, kids, try=FALSE) {
#following the paper by Novembre and Slatkin 2008
#given two descendants {a, b} and an ancestor c:
#v'_a = v_a + S^2_a
#v'_b = v_b + S^2_b
#L_c = N(x_a - x_b, (v'_a + v'_b)*sigma^2)
#S^2_c = 1 / ((1/v_a) + (1/v_b))
#x_c = ((1/v'_a) * x_a + (1/v'_b) * x_b) / ((1/v'_a) + (1/v'_b))
a <- kids[1];
b <- kids[2];
vpa <- condvec[a, 4] + condvec[a, 3];
vpb <- condvec[b, 4] + condvec[b, 3];
anc <- ((condvec[a, 2] / vpa) + (condvec[b, 2] / vpb)) / ((1 / vpa) + (1 / vpb));
#print(condvec[c(a,b), 2])
if(try) {
lnL <- fN(condvec[a, 2], anc, vpa, 0, sigma) + condvec[a, 1] + fN(condvec[b, 2], anc, vpb, 0, sigma) + condvec[b, 1];# the log likelihood for the node
}
else {
lnL <- fN(condvec[a, 2], condvec[b, 2], vpa, vpb, sigma) + condvec[a, 1] + condvec[b, 1];# the log likelihood for the node
}
S <- 1 / ((1 / condvec[a, 4]) + (1 / condvec[b, 4]));
return(c(lnL, anc, S));
}
condLikeG<-function(sigma=1.0, condvec, anc, kids, is.list=TRUE) {
}
#set the condL for the tips based on the data and compute the distance between each node and the root
condvecInit<-function(tree, data, ntips, tnodes) {
condvec<-matrix(nrow=tnodes, ncol=4); #three columns, the first for the logLn, the second for the state, the third for extra BM variance and the fourth is the brlen subtending the node
for(i in 1:tnodes) {
row<-which(tree$edge[,2] == i);
if(i <= ntips) {
condvec[row, 1] <- 0.0 #Log likelihood
condvec[row, 2] <- data[i]; #data for this taxa
condvec[row, 3] <- 0.0; #extra variance brought when computing the BM on trees
condvec[row, 4] <- tree$edge.length[row]; #branch length below the node
}
else {
condvec[row, 1] <- 0.0 #Log likelihood
condvec[row, 2] <- 0.0; #data for this taxa
condvec[row, 3] <- 0.0; #extra variance brought when computing the BM on trees
if(i > (ntips+1)) {
condvec[row, 4] <- tree$edge.length[row]; #branch length below the node
}
else {
condvec[row, 4] <- 0.0; #branch length for the root, which is 0.0, usually
}
}
}
return(condvec);
}
lnLike<-function(tree, data, anc, sigma) {
ntips<-length(tree$tip.label);
nnodes<-tree$Nnode;
tnodes<-ntips+nnodes;
#conditional likelihood vectors
condvec<-condvecInit(tree=tree, data=data, ntips=ntips, tnodes=tnodes);
#do some kind of tree traversal, starting from the node
#with the highest node value (because of the sorting in ape)
for(i in tnodes:(ntips+2)) {
row<-which(tree$edge[,2] == i);
kids<-which(tree$edge[,1] == i);
condvec[row, 1:3]<-condLike(sigma=sigma, condvec=condvec, kids);
}
#lnL<-tmp;
#finish with the root
kids<-which(tree$edge[,1]==(ntips+1));
condvec[tnodes, 1:3]<-condLike(sigma=sigma, condvec=condvec, kids)
print(condvec[tnodes,])
return(condvec[tnodes, 1] + fN(condvec[tnodes, 2], anc, condvec[tnodes, 3], 0, sigma));
}
library(mnormt)
fitBM <- function(tree, x, anc, sigma, pruning=TRUE, optim=TRUE) {
n <- length(x);
if(optim) {
if(pruning) {
lik <- function(par, tree, x, n) lnLike(tree, x, par[1], par[2]);
}
else {
lik <- function(par, tree, x, n) sum(dmnorm(x, mean = rep(par[1], n), varcov = par[2] * vcv(tree), log = TRUE));
}
fit <- optim(c(0, 1), lik, tree = tree, x = x, n = n, method = "L-BFGS-B", lower = c(-Inf, 1e-12), upper = c(Inf, Inf), control = list(fnscale = -1));
return(list(x0 = fit$par[1], sig2 = fit$par[2], logL = fit$value, convergence = fit$convergence));
}
else {
if(pruning) {
return(lnLike(tree, x, anc, sigma));
}
else {
return(sum(dmnorm(x, mean = rep(anc, n), varcov = sigma * sigma * vcv(tree), log = TRUE)));
}
}
}
###########################
## Testing the functions ##
###########################
require(phytools);
tree<-pbtree(n=5, scale=1);
data<-data.frame(char=fastBM(tree, sig2=5, a=0), row.names=tree$tip.label);
fitBM(tree, data[,1], 0, 2.5, pruning=FALSE, optim=TRUE)
fitBM(tree, data[,1], 0, 2.5, pruning=TRUE, optim=TRUE)
tree$edge
xxx<-condvecInit(tree, data[,1], 5, 9)
condLike(sigma=1, xxx, c(4,5), try=TRUE)
xxx[c(4,5),]
dnorm(-0.111984+1.561902, 0, 0.125411*2)
dnorm(-0.111984, -0.8369430, 0.125411)*2
lnLike(tree, data, 0, 2)
system.time(lnLike(tree, data, 0, 2.5))
s <- seq(0.9, 10, by=0.1)
res1 <- numeric(length(s))
res2 <- numeric(length(s))
for(i in 1:length(s)) {
res1[i] <- fitBM(tree, data[,1], -0.5, s[i], pruning=TRUE, optim=FALSE)
res2[i] <- fitBM(tree, data[,1], -0.5, s[i], pruning=FALSE, optim=FALSE)
}
plot(s, res2, t="l", col="red")
lines(s, res1)
which(res2 == max(res2))
optim(c(1), lnLike, tree=tree, data=data, method="L-BFGS-B", lower=c(1e-12), upper=c(Inf), control=list(fnscale=-1))
lnLike(tree, data, 0.7378779)
x<-seq(0.1, 5, 0.01)
plot(x, sapply(x, lnLike, tree=tree, data=data), type="l")
condLike(sigma=1, 0.01, c(0.0, 10, 0), is.list=FALSE)
condvecInit(tree, data[,1], 3, 5)
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment