Commit 5fff7154 authored by Nicolas Salamin

Working now when assuming that the internal nodes are weighted averages from the two descendants. Crazy speed-ups vs fitBM...
parent 79a4d7e6
@@ -4,58 +4,40 @@
require(ape);
condLike.old<-function(sigma=1.0, blen, condvec, is.list=TRUE) {
# We assume in this function that the tree is fully dichotomous and that 2 descendants are provided
# for each internal node.
# The parameter sigma is the standard deviation of the rate of evolution of the trait.
# The branch lengths for internal nodes should be extended a bit because the variance of the internal
# nodes is a bit larger than what is observed on the tree
# see eqn 23.46 on page 405 in Joe's book.
# However, condvec[,3] stores the standard deviation S, whose square (the variance) is
# sigma^2 * (v1 * v2) / (v1 + v2) for two descendants with branch lengths v1 and v2. We only want
# to recover (v1 * v2) / (v1 + v2) here, so we square it and divide by sigma^2.
# The value in condvec[,3] is 0 for terminal taxa, where the branch lengths are "correct".
mod.bl <- blen + (condvec[,3] * condvec[,3]) / (sigma * sigma);
# The ancestral value is simply the weighted mean of the values, using the modified branch lengths
# as the variance (modified because for internal nodes the variance is a bit larger).
# see eqn 23.43 on page 405 in Joe's book
anc <- sum(condvec[,2] / mod.bl) / sum(1 / mod.bl);
# Extra variance due to the composite construction of the ancestor based on the two descendants
# given here is sigma^2 (v1 v2) / (v1 + v2)
# see eqn 23.46 on page 405 in Joe's book
# Here, we store the standard deviation and we thus take the square-root of the variance.
S <- sqrt((sigma * sigma) * prod(mod.bl) / sum(mod.bl));
# The likelihood is then simply the density of the normal distribution for the contrast of the two values
lnL <- (-(condvec[1,2] - condvec[2,2])^2 / (2 * sigma * sigma * sum(mod.bl)) - log(sqrt(2 * pi * sigma * sigma * sum(mod.bl)))) + sum(condvec[,1]);
if(is.list) {
return(list(lnL=lnL, anc=anc, S=S));
}
else {
return(c(lnL, anc, S));
}
}
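# Illustrative sketch of the two-descendant update described in the comments above, using
# made-up values (x1, x2, v1, v2, sig are hypothetical): the ancestral value is the
# inverse-variance weighted mean of eqn 23.43 and the extra variance is the product-over-sum
# term of eqn 23.46.
x1 <- 1.2; x2 <- 0.4; v1 <- 0.3; v2 <- 0.7; sig <- 1.0;
anc.example <- (x1 / v1 + x2 / v2) / (1 / v1 + 1 / v2); # same as (x1 * v2 + x2 * v1) / (v1 + v2)
extra.var.example <- (sig * sig) * (v1 * v2) / (v1 + v2); # variance added above the new ancestor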
fN(0.732627, 0.314222, 1, 1, 1)
fN2(0.732627, 0.314222, 1, 1, 1, 0.523424)
sum(dmnorm(x, mean = rep(1, n), varcov = sigma * sigma * vcv(tree), log = TRUE))
fN<-function(xa, xb, vpa, vpb, sigma) {
#the normal density. Sigma is the standard deviation. Same as log(dnorm(xa-xb, 0, sqrt(sigma * sigma * (vpa + vpb))))
return((-(xa - xb)^2 / (2 * sigma * sigma * (vpa + vpb)) - log(sqrt(2 * pi * sigma * sigma * (vpa + vpb)))));
# We assume in this function that the tree is fully dichotomous and that 2 descendants are provided
# for each internal node.
# The parameter sigma is the standard deviation of the rate of evolution of the trait.
# The branch lengths for internal nodes should be extended a bit because the variance of the internal
# nodes is a bit larger than what is observed on the tree
# see eqn 23.46 on page 405 in Joe's book.
# However, condvec[,3] stores the standard deviation S, whose square (the variance) is
# sigma^2 * (v1 * v2) / (v1 + v2) for two descendants with branch lengths v1 and v2. We only want
# to recover (v1 * v2) / (v1 + v2) here, so we square it and divide by sigma^2.
# The value in condvec[,3] is 0 for terminal taxa, where the branch lengths are "correct".
# The ancestral value is simply the weighted mean of the values, using the modified branch lengths
# as the variance (modified because for internal nodes the variance is a bit larger).
# see eqn 23.43 on page 405 in Joe's book
# Extra variance due to the composite construction of the ancestor based on the two descendants
# given here is sigma^2 (v1 v2) / (v1 + v2)
# see eqn 23.46 on page 405 in Joe's book
# Here, we store the standard deviation and we thus take the square-root of the variance.
# The likelihood is then simply the density of the normal distribution for the contrast of the two values
fN<-function(xa, xb, vpa, vpb, sigma2) {
#the normal density. sigma2 is the variance. Same as log(dnorm(xa - xb, 0, sqrt(sigma2 * (vpa + vpb))))
return((-(xa - xb)^2 / (2 * sigma2 * (vpa + vpb)) - log(sqrt(2 * pi * sigma2 * (vpa + vpb)))));
}
fN2<-function(xa, xb, vpa, vpb, sigma, anc) {
#same as fN but with a known ancestor instead of xa-xb, see Joe's book eqn 23.10 (some mistakes in the denominator though)
return(-(((xa - anc)^2 / vpa) + ((xb - anc)^2 / vpb)) / (2 * sigma * sigma) - log(sqrt(2 * pi * sigma * sigma * (vpa + vpb))));
fN2<-function(xa, xb, vpa, vpb, sigma2, anc) {
#same as fN but with a known ancestor instead of the contrast xa - xb; see Joe's book eqn 23.10 (note some mistakes in the denominator of that equation)
return(-(((xa - anc)^2 / vpa) + ((xb - anc)^2 / vpb)) / (2 * sigma2) - log(sqrt(2 * pi * sigma2 * (vpa + vpb))));
}
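# Quick sanity check (illustrative; the numbers are the scratch values used earlier, the
# variable names xa0, xb0, va0, vb0, s2, anc0 are hypothetical): fN matches dnorm on the
# contrast, and fN2 reduces to fN when anc is the inverse-variance weighted mean of xa and xb.
xa0 <- 0.732627; xb0 <- 0.314222; va0 <- 1; vb0 <- 1; s2 <- 0.5;
all.equal(fN(xa0, xb0, va0, vb0, s2), dnorm(xa0 - xb0, 0, sqrt(s2 * (va0 + vb0)), log = TRUE));
anc0 <- (xa0 * vb0 + xb0 * va0) / (va0 + vb0);
all.equal(fN(xa0, xb0, va0, vb0, s2), fN2(xa0, xb0, va0, vb0, s2, anc0));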
condLike<-function(sigma=1.0, condvec, kids, try=FALSE) {
condLike<-function(sigma2=1.0, condvec, kids) {
#following the paper by Novembre and Slatkin 2008
#given two descendants {a, b} and an ancestor c:
#v'_a = v_a + S^2_a
@@ -72,22 +54,13 @@ condLike<-function(sigma=1.0, condvec, kids, try=FALSE) {
#print(condvec[c(a,b), 2])
if(try) {
lnL <- fN(condvec[a, 2], anc, vpa, 0, sigma) + condvec[a, 1] + fN(condvec[b, 2], anc, vpb, 0, sigma) + condvec[b, 1];# the log likelihood for the node
}
else {
lnL <- fN(condvec[a, 2], condvec[b, 2], vpa, vpb, sigma) + condvec[a, 1] + condvec[b, 1];# the log likelihood for the node
}
lnL <- fN2(condvec[a, 2], condvec[b, 2], vpa, vpb, sigma2, anc) + condvec[a, 1] + condvec[b, 1];# the log likelihood for the node
S <- 1 / ((1 / condvec[a, 4]) + (1 / condvec[b, 4]));
return(c(lnL, anc, S));
}
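# Side note (illustrative identity; u and v are made-up values): the reciprocal-sum form used
# for S above is algebraically the same as the product-over-sum extra variance of eqn 23.46.
u <- 0.3; v <- 0.7;
all.equal(1 / ((1 / u) + (1 / v)), (u * v) / (u + v));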
condLikeG<-function(sigma=1.0, condvec, anc, kids, is.list=TRUE) {
}
#set the condL for the tips based on the data and compute the distance between each node and the root
condvecInit<-function(tree, data, ntips, tnodes) {
condvec<-matrix(nrow=tnodes, ncol=4); #four columns: the first for the lnL, the second for the state, the third for the extra BM variance and the fourth for the brlen subtending the node
@@ -116,7 +89,7 @@ condvecInit<-function(tree, data, ntips, tnodes) {
return(condvec);
}
lnLike<-function(tree, data, anc, sigma) {
lnLike<-function(tree, data, sigma2, optim=TRUE) {
ntips<-length(tree$tip.label);
nnodes<-tree$Nnode;
tnodes<-ntips+nnodes;
@@ -129,38 +102,44 @@ lnLike<-function(tree, data, anc, sigma) {
for(i in tnodes:(ntips+2)) {
row<-which(tree$edge[,2] == i);
kids<-which(tree$edge[,1] == i);
condvec[row, 1:3]<-condLike(sigma=sigma, condvec=condvec, kids);
condvec[row, 1:3]<-condLike(sigma2=sigma2, condvec=condvec, kids);
}
#lnL<-tmp;
#finish with the root
kids<-which(tree$edge[,1]==(ntips+1));
condvec[tnodes, 1:3]<-condLike(sigma=sigma, condvec=condvec, kids)
condvec[tnodes, 1:3]<-condLike(sigma2=sigma2, condvec=condvec, kids)
print(condvec)
return(condvec[tnodes, 1] + fN(condvec[tnodes, 2], anc, condvec[tnodes, 3], 0, sigma));
if(optim) {
return(condvec[tnodes, 1] + fN(condvec[tnodes, 2], condvec[tnodes, 2], condvec[tnodes, 3], 0, sigma2));
}
else {
return(condvec[, 2]);
}
}
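# Quick look (illustrative) at the ape node numbering the recursion above relies on: tips are
# numbered 1..ntips and the root is ntips + 1, so the loop runs from the highest-numbered
# internal node down to ntips + 2 and handles the root last. This assumes descendant internal
# nodes carry larger numbers than their ancestors, as in cladewise-ordered trees such as those
# returned by pbtree or rtree.
demo.tree <- ape::rtree(4);
demo.tree$edge;                      # parent/child node numbers
length(demo.tree$tip.label) + 1;     # the root's node number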
library(mnormt)
fitBM <- function(tree, x, anc, sigma, pruning=TRUE, optim=TRUE) {
fitBM <- function(tree, x, anc, sigma2, pruning=TRUE, optim=TRUE) {
n <- length(x);
if(optim) {
if(pruning) {
lik <- function(par, tree, x, n) lnLike(tree, x, par[1], par[2]);
lik <- function(par, tree, x) lnLike(tree, x, par);
fit <- optim(c(1), lik, tree = tree, x = x, method = "L-BFGS-B", lower = c(1e-12), upper = c(Inf), control = list(fnscale = -1));
return(list(sig2 = fit$par, logL = fit$value, convergence = fit$convergence));
}
else {
lik <- function(par, tree, x, n) sum(dmnorm(x, mean = rep(par[1], n), varcov = par[2] * par[2] * vcv(tree), log = TRUE));
lik <- function(par, tree, x, n) sum(dmnorm(x, mean = rep(par[1], n), varcov = par[2] * vcv(tree), log = TRUE));
fit <- optim(c(0, 1), lik, tree = tree, x = x, n = n, method = "L-BFGS-B", lower = c(-Inf, 1e-12), upper = c(Inf, Inf), control = list(fnscale = -1));
return(list(x0 = fit$par[1], sig2 = fit$par[2], logL = fit$value, convergence = fit$convergence));
}
fit <- optim(c(0, 1), lik, tree = tree, x = x, n = n, method = "L-BFGS-B", lower = c(-Inf, 1e-12), upper = c(Inf, Inf), control = list(fnscale = -1));
return(list(x0 = fit$par[1], sig2 = fit$par[2], logL = fit$value, convergence = fit$convergence));
}
else {
if(pruning) {
return(lnLike(tree, x, anc, sigma));
return(lnLike(tree, x, sigma2, optim));
}
else {
return(sum(dmnorm(x, mean = rep(anc, n), varcov = sigma * sigma * vcv(tree), log = TRUE)));
return(sum(dmnorm(x, mean = rep(anc, n), varcov = sigma2 * vcv(tree), log = TRUE)));
}
}
}
@@ -171,27 +150,26 @@ fitBM <- function(tree, x, anc, sigma, pruning=TRUE, optim=TRUE) {
require(phytools);
tree<-pbtree(n=2, scale=1);
data<-data.frame(char=fastBM(tree, sig2=1, a=0), row.names=tree$tip.label);
fitBM(tree, data[,1], 0, 2.5, pruning=FALSE, optim=TRUE)
fitBM(tree, data[,1], 0.646066, 0.816942, pruning=TRUE, optim=FALSE)
xxx<-condvecInit(tree, data[,1], 5, 9)
condLike(sigma=1, xxx, c(4,5), try=TRUE)
tree<-pbtree(n=1000, scale=1);
data<-data.frame(char=fastBM(tree, sig2=1, a=10), row.names=tree$tip.label);
lnLike(tree, data, 0, 2)
system.time(lnLike(tree, data, 0, 2.5))
anc2<-fitBM(tree, data[,1], 0, 2.5, pruning=FALSE, optim=TRUE)$x0
anc1<-fitBM(tree, data[,1], 0.646066, 0.816942, pruning=TRUE, optim=TRUE)$x0
s <- seq(0.5, 2.5, by=0.1)
s <- seq(0.1, 2.5, by=0.1)
res1 <- numeric(length(s))
res2 <- numeric(length(s))
for(i in 1:length(s)) {
res1[i] <- fitBM(tree, data[,1], 0.942176, s[i], pruning=TRUE, optim=FALSE)
res2[i] <- fitBM(tree, data[,1], 0.942176, s[i], pruning=FALSE, optim=FALSE)
res1[i] <- fitBM(tree, data[,1], anc1, s[i], pruning=TRUE, optim=FALSE)
res2[i] <- fitBM(tree, data[,1], anc2, s[i], pruning=FALSE, optim=FALSE)
}
plot(s, res2, t="l", col="red")
lines(s, res1)
lines(s, res1, t="l")
which(res1 == max(res1))
which(res2 == max(res2))
anc2
anc1
system.time(fitBM(tree, data[,1], 0.646066, 0.816942, pruning=TRUE, optim=TRUE))
fitBM(tree, data[,1], sigma2=0.816942, pruning=TRUE, optim=FALSE)
\ No newline at end of file