Commit 1724e908 authored by Nicolas Salamin's avatar Nicolas Salamin
Browse files

There are mistakes in the formula from Novembre and Slatkin 2008. I changed...

There are mistakes in the formula from Novembre and Slatkin 2008. I changed the modification of S and the weighted average. Now it gives perfectly similar values than with the multinormal density
parent 5fff7154
......@@ -38,25 +38,23 @@ fN2<-function(xa, xb, vpa, vpb, sigma2, anc) {
}
condLike<-function(sigma2=1.0, condvec, kids) {
#following the paper by Novembre and Slatkin 2008
#following the paper by Felsenstein 1985 + Novembre and Slatkin 2008
#given two descendants {a, b} and an ancestor c:
#v'_a = v_a + S^2_a
#v'_b = v_b + S^2_b
#L_c = N(x_a - x_b, (v'_a + v'_b)*sigma^2)
#S^2_c = 1 / ((1/v_a) + (1/v_b))
#x_c = ((1/v'_a) * x_a + (1/v'_b) * x_b) / ((1/v'_a) + (1/v'_b))
#S^2_c = v'_a * v'_b / (v'_a) + v'_b)
#x_c = (v'_b * x_a) / (v'_a + v'_b) + (v'_a * x_b) / (v'_a + /v'_b)
a <- kids[1];
b <- kids[2];
vpa <- condvec[a, 4] + condvec[a, 3];
vpb <- condvec[b, 4] + condvec[b, 3];
anc <- ((condvec[a, 2] / vpa) + (condvec[b, 2] / vpb)) / ((1 / vpa) + (1 / vpb));
#print(condvec[c(a,b), 2])
anc <- ((condvec[a, 2] * vpb) / (vpa + vpb)) + ((condvec[b, 2] * vpa) / (vpa + vpb))
lnL <- fN2(condvec[a, 2], condvec[b, 2], vpa, vpb, sigma2, anc) + condvec[a, 1] + condvec[b, 1];# the log likelihood for the node
S <- 1 / ((1 / condvec[a, 4]) + (1 / condvec[b, 4]));
S <- (vpa * vpb) / (vpa + vpb);
return(c(lnL, anc, S));
}
......@@ -89,7 +87,7 @@ condvecInit<-function(tree, data, ntips, tnodes) {
return(condvec);
}
lnLike<-function(tree, data, sigma2, optim=TRUE) {
lnLike<-function(tree, data, sigma2, getLnl=TRUE) {
ntips<-length(tree$tip.label);
nnodes<-tree$Nnode;
tnodes<-ntips+nnodes;
......@@ -110,7 +108,7 @@ lnLike<-function(tree, data, sigma2, optim=TRUE) {
kids<-which(tree$edge[,1]==(ntips+1));
condvec[tnodes, 1:3]<-condLike(sigma2=sigma2, condvec=condvec, kids)
if(optim) {
if(getLnl) {
return(condvec[tnodes, 1] + fN(condvec[tnodes, 2], condvec[tnodes, 2], condvec[tnodes, 3], 0, sigma2));
}
else {
......@@ -119,12 +117,12 @@ lnLike<-function(tree, data, sigma2, optim=TRUE) {
}
library(mnormt)
fitBM <- function(tree, x, anc, sigma2, pruning=TRUE, optim=TRUE) {
fitBM <- function(tree, x, anc, sigma2, pruning=TRUE, optim=TRUE, getLnl=TRUE) {
n <- length(x);
if(optim) {
if(pruning) {
lik <- function(par, tree, x) lnLike(tree, x, par);
lik <- function(par, tree, x) lnLike(tree, x, par, getLnl=TRUE); #if you want to optimize, the function MUST return the likelihood, whatever the wish of the user...
fit <- optim(c(1), lik, tree = tree, x = x, method = "L-BFGS-B", lower = c(1e-12), upper = c(Inf), control = list(fnscale = -1));
return(list(sig2 = fit$par, logL = fit$value, convergence = fit$convergence));
}
......@@ -136,7 +134,7 @@ fitBM <- function(tree, x, anc, sigma2, pruning=TRUE, optim=TRUE) {
}
else {
if(pruning) {
return(lnLike(tree, x, sigma2, optim));
return(lnLike(tree, x, sigma2, getLnl=getLnl));
}
else {
return(sum(dmnorm(x, mean = rep(anc, n), varcov = sigma2 * vcv(tree), log = TRUE)));
......@@ -150,26 +148,45 @@ fitBM <- function(tree, x, anc, sigma2, pruning=TRUE, optim=TRUE) {
require(phytools);
tree<-pbtree(n=1000, scale=1);
data<-data.frame(char=fastBM(tree, sig2=1, a=10), row.names=tree$tip.label);
#### testing if the pruning algorithm or the typical multinormal density give the same likelhood: yes they do!
tree<-pbtree(n=50, scale=1);
data<-data.frame(char=fastBM(tree, sig2=2, a=0), row.names=tree$tip.label);
anc2<-fitBM(tree, data[,1], 0, 2.5, pruning=FALSE, optim=TRUE)$x0
anc1<-fitBM(tree, data[,1], 0.646066, 0.816942, pruning=TRUE, optim=TRUE)$x0
s <- seq(0.1, 2.5, by=0.1)
s <- seq(0.1, 4.5, by=0.1)
res1 <- numeric(length(s))
res2 <- numeric(length(s))
for(i in 1:length(s)) {
res1[i] <- fitBM(tree, data[,1], anc1, s[i], pruning=TRUE, optim=FALSE)
res2[i] <- fitBM(tree, data[,1], anc2, s[i], pruning=FALSE, optim=FALSE)
res1[i] <- fitBM(tree, data[,1], sigma2=s[i], pruning=TRUE, optim=FALSE, getLnl=TRUE)
res2[i] <- fitBM(tree, data[,1], -0.880113, s[i], pruning=FALSE, optim=FALSE)
}
plot(s, res2, t="l", col="red")
lines(s, res1, t="l")
which(res1 == max(res1))
which(res2 == max(res2))
anc2
anc1
system.time(fitBM(tree, data[,1], 0.646066, 0.816942, pruning=TRUE, optim=TRUE))
fitBM(tree, data[,1], sigma2=0.816942, pruning=TRUE, optim=FALSE)
\ No newline at end of file
### testing the speed up. The pruning is much faster for large trees:
tree<-pbtree(n=500, scale=1);
data<-data.frame(char=fastBM(tree, sig2=2, a=0), row.names=tree$tip.label);
system.time(fitBM(tree, data[,1], 0.646066, 0.816942, pruning=FALSE, optim=FALSE))
system.time(fitBM(tree, data[,1], 10, sigma2=0.816942, pruning=TRUE, optim=FALSE))
### testing the accuracy of the parameter estimates
res<-numeric(length=100)
anc<-numeric(length=100)
trueS<-0.5
trueA<-1.0
for(i in 1:100) {
tree<-pbtree(n=20, scale=1);
data<-data.frame(char=fastBM(tree, sig2=trueS, a=trueA), row.names=tree$tip.label);
res[i]<-fitBM(tree, data[,1], sigma2=2.0, pruning=TRUE, optim=TRUE, getLnl=TRUE)$sig2
anc[i]<-fitBM(tree, data[,1], sigma2=res[i], pruning=TRUE, optim=FALSE, getLnl=FALSE)[9]
}
plot(res, ylim=c(0,2*trueS))
abline(h=trueS, col="red")
plot(anc, ylim=c(0, 3*trueA))
abline(h=trueA, col="red")
\ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment