### There are mistakes in the formula from Novembre and Slatkin 2008. I changed...

`There are mistakes in the formula from Novembre and Slatkin 2008. I changed the modification of S and the weighted average. Now it gives perfectly similar values than with the multinormal density`
parent 5fff7154
 ... ... @@ -38,25 +38,23 @@ fN2<-function(xa, xb, vpa, vpb, sigma2, anc) { } condLike<-function(sigma2=1.0, condvec, kids) { #following the paper by Novembre and Slatkin 2008 #following the paper by Felsenstein 1985 + Novembre and Slatkin 2008 #given two descendants {a, b} and an ancestor c: #v'_a = v_a + S^2_a #v'_b = v_b + S^2_b #L_c = N(x_a - x_b, (v'_a + v'_b)*sigma^2) #S^2_c = 1 / ((1/v_a) + (1/v_b)) #x_c = ((1/v'_a) * x_a + (1/v'_b) * x_b) / ((1/v'_a) + (1/v'_b)) #S^2_c = v'_a * v'_b / (v'_a) + v'_b) #x_c = (v'_b * x_a) / (v'_a + v'_b) + (v'_a * x_b) / (v'_a + /v'_b) a <- kids; b <- kids; vpa <- condvec[a, 4] + condvec[a, 3]; vpb <- condvec[b, 4] + condvec[b, 3]; anc <- ((condvec[a, 2] / vpa) + (condvec[b, 2] / vpb)) / ((1 / vpa) + (1 / vpb)); #print(condvec[c(a,b), 2]) anc <- ((condvec[a, 2] * vpb) / (vpa + vpb)) + ((condvec[b, 2] * vpa) / (vpa + vpb)) lnL <- fN2(condvec[a, 2], condvec[b, 2], vpa, vpb, sigma2, anc) + condvec[a, 1] + condvec[b, 1];# the log likelihood for the node S <- 1 / ((1 / condvec[a, 4]) + (1 / condvec[b, 4])); S <- (vpa * vpb) / (vpa + vpb); return(c(lnL, anc, S)); } ... ... @@ -89,7 +87,7 @@ condvecInit<-function(tree, data, ntips, tnodes) { return(condvec); } lnLike<-function(tree, data, sigma2, optim=TRUE) { lnLike<-function(tree, data, sigma2, getLnl=TRUE) { ntips<-length(tree\$tip.label); nnodes<-tree\$Nnode; tnodes<-ntips+nnodes; ... ... @@ -110,7 +108,7 @@ lnLike<-function(tree, data, sigma2, optim=TRUE) { kids<-which(tree\$edge[,1]==(ntips+1)); condvec[tnodes, 1:3]<-condLike(sigma2=sigma2, condvec=condvec, kids) if(optim) { if(getLnl) { return(condvec[tnodes, 1] + fN(condvec[tnodes, 2], condvec[tnodes, 2], condvec[tnodes, 3], 0, sigma2)); } else { ... ... @@ -119,12 +117,12 @@ lnLike<-function(tree, data, sigma2, optim=TRUE) { } library(mnormt) fitBM <- function(tree, x, anc, sigma2, pruning=TRUE, optim=TRUE) { fitBM <- function(tree, x, anc, sigma2, pruning=TRUE, optim=TRUE, getLnl=TRUE) { n <- length(x); if(optim) { if(pruning) { lik <- function(par, tree, x) lnLike(tree, x, par); lik <- function(par, tree, x) lnLike(tree, x, par, getLnl=TRUE); #if you want to optimize, the function MUST return the likelihood, whatever the wish of the user... fit <- optim(c(1), lik, tree = tree, x = x, method = "L-BFGS-B", lower = c(1e-12), upper = c(Inf), control = list(fnscale = -1)); return(list(sig2 = fit\$par, logL = fit\$value, convergence = fit\$convergence)); } ... ... @@ -136,7 +134,7 @@ fitBM <- function(tree, x, anc, sigma2, pruning=TRUE, optim=TRUE) { } else { if(pruning) { return(lnLike(tree, x, sigma2, optim)); return(lnLike(tree, x, sigma2, getLnl=getLnl)); } else { return(sum(dmnorm(x, mean = rep(anc, n), varcov = sigma2 * vcv(tree), log = TRUE))); ... ... @@ -150,26 +148,45 @@ fitBM <- function(tree, x, anc, sigma2, pruning=TRUE, optim=TRUE) { require(phytools); tree<-pbtree(n=1000, scale=1); data<-data.frame(char=fastBM(tree, sig2=1, a=10), row.names=tree\$tip.label); #### testing if the pruning algorithm or the typical multinormal density give the same likelhood: yes they do! tree<-pbtree(n=50, scale=1); data<-data.frame(char=fastBM(tree, sig2=2, a=0), row.names=tree\$tip.label); anc2<-fitBM(tree, data[,1], 0, 2.5, pruning=FALSE, optim=TRUE)\$x0 anc1<-fitBM(tree, data[,1], 0.646066, 0.816942, pruning=TRUE, optim=TRUE)\$x0 s <- seq(0.1, 2.5, by=0.1) s <- seq(0.1, 4.5, by=0.1) res1 <- numeric(length(s)) res2 <- numeric(length(s)) for(i in 1:length(s)) { res1[i] <- fitBM(tree, data[,1], anc1, s[i], pruning=TRUE, optim=FALSE) res2[i] <- fitBM(tree, data[,1], anc2, s[i], pruning=FALSE, optim=FALSE) res1[i] <- fitBM(tree, data[,1], sigma2=s[i], pruning=TRUE, optim=FALSE, getLnl=TRUE) res2[i] <- fitBM(tree, data[,1], -0.880113, s[i], pruning=FALSE, optim=FALSE) } plot(s, res2, t="l", col="red") lines(s, res1, t="l") which(res1 == max(res1)) which(res2 == max(res2)) anc2 anc1 system.time(fitBM(tree, data[,1], 0.646066, 0.816942, pruning=TRUE, optim=TRUE)) fitBM(tree, data[,1], sigma2=0.816942, pruning=TRUE, optim=FALSE) \ No newline at end of file ### testing the speed up. The pruning is much faster for large trees: tree<-pbtree(n=500, scale=1); data<-data.frame(char=fastBM(tree, sig2=2, a=0), row.names=tree\$tip.label); system.time(fitBM(tree, data[,1], 0.646066, 0.816942, pruning=FALSE, optim=FALSE)) system.time(fitBM(tree, data[,1], 10, sigma2=0.816942, pruning=TRUE, optim=FALSE)) ### testing the accuracy of the parameter estimates res<-numeric(length=100) anc<-numeric(length=100) trueS<-0.5 trueA<-1.0 for(i in 1:100) { tree<-pbtree(n=20, scale=1); data<-data.frame(char=fastBM(tree, sig2=trueS, a=trueA), row.names=tree\$tip.label); res[i]<-fitBM(tree, data[,1], sigma2=2.0, pruning=TRUE, optim=TRUE, getLnl=TRUE)\$sig2 anc[i]<-fitBM(tree, data[,1], sigma2=res[i], pruning=TRUE, optim=FALSE, getLnl=FALSE) } plot(res, ylim=c(0,2*trueS)) abline(h=trueS, col="red") plot(anc, ylim=c(0, 3*trueA)) abline(h=trueA, col="red") \ No newline at end of file
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment