# function creates a stochastic character mapped tree as a modified "phylo" object # written by Liam Revell 2013, 2014, 2015 make.simmap<-function(tree,x,model="SYM",nsim=1,...){ if(inherits(tree,"multiPhylo")){ ff<-function(yy,x,model,nsim,...){ zz<-make.simmap(yy,x,model,nsim,...) if(nsim>1) class(zz)<-NULL return(zz) } if(nsim>1) mtrees<-unlist(sapply(tree,ff,x,model,nsim,...,simplify=FALSE),recursive=FALSE) else mtrees<-sapply(tree,ff,x,model,nsim,...,simplify=FALSE) class(mtrees)<-"multiPhylo" } else { ## get optional arguments if(hasArg(pi)) pi<-list(...)$pi else pi<-"equal" if(hasArg(message)) pm<-list(...)$message else pm<-TRUE if(hasArg(tol)) tol<-list(...)$tol else tol<-0 if(hasArg(Q)) Q<-list(...)$Q else Q<-"empirical" if(hasArg(burnin)) burnin<-list(...)$burnin else burnin<-1000 if(hasArg(samplefreq)) samplefreq<-list(...)$samplefreq else samplefreq<-100 if(hasArg(vQ)) vQ<-list(...)$vQ else vQ<-0.1 prior<-list(alpha=1,beta=1,use.empirical=FALSE) if(hasArg(prior)){ pr<-list(...)$prior prior[names(pr)]<-pr } ## done optional arguments # check if(!inherits(tree,"phylo")) stop("tree should be object of class \"phylo\".") # if vector convert to binary matrix if(!is.matrix(x)) xx<-to.matrix(x,sort(unique(x))) else xx<-x xx<-xx[tree$tip.label,] xx<-xx/rowSums(xx) # reorder to cladewise tree<-bt<-reorder.phylo(tree,"cladewise") if(!is.binary.tree(bt)) bt<-multi2di(bt) # some preliminaries N<-length(tree$tip) m<-ncol(xx) root<-N+1 # get conditional likelihoods & model if(is.character(Q)&&Q=="empirical"){ XX<-getPars(bt,xx,model,Q=NULL,tree,tol,m) L<-XX$L Q<-XX$Q logL<-XX$loglik if(pi[1]=="equal") pi<-setNames(rep(1/m,m),colnames(L)) # set equal else if(pi[1]=="estimated") pi<-statdist(Q) # set from stationary distribution else pi<-pi/sum(pi) # obtain from input if(pm) printmessage(Q,pi,method="empirical") mtrees<-replicate(nsim,smap(tree,x,N,m,root,L,Q,pi,logL),simplify=FALSE) } else if(is.character(Q)&&Q=="mcmc"){ if(prior$use.empirical){ qq<-apeAce(bt,xx,model)$rates prior$alpha<-qq*prior$beta } XX<-mcmcQ(bt,xx,model,tree,tol,m,burnin,samplefreq,nsim,vQ,prior) L<-lapply(XX,function(x) x$L) Q<-lapply(XX,function(x) x$Q) logL<-lapply(XX,function(x) x$loglik) if(pi[1]=="equal"){ pi<-setNames(rep(1/m,m),colnames(L)) # set equal pi<-lapply(1:nsim,function(x,y) y, y=pi) } else if(pi[1]=="estimated"){ pi<-lapply(Q,statdist) # set from stationary distribution } else { pi<-pi/sum(pi) # obtain from input pi<-lapply(1:nsim,function(x,y) y, y=pi) } if(pm) printmessage(Reduce('+',Q)/length(Q),pi,method="mcmc") mtrees<-mapply(smap,L=L,Q=Q,pi=pi,logL=logL,MoreArgs=list(tree=tree,x=x,N=N,m=m,root=root),SIMPLIFY=FALSE) } else if(is.matrix(Q)){ XX<-getPars(bt,xx,model,Q=Q,tree,tol,m) L<-XX$L logL<-XX$loglik if(pi[1]=="equal") pi<-setNames(rep(1/m,m),colnames(L)) # set equal else if(pi[1]=="estimated") pi<-statdist(Q) # set from stationary distribution else pi<-pi/sum(pi) # obtain from input if(pm) printmessage(Q,pi,method="fixed") mtrees<-replicate(nsim,smap(tree,x,N,m,root,L,Q,pi,logL),simplify=FALSE) } if(length(mtrees)==1) mtrees<-mtrees[[1]] else class(mtrees)<-"multiPhylo" } (if(hasArg(message)) list(...)$message else TRUE) if((if(hasArg(message)) list(...)$message else TRUE)&&inherits(tree,"phylo")) message("Done.") return(mtrees) } # print message # written by Liam J. Revell 2013 printmessage<-function(Q,pi,method){ if(method=="empirical"||method=="fixed") message("make.simmap is sampling character histories conditioned on the transition matrix\nQ =") else if(method=="mcmc"){ message("make.simmap is simulating with a sample of Q from the posterior distribution") message("Mean Q from the posterior is\nQ =") } print(Q) if(method=="empirical") message("(estimated using likelihood);") else if(method=="fixed") message("(specified by the user);") message("and (mean) root node prior probabilities\npi =") if(is.list(pi)) pi<-Reduce("+",pi)/length(pi) print(pi) } # mcmc for Q used in Q="mcmc" # written by Liam J. Revell 2013 mcmcQ<-function(bt,xx,model,tree,tol,m,burnin,samplefreq,nsim,vQ,prior){ update<-function(x){ ## x<-exp(log(x)+rnorm(n=np,mean=0,sd=sqrt(vQ))) x<-abs(x+rnorm(n=np,mean=0,sd=sqrt(vQ))) return(x) } # get model matrix if(is.character(model)){ rate<-matrix(NA,m,m) if(model=="ER"){ np<-rate[]<-1 diag(rate)<-NA } if(model=="ARD"){ np<-m*(m-1) rate[col(rate)!=row(rate)]<-1:np } if (model=="SYM") { np<-m*(m-1)/2 sel<-col(rate)=runif(n=1)){ yy<-zz p<-pp } } # now run MCMC generation, sampling at samplefreq cat(paste("Running",samplefreq*nsim,"generations of MCMC, sampling every",samplefreq,"generations. Please wait....\n")) XX<-vector("list",nsim) for(i in 1:(samplefreq*nsim)){ pp<-update(p) Qp<-matrix(pp[rate],m,m) diag(Qp)<--rowSums(Qp,na.rm=TRUE) zz<-getPars(bt,xx,model,Qp,tree,tol,m,FALSE) p.odds<-exp(zz$loglik+sum(dgamma(pp,prior$alpha,prior$beta,log=TRUE))- yy$loglik-sum(dgamma(p,prior$alpha,prior$beta,log=TRUE))) if(p.odds>=runif(n=1)){ yy<-zz p<-pp } if(i%%samplefreq==0){ Qi<-matrix(p[rate],m,m) diag(Qi)<--rowSums(Qi,na.rm=TRUE) XX[[i/samplefreq]]<-getPars(bt,xx,model,Qi,tree,tol,m,TRUE) } } return(XX) } # get pars # written by Liam J. Revell 2013 getPars<-function(bt,xx,model,Q,tree,tol,m,liks=TRUE){ XX<-apeAce(bt,xx,model,fixedQ=Q,output.liks=liks) N<-length(bt$tip.label) II<-XX$index.matrix+1 lvls<-XX$states if(liks){ L<-XX$lik.anc rownames(L)<-N+1:nrow(L) if(!is.binary.tree(tree)){ ancNames<-matchNodes(tree,bt) L<-L[as.character(ancNames[,2]),] rownames(L)<-ancNames[,1] } L<-rbind(xx,L) rownames(L)[1:N]<-1:N } else L<-NULL Q<-matrix(c(0,XX$rates)[II],m,m,dimnames=list(lvls,lvls)) if(any(rowSums(Q,na.rm=TRUE)0) rexp(n=1,rate=-Q[s,s]) else t-sum(dt) if(sum(dt)<(t-tol)){ dt<-c(dt,0) if(sum(Q[s,][-match(s,colnames(Q))])>0) names(dt)[length(dt)]<-rstate(Q[s,][-match(s,colnames(Q))]/sum(Q[s,][-match(s,colnames(Q))])) else names(dt)[length(dt)]<-s } else dt[length(dt)]<-dt[length(dt)]-sum(dt)+t } return(dt) } # function returns random state with probability given by y # written by Liam J. Revell 2013 rstate<-function(y){ if(length(y)==1) return(names(y)[1]) else return(names(which(rmultinom(1,1,y/sum(y))[,1]==1))) } # function uses numerical optimization to solve for the stationary distribution # written by Liam J. Revell 2013 statdist<-function(Q){ foo<-function(theta,Q){ Pi<-c(theta[1:(nrow(Q)-1)],1-sum(theta[1:(nrow(Q)-1)])) sum((Pi%*%Q)^2) } k<-nrow(Q) if(nrow(Q)>2){ fit<-optim(rep(1/k,k-1),foo,Q=Q,control=list(reltol=1e-16)) return(setNames(c(fit$par[1:(k-1)],1-sum(fit$par[1:(k-1)])),rownames(Q))) } else { fit<-optimize(foo,interval=c(0,1),Q=Q) return(setNames(c(fit$minimum,1-fit$minimum),rownames(Q))) } } # function for conditional likelihoods at nodes, from ace(...,type="discrete") # modified (only very slightly) from E. Paradis et al. 2013 apeAce<-function(tree,x,model,fixedQ=NULL,...){ if(hasArg(output.liks)) output.liks<-list(...)$output.liks else output.liks<-TRUE ip<-0.1 nb.tip<-length(tree$tip.label) nb.node<-tree$Nnode if(is.matrix(x)){ x<-x[tree$tip.label,] nl<-ncol(x) lvls<-colnames(x) } else { x<-x[tree$tip.label] if(!is.factor(x)) x<-factor(x) nl<-nlevels(x) lvls<-levels(x) x<-as.integer(x) } if(is.null(fixedQ)){ if(is.character(model)){ rate<-matrix(NA,nl,nl) if(model=="ER") np<-rate[]<-1 if(model=="ARD"){ np<-nl*(nl-1) rate[col(rate)!=row(rate)]<-1:np } if (model=="SYM") { np<-nl*(nl-1)/2 sel<-col(rate)