# some utility functions # written by Liam J. Revell 2011, 2012, 2013 # function to get states at internal nodes from simmap style trees # written by Liam J. Revell 2013 getStates<-function(tree,type=c("nodes","tips")){ type<-type[1] if(class(tree)=="multiPhylo"){ tree<-unclass(tree) y<-sapply(tree,getStates,type=type) } else if(class(tree)=="phylo"){ if(type=="nodes"){ y<-setNames(sapply(tree$maps,function(x) names(x)[1]),tree$edge[,1]) y<-y[as.character(length(tree$tip)+1:tree$Nnode)] } else if(type=="tips"){ y<-setNames(sapply(tree$maps,function(x) names(x)[length(x)]),tree$edge[,2]) y<-setNames(y[as.character(1:length(tree$tip.label))],tree$tip.label) } } else stop("tree should be an object of class 'phylo' or 'multiPhylo'") return(y) } # function to summarize the results of stochastic mapping # written by Liam J. Revell 2013 describe.simmap<-function(tree,...){ if(hasArg(plot)) plot<-list(...)$plot else plot<-FALSE if(hasArg(check.equal)) check.equal<-list(...)$check.equal else check.equal<-FALSE if(hasArg(message)) message<-list(...)$message else message<-TRUE if(class(tree)=="multiPhylo"){ if(check.equal){ TT<-sapply(tree,function(x,y) sapply(y,all.equal.phylo,x),y=tree) check<-all(TT) if(!check) warning("some trees not equal") } YY<-getStates(tree) states<-sort(unique(as.vector(YY))) ZZ<-t(apply(YY,1,function(x,levels,Nsim) summary(factor(x,levels))/Nsim,levels=states,Nsim=length(tree))) XX<-countSimmap(tree,states,FALSE) XX<-XX[,-(which(as.vector(diag(-1,length(states)))==-1)+1)] AA<-t(sapply(tree,function(x) c(colSums(x$mapped.edge),sum(x$edge.length)))) colnames(AA)[ncol(AA)]<-"total" BB<-getStates(tree,type="tips") CC<-t(apply(BB,1,function(x,levels,Nsim) summary(factor(x,levels))/Nsim,levels=states,Nsim=length(tree))) if(message){ cat(paste(length(tree),"trees with a mapped discrete character with states:\n",paste(states,collapse=", "),"\n\n")) cat(paste("trees have",colMeans(XX)["N"],"changes between states on average\n\n")) cat(paste("changes are of the following types:\n")) aa<-t(as.matrix(colMeans(XX)[2:ncol(XX)])); rownames(aa)<-"x->y"; print(aa) cat(paste("\nmean total time spent in each state is:\n")) print(matrix(c(colMeans(AA),colMeans(AA[,1:ncol(AA)]/AA[,ncol(AA)])),2,ncol(AA),byrow=TRUE,dimnames=list(c("raw","prop"),c(colnames(AA))))) cat("\n") } if(plot){ plot(tree[[1]],edge.width=2,no.margin=TRUE,label.offset=0.015*max(nodeHeights(tree[[1]])),...) nodelabels(pie=ZZ,piecol=1:length(states),cex=0.6) tips<-CC tiplabels(pie=tips,piecol=1:length(states),cex=0.5) L<-list(count=XX,times=AA,tips=tips,ace=ZZ,legend=setNames(states,palette()[1:length(states)])) if(message) invisible(L) else return(L) } else { L<-list(count=XX,times=AA,ace=ZZ) if(message) invisible(L) else return(L) } } else if(class(tree)=="phylo"){ XX<-countSimmap(tree,message=FALSE) YY<-getStates(tree) states<-sort(unique(YY)) AA<-setNames(c(colSums(tree$mapped.edge),sum(tree$edge.length)),c(colnames(tree$mapped.edge),"total")) AA<-rbind(AA,AA/AA[length(AA)]); rownames(AA)<-c("raw","prop") if(message){ cat(paste("1 tree with a mapped discrete character with states:\n",paste(states,collapse=", "),"\n\n")) cat(paste("tree has",XX$N,"changes between states\n\n")) cat(paste("changes are of the following types:\n")) print(XX$Tr) cat(paste("\nmean total time spent in each state is:\n")) print(AA) cat("\n") } if(plot){ plotSimmap(tree,colors=setNames(palette()[1:length(states)],states),pts=FALSE) L<-list(N=XX$N,Tr=XX$Tr,times=AA,states=YY,legend=setNames(states,palette()[1:length(states)])) if(message) invisible(L) else return(L) } else { L<-list(N=XX$N,Tr=XX$Tr,times=AA,states=YY) if(message) invisible(L) else return(L) } } } # function counts transitions from a mapped history # written by Liam J. Revell 2013 countSimmap<-function(tree,states=NULL,message=TRUE){ if(class(tree)=="multiPhylo"){ ff<-function(zz){ XX<-countSimmap(zz,states,message) setNames(c(XX$N,as.vector(t(XX$Tr))),c("N", sapply(rownames(XX$Tr),paste,colnames(XX$Tr),sep=","))) } tree<-unclass(tree) XX<-t(sapply(tree,ff)) if(!message) return(XX) else return(list(Tr=XX,message= c("Column N is the total number of character changes on the tree", "Other columns give transitions x,y from x->y"))) } else if(class(tree)=="phylo") { n<-sum(sapply(tree$maps,length))-nrow(tree$edge) if(is.null(states)) states<-colnames(tree$mapped.edge) m<-length(states) TT<-matrix(NA,m,m,dimnames=list(states,states)) gg<-function(map,a,b){ if(length(map)==1) zz<-0 else { zz<-0; i<-2 while(i<=length(map)){ if(names(map)[i]==b&&names(map)[i-1]==a) zz<-zz+1 i<-i+1 } } return(zz) } for(i in 1:m) for(j in 1:m) if(i==j) TT[i,j]<-0 else TT[i,j]<-sum(sapply(tree$maps,gg,a=states[i],b=states[j])) if(!message) return(list(N=n,Tr=TT)) else return(list(N=n,Tr=TT,message=c( "N is the total number of character changes on the tree", "Tr gives the number of transitions from row state->column state"))) } } # function to re-root a phylogeny along an edge # written by Liam Revell 2011-2013 reroot<-function(tree,node.number,position){ if(class(tree)!="phylo") stop("tree object must be of class 'phylo.'") tt<-splitTree(tree,list(node=node.number,bp=position)) p<-tt[[1]]; d<-tt[[2]] p<-root(p,outgroup="NA",resolve.root=T) bb<-which(p$tip.label=="NA") ee<-p$edge.length[which(p$edge[,2]==bb)] p$edge.length[which(p$edge[,2]==bb)]<-0 cc<-p$edge[which(p$edge[,2]==bb),1] dd<-setdiff(p$edge[which(p$edge[,1]==cc),2],bb) p$edge.length[which(p$edge[,2]==dd)]<-p$edge.length[which(p$edge[,2]==dd)]+ee tt<-paste.tree(p,d) return(tt) } # function returns random state with probability given by y # written by Liam J. Revell 2013 (replaces earlier version) rstate<-function(y){ if(length(y)==1) return(names(y)[1]) else return(names(which(rmultinom(1,1,y/sum(y))[,1]==1))) } # function to match nodes between trees # written by Liam J. Revell 2012, 2013 matchNodes<-function(tr1,tr2,method=c("descendants","distances"),...){ method<-method[1] method<-matchType(method,c("descendants","distances")) if(method=="descendants"){ desc.tr1<-lapply(1:tr1$Nnode+length(tr1$tip),function(x) extract.clade(tr1,x)$tip.label) names(desc.tr1)<-1:tr1$Nnode+length(tr1$tip) desc.tr2<-lapply(1:tr2$Nnode+length(tr2$tip),function(x) extract.clade(tr2,x)$tip.label) names(desc.tr2)<-1:tr2$Nnode+length(tr2$tip) Nodes<-matrix(NA,length(desc.tr1),2,dimnames=list(NULL,c("tr1","tr2"))) for(i in 1:length(desc.tr1)){ Nodes[i,1]<-as.numeric(names(desc.tr1)[i]) for(j in 1:length(desc.tr2)) if(all(desc.tr1[[i]]%in%desc.tr2[[j]])&&all(desc.tr2[[j]]%in%desc.tr1[[i]])) Nodes[i,2]<-as.numeric(names(desc.tr2)[j]) } } else if(method=="distances"){ if(hasArg(tol)) tol<-list(...)$tol else tol<-1e-6 if(hasArg(corr)) corr<-list(...)$corr else corr<-FALSE if(corr) tr1$edge.length<-tr1$edge.length/max(nodeHeights(tr1)) if(corr) tr2$edge.length<-tr2$edge.length/max(nodeHeights(tr2)) D1<-dist.nodes(tr1)[1:length(tr1$tip),1:tr1$Nnode+length(tr1$tip)] D2<-dist.nodes(tr2)[1:length(tr2$tip),1:tr2$Nnode+length(tr2$tip)] rownames(D1)<-tr1$tip.label rownames(D2)<-tr2$tip.label common.tips<-intersect(tr1$tip.label,tr2$tip.label) D1<-D1[common.tips,] D2<-D2[common.tips,] Nodes<-matrix(NA,tr1$Nnode,2,dimnames=list(NULL,c("tr1","tr2"))) for(i in 1:tr1$Nnode){ if(corr) z<-apply(D2,2,function(X,y) cor(X,y),y=D1[,i]) else z<-apply(D2,2,function(X,y) 1-sum(abs(X-y)),y=D1[,i]) Nodes[i,1]<-as.numeric(colnames(D1)[i]) if(any(z>=(1-tol))){ a<-as.numeric(names(which(z>=(1-tol)))) if(length(a)==1) Nodes[i,2]<-a else { Nodes[i,2]<-a[1] warning("polytomy detected; some node matches may be arbitrary") } } } } return(Nodes) } # function rounds the branch lengths of the tree & applies rounding to simmap tree # written by Liam J. Revell 2012 roundBranches<-function(tree,digits=0){ if(class(tree)=="multiPhylo"){ trees<-lapply(tree,roundBranches) class(trees)<-"multiPhylo" return(trees) } else { tree$edge.length<-round(tree$edge.length,digits) if(!is.null(tree$maps)){ for(i in 1:nrow(tree$edge)){ temp<-tree$maps[[i]]/sum(tree$maps[[i]]) tree$maps[[i]]<-temp*tree$edge.length[i] } } if(!is.null(tree$mapped.edge)){ a<-vector() for(i in 1:nrow(tree$edge)) a<-c(a,names(tree$maps[[i]])) a<-unique(a) tree$mapped.edge<-matrix(data=0,length(tree$edge.length),length(a),dimnames=list(apply(tree$edge,1,function(x) paste(x,collapse=",")),state=a)) for(i in 1:length(tree$maps)) for(j in 1:length(tree$maps[[i]])) tree$mapped.edge[i,names(tree$maps[[i]])[j]]<-tree$mapped.edge[i,names(tree$maps[[i]])[j]]+tree$maps[[i]][j] } return(tree) } } # function applies the branch lengths of a reference tree to a second tree, including mappings # written by Liam J. Revell 2012 applyBranchLengths<-function(tree,edge.length){ if(class(tree)=="multiPhylo"){ trees<-lapply(tree,applyBranchLengths,edge.length=edge.length) class(trees)<-"multiPhylo" return(trees) } else { tree$edge.length<-edge.length if(!is.null(tree$maps)){ for(i in 1:nrow(tree$edge)){ temp<-tree$maps[[i]]/sum(tree$maps[[i]]) tree$maps[[i]]<-temp*tree$edge.length[i] } } if(!is.null(tree$mapped.edge)){ a<-vector() for(i in 1:nrow(tree$edge)) a<-c(a,names(tree$maps[[i]])) a<-unique(a) tree$mapped.edge<-matrix(data=0,length(tree$edge.length),length(a),dimnames=list(apply(tree$edge,1,function(x) paste(x,collapse=",")),state=a)) for(i in 1:length(tree$maps)) for(j in 1:length(tree$maps[[i]])) tree$mapped.edge[i,names(tree$maps[[i]])[j]]<-tree$mapped.edge[i,names(tree$maps[[i]])[j]]+tree$maps[[i]][j] } return(tree) } } # function to compute phylogenetic VCV using joint Pagel's lambda # written by Liam Revell 2011 phyl.vcv<-function(X,C,lambda){ C<-lambda.transform(lambda,C) invC<-solve(C) a<-matrix(colSums(invC%*%X)/sum(invC),ncol(X),1) A<-matrix(rep(a,nrow(X)),nrow(X),ncol(X),byrow=T) V<-t(X-A)%*%invC%*%(X-A)/(nrow(C)-1) return(list(C=C,R=V,alpha=a)) } # lambda transformation of C # written by Liam Revell 2011 lambda.transform<-function(lambda,C){ if(lambda==1) return(C) else { V<-diag(diag(C)) C<-C-V C.lambda<-(V+lambda*C) return(C.lambda) } } # likelihood function for joint estimation of lambda for multiple traits # written by Liam Revell 2011/2012 likMlambda<-function(lambda,X,C){ # compute R, conditioned on lambda temp<-phyl.vcv(X,C,lambda); C<-temp$C; R<-temp$R; a<-temp$alpha # prep n<-nrow(X); m<-ncol(X); D<-matrix(0,n*m,m) for(i in 1:(n*m)) for(j in 1:m) if((j-1)*n1) if(length(intersect(states,ordering))n)) result$nodes<-sisters[which(sisters>n)] else if(any(sisters>n)) result$nodes<-tree$node.label[sisters[which(sisters>n)]-n] if(any(sisters<=n)) result$tips<-tree$tip.label[sisters[which(sisters<=n)]] return(result) } } # gets descendant node numbers # written by Liam Revell 2012, 2013 getDescendants<-function(tree,node,curr=NULL){ if(is.null(curr)) curr<-vector() daughters<-tree$edge[which(tree$edge[,1]==node),2] curr<-c(curr,daughters) if(length(curr)==0&&node<=length(tree$tip.label)) curr<-node w<-which(daughters>=length(tree$tip)) if(length(w)>0) for(i in 1:length(w)) curr<-getDescendants(tree,daughters[w[i]],curr) return(curr) } # function computes vcv for each state, and stores in array # written by Liam J. Revell 2011/2012 multiC<-function(tree){ n<-length(tree$tip); m<-ncol(tree$mapped.edge) # compute separate C for each state mC<-list() for(i in 1:m){ mtree<-list(edge=tree$edge,Nnode=tree$Nnode,tip.label=tree$tip.label,edge.length=tree$mapped.edge[,i]) class(mtree)<-"phylo" mC[[i]]<-vcv.phylo(mtree) } return(mC) } # function pastes subtree onto tip # written by Liam Revell 2011 paste.tree<-function(tr1,tr2){ if(length(tr2$tip)>1){ temp<-tr2$root.edge; tr2$root.edge<-NULL tr1$edge.length[match(which(tr1$tip.label=="NA"),tr1$edge[,2])]<-tr1$edge.length[match(which(tr1$tip.label=="NA"),tr1$edge[,2])]+temp } tr.bound<-bind.tree(tr1,tr2,where=which(tr1$tip.label=="NA")) return(tr.bound) } # function drops entire clade # written by Liam Revell 2011 drop.clade<-function(tree,tip){ tree<-drop.tip(tree,tip,trim.internal=FALSE) while(sum(tree$tip.label=="NA")>1){ tree<-drop.tip(tree,"NA",trim.internal=FALSE) } return(tree) } # match type # written by Liam J. Revell 2012 matchType<-function(type,types){ for(i in 1:length(types)) if(all(strsplit(type,split="")[[1]]==strsplit(types[i],split="")[[1]][1:length(strsplit(type,split="")[[1]])])) type=types[i] return(type) } # wraps around MatrixExp # written by Liam Revell 2011 expm<-function(Y){ Z<-MatrixExp(Y); dimnames(Z)<-dimnames(Y) return(Z) } # function reorders simmap tree # written Liam Revell 2011 reorderSimmap<-function(tree,order="cladewise"){ ntree<-reorder(tree,order) o<-whichorder(ntree$edge[,2],tree$edge[,2]) ntree$mapped.edge<-tree$mapped.edge[o,] ntree$maps<-tree$maps[o] return(ntree) } # function 'untangles' (or attempts to untangle) a tree with crossing branches # written by Liam J. Revell 2013 untangle<-function(tree,method=c("reorder","read.tree")){ method<-method[1] if(!is.null(tree$maps)) simmap<-TRUE else simmap<-FALSE if(method=="reorder"){ if(simmap) tree<-reorderSimmap(reorderSimmap(tree,"pruningwise")) else tree<-reorder(reorder(tree,"pruningwise")) } else if(method=="read.tree"){ if(simmap){ stop("Option 'read.tree' does not presently work for SIMMAP style trees") # tree<-read.simmap(text=write.simmap(tree)) } else tree<-read.tree(text=write.tree(tree)) } return(tree) }