Mercurial > repos > lecorguille > anova
diff abims_anova.r @ 10:b147b17759a6 draft
planemo upload for repository https://github.com/workflow4metabolomics/anova commit 28838bb8dafd6d286157db77f181ed8a1b586664
author | lecorguille |
---|---|
date | Wed, 28 Feb 2018 07:44:13 -0500 |
parents | 8dd2a438bfba |
children | 102049093b7d |
line wrap: on
line diff
--- a/abims_anova.r Thu Oct 26 09:30:56 2017 -0400 +++ b/abims_anova.r Wed Feb 28 07:44:13 2018 -0500 @@ -3,7 +3,7 @@ # date: 06-06-2012 # update: 18-02-2014 -# **Authors** Gildas Le Corguille ABiMS - UPMC/CNRS - Station Biologique de Roscoff - gildas.lecorguille|at|sb-roscoff.fr +# **Authors** Gildas Le Corguille ABiMS - UPMC/CNRS - Station Biologique de Roscoff - gildas.lecorguille|at|sb-roscoff.fr # abims_anova.r version 20140218 @@ -11,25 +11,35 @@ # function avova -anova = function (file, sampleinfo, mode="column", condition=1, interaction=F, method="BH", threshold=0.01, selection_method="intersection", sep=";", dec=".", outputdatapvalue="anova.data.output", outputdatafiltered="anova.datafiltered.output") { - - +anova = function (file, sampleinfo, varinfo, mode="column", condition=1, interaction=F, method="BH", threshold=0.01, selection_method="intersection", sep=";", dec=".", outputdatapvalue="anova.data.output", outputdatasignif="anova.datasignif.output") { + + if (sep=="tabulation") sep="\t" if (sep=="semicolon") sep=";" if (sep=="comma") sep="," anova_formula_operator = "+" if (interaction) anova_formula_operator = "*" - + # -- import -- - data=read.table(file, header = TRUE, row.names=1, sep = sep, quote="\"", dec = dec, fill = TRUE, comment.char="",na.strings = "NA") - + data=read.table(file, header = TRUE, row.names=1, sep = sep, quote="\"", dec = dec, fill = TRUE, comment.char="",na.strings = "NA", check.names=FALSE) + if (mode == "row") data=t(data) - + sampleinfoTab=read.table(sampleinfo, header = TRUE, row.names=1, sep = sep, quote="\"") rownames(sampleinfoTab) = make.names(rownames(sampleinfoTab)) - + varinfoTab=read.table(varinfo, header = TRUE, sep = sep, quote="\"") + if(sum(colnames(data)!=varinfoTab[,1])!=0){ # if ID not exactly identical + if(sum(colnames(data)[order(colnames(data))]!=varinfoTab[order(varinfoTab[,1]),1])==0){ + # reorder data matrix to match variable metadata order + data = data[,match(varinfoTab[,1],colnames(data))] + }else{ + stop(paste0("\nVariables' ID do not match between your data matrix and your variable", + "metadata file. \nPlease check your data.")) + } + } + # -- group -- match_data_sampleinfoTab = match(rownames(data),rownames(sampleinfoTab)) if (sum(is.na(match_data_sampleinfoTab)) > 0) { @@ -41,10 +51,10 @@ write(head(rownames(sampleinfoTab)), stderr()) quit("no",status=10) } - - + + # -- anova -- - + # formula grps=list() anova_formula_s = "data ~ " @@ -60,16 +70,16 @@ anova_formula = as.formula(anova_formula_s) - + # anova manovaObjectList = manova(anova_formula) manovaList = summary.aov(manovaObjectList) - + # condition renaming manovaRownames = gsub(" ","",rownames(manovaList[[1]])) manovaNbrPvalue = length(manovaRownames)-1 manovaRownames = manovaRownames[-(manovaNbrPvalue+1)] - + for (i in 1:length(condition)) { manovaRownames = sub(paste("grps\\[\\[",i,"\\]\\]",sep=""),condition[i],manovaRownames) anova_formula_s = sub(paste("grps\\[\\[",i,"\\]\\]",sep=""),condition[i],anova_formula_s) @@ -77,45 +87,94 @@ # log cat("\nanova_formula",anova_formula_s,"\n") - + # p-value aovPValue = sapply(manovaList,function(x){x[-(manovaNbrPvalue+1),5]}) if(length(condition) == 1) aovPValue = t(aovPValue) rownames(aovPValue) = paste("pvalue_",manovaRownames,sep="") - + # p-value adjusted if(length(condition) == 1) { aovAdjPValue = t(p.adjust(aovPValue,method=method)) } else { - aovAdjPValue = apply(aovPValue,2,p.adjust, method=method) + aovAdjPValue = t(apply(aovPValue,1,p.adjust, method=method)) } - rownames(aovAdjPValue) = paste("pvalueadjusted.",method,".",manovaRownames,sep="") - + rownames(aovAdjPValue) = paste("pval.",method,".",manovaRownames,sep="") + # selection colSumThreshold = colSums(aovAdjPValue <= threshold) if (selection_method == "intersection") { - datafiltered = data[,colSumThreshold == nrow(aovAdjPValue )] + datafiltered = data[,colSumThreshold == nrow(aovAdjPValue ), drop=FALSE] } else { - datafiltered = data[,colSumThreshold != 0] + datafiltered = data[,colSumThreshold != 0, drop=FALSE] } - + selected.var = rep("no",ncol(data)) + selected.var[colnames(data)%in%colnames(datafiltered)] = "yes" + #data=rbind(data, aovPValue, aovAdjPValue) - data=rbind(data, aovAdjPValue) + varinfoTab=cbind(varinfoTab, round(t(aovAdjPValue),10), selected.var) + + # group means + for (i in 1:length(condition)) { + for(j in levels(grps[[i]])){ + subgp = rownames(sampleinfoTab[which(sampleinfoTab[,condition[i]]==j),]) + modmean = colMeans(data[which(rownames(data)%in%subgp),],na.rm=TRUE) + varinfoTab=cbind(varinfoTab, modmean) + colnames(varinfoTab)[ncol(varinfoTab)] = paste0("Mean_",condition[i],".",j) + } + } + colnames(varinfoTab) = make.unique(colnames(varinfoTab)) - - if (mode == "row") { - data=t(data) - datafiltered=t(datafiltered) + # pdf for significant variables + pdf(outputdatasignif) + ### Venn diagram + if(nrow(aovAdjPValue)>5){ + pie(100,labels=NA,main=paste0("Venn diagram only available for maximum 5 terms\n", + "(your analysis concerns ",nrow(aovAdjPValue)," terms)\n\n", + "Number of significant variables relatively to\nyour chosen threshold and ", + "selection method: ",ncol(datafiltered)),cex.main=0.8) + }else{ + vennlist = list(NULL) + names(vennlist) = rownames(aovAdjPValue)[1] + if(length(colnames(aovAdjPValue))==0){colnames(aovAdjPValue)=varinfoTab[,1]} + for(i in 1:nrow(aovAdjPValue)){ + vennlist[[rownames(aovAdjPValue)[i]]]=colnames(aovAdjPValue[i,which(aovAdjPValue[i,]<=threshold),drop=FALSE]) + } + if(length(vennlist)==0){ pie(100,labels=NA,main="No significant ions was found.") + }else{ library(venn) ; venn(vennlist, zcolor="style", cexil=2, cexsn=1.5) } } - + ### Boxplot + par(mfrow=c(2,2),mai=rep(0.5,4)) + data <- as.data.frame(data) + for(i in 1:nrow(aovAdjPValue)){ + factmain = gsub(paste0("pval.",method,"."),"",rownames(aovAdjPValue)[i]) + factsignif = aovAdjPValue[i,which(aovAdjPValue[i,]<=threshold),drop=FALSE] + if((ncol(factsignif)!=0)&(factmain%in%colnames(sampleinfoTab))){ + for(j in 1:ncol(factsignif)){ + varsignif = gsub(" Response ","",colnames(factsignif)[j]) + boxplot(as.formula(paste0("data$",varsignif," ~ sampleinfoTab$",factmain)), + main=paste0(factmain,"\n",varsignif), col="grey", mai=7) + } + } + } + dev.off() + + # summary for significant variables + cat("\n\n- - - - - - - number of significant variables - - - - - - -\n\n") + for(i in 1:nrow(aovAdjPValue)){ + cat(rownames(aovAdjPValue)[i],"-", + sum(aovAdjPValue[i,]<=threshold),"significant variable(s)\n") + } + cat("\nIf some of your factors are missing here, this may be due to\neffects", + "not estimable; your design may not be balanced enough.\n") + # -- output / return -- - write.table(data, outputdatapvalue, sep=sep, quote=F, col.names = NA) - write.table(datafiltered, outputdatafiltered, sep=sep, quote=F, col.names = NA) - - # log + write.table(varinfoTab, outputdatapvalue, sep=sep, quote=F, row.names=FALSE) + + # log cat("\nthreshold:",threshold,"\n") - cat("result:",nrow(datafiltered),"/",nrow(data),"\n") - + cat("result:",ncol(datafiltered),"/",ncol(data),"\n\n") + quit("no",status=0) } @@ -127,6 +186,3 @@ listArguments = parseCommandArgs(evaluate=FALSE) do.call(anova, listArguments) - - -