comparison abims_anova.r @ 10:b147b17759a6 draft

planemo upload for repository https://github.com/workflow4metabolomics/anova commit 28838bb8dafd6d286157db77f181ed8a1b586664
author lecorguille
date Wed, 28 Feb 2018 07:44:13 -0500
parents 8dd2a438bfba
children 102049093b7d
comparison
equal deleted inserted replaced
9:4f06e1796334 10:b147b17759a6
1 #!/usr/local/public/bin/Rscript 1 #!/usr/local/public/bin/Rscript
2 # version="1.1" 2 # version="1.1"
3 3
4 # date: 06-06-2012 4 # date: 06-06-2012
5 # update: 18-02-2014 5 # update: 18-02-2014
6 # **Authors** Gildas Le Corguille ABiMS - UPMC/CNRS - Station Biologique de Roscoff - gildas.lecorguille|at|sb-roscoff.fr 6 # **Authors** Gildas Le Corguille ABiMS - UPMC/CNRS - Station Biologique de Roscoff - gildas.lecorguille|at|sb-roscoff.fr
7 7
8 # abims_anova.r version 20140218 8 # abims_anova.r version 20140218
9 9
10 library(batch) 10 library(batch)
11 11
12 12
13 # function avova 13 # function avova
14 anova = function (file, sampleinfo, mode="column", condition=1, interaction=F, method="BH", threshold=0.01, selection_method="intersection", sep=";", dec=".", outputdatapvalue="anova.data.output", outputdatafiltered="anova.datafiltered.output") { 14 anova = function (file, sampleinfo, varinfo, mode="column", condition=1, interaction=F, method="BH", threshold=0.01, selection_method="intersection", sep=";", dec=".", outputdatapvalue="anova.data.output", outputdatasignif="anova.datasignif.output") {
15 15
16 16
17 if (sep=="tabulation") sep="\t" 17 if (sep=="tabulation") sep="\t"
18 if (sep=="semicolon") sep=";" 18 if (sep=="semicolon") sep=";"
19 if (sep=="comma") sep="," 19 if (sep=="comma") sep=","
20 20
21 anova_formula_operator = "+" 21 anova_formula_operator = "+"
22 if (interaction) anova_formula_operator = "*" 22 if (interaction) anova_formula_operator = "*"
23 23
24 # -- import -- 24 # -- import --
25 data=read.table(file, header = TRUE, row.names=1, sep = sep, quote="\"", dec = dec, fill = TRUE, comment.char="",na.strings = "NA") 25 data=read.table(file, header = TRUE, row.names=1, sep = sep, quote="\"", dec = dec, fill = TRUE, comment.char="",na.strings = "NA", check.names=FALSE)
26 26
27 if (mode == "row") data=t(data) 27 if (mode == "row") data=t(data)
28 28
29 sampleinfoTab=read.table(sampleinfo, header = TRUE, row.names=1, sep = sep, quote="\"") 29 sampleinfoTab=read.table(sampleinfo, header = TRUE, row.names=1, sep = sep, quote="\"")
30 rownames(sampleinfoTab) = make.names(rownames(sampleinfoTab)) 30 rownames(sampleinfoTab) = make.names(rownames(sampleinfoTab))
31 31
32 32 varinfoTab=read.table(varinfo, header = TRUE, sep = sep, quote="\"")
33 if(sum(colnames(data)!=varinfoTab[,1])!=0){ # if ID not exactly identical
34 if(sum(colnames(data)[order(colnames(data))]!=varinfoTab[order(varinfoTab[,1]),1])==0){
35 # reorder data matrix to match variable metadata order
36 data = data[,match(varinfoTab[,1],colnames(data))]
37 }else{
38 stop(paste0("\nVariables' ID do not match between your data matrix and your variable",
39 "metadata file. \nPlease check your data."))
40 }
41 }
42
33 # -- group -- 43 # -- group --
34 match_data_sampleinfoTab = match(rownames(data),rownames(sampleinfoTab)) 44 match_data_sampleinfoTab = match(rownames(data),rownames(sampleinfoTab))
35 if (sum(is.na(match_data_sampleinfoTab)) > 0) { 45 if (sum(is.na(match_data_sampleinfoTab)) > 0) {
36 write("ERROR: There is a problem during to match sample names from the data matrix and from the sample info (presence of NA).", stderr()) 46 write("ERROR: There is a problem during to match sample names from the data matrix and from the sample info (presence of NA).", stderr())
37 write("You may need to use change the mode (column/row)", stderr()) 47 write("You may need to use change the mode (column/row)", stderr())
39 write(head(colnames(data)), stderr()) 49 write(head(colnames(data)), stderr())
40 write("10 first sample names in the sample info:", stderr()) 50 write("10 first sample names in the sample info:", stderr())
41 write(head(rownames(sampleinfoTab)), stderr()) 51 write(head(rownames(sampleinfoTab)), stderr())
42 quit("no",status=10) 52 quit("no",status=10)
43 } 53 }
44 54
45 55
46 # -- anova -- 56 # -- anova --
47 57
48 # formula 58 # formula
49 grps=list() 59 grps=list()
50 anova_formula_s = "data ~ " 60 anova_formula_s = "data ~ "
51 cat("\ncontrasts:\n") 61 cat("\ncontrasts:\n")
52 for (i in 1:length(condition)) { 62 for (i in 1:length(condition)) {
58 } 68 }
59 anova_formula_s = substr(anova_formula_s, 1, nchar(anova_formula_s)-1) 69 anova_formula_s = substr(anova_formula_s, 1, nchar(anova_formula_s)-1)
60 anova_formula = as.formula(anova_formula_s) 70 anova_formula = as.formula(anova_formula_s)
61 71
62 72
63 73
64 # anova 74 # anova
65 manovaObjectList = manova(anova_formula) 75 manovaObjectList = manova(anova_formula)
66 manovaList = summary.aov(manovaObjectList) 76 manovaList = summary.aov(manovaObjectList)
67 77
68 # condition renaming 78 # condition renaming
69 manovaRownames = gsub(" ","",rownames(manovaList[[1]])) 79 manovaRownames = gsub(" ","",rownames(manovaList[[1]]))
70 manovaNbrPvalue = length(manovaRownames)-1 80 manovaNbrPvalue = length(manovaRownames)-1
71 manovaRownames = manovaRownames[-(manovaNbrPvalue+1)] 81 manovaRownames = manovaRownames[-(manovaNbrPvalue+1)]
72 82
73 for (i in 1:length(condition)) { 83 for (i in 1:length(condition)) {
74 manovaRownames = sub(paste("grps\\[\\[",i,"\\]\\]",sep=""),condition[i],manovaRownames) 84 manovaRownames = sub(paste("grps\\[\\[",i,"\\]\\]",sep=""),condition[i],manovaRownames)
75 anova_formula_s = sub(paste("grps\\[\\[",i,"\\]\\]",sep=""),condition[i],anova_formula_s) 85 anova_formula_s = sub(paste("grps\\[\\[",i,"\\]\\]",sep=""),condition[i],anova_formula_s)
76 } 86 }
77 87
78 # log 88 # log
79 cat("\nanova_formula",anova_formula_s,"\n") 89 cat("\nanova_formula",anova_formula_s,"\n")
80 90
81 # p-value 91 # p-value
82 aovPValue = sapply(manovaList,function(x){x[-(manovaNbrPvalue+1),5]}) 92 aovPValue = sapply(manovaList,function(x){x[-(manovaNbrPvalue+1),5]})
83 if(length(condition) == 1) aovPValue = t(aovPValue) 93 if(length(condition) == 1) aovPValue = t(aovPValue)
84 rownames(aovPValue) = paste("pvalue_",manovaRownames,sep="") 94 rownames(aovPValue) = paste("pvalue_",manovaRownames,sep="")
85 95
86 # p-value adjusted 96 # p-value adjusted
87 if(length(condition) == 1) { 97 if(length(condition) == 1) {
88 aovAdjPValue = t(p.adjust(aovPValue,method=method)) 98 aovAdjPValue = t(p.adjust(aovPValue,method=method))
89 } else { 99 } else {
90 aovAdjPValue = apply(aovPValue,2,p.adjust, method=method) 100 aovAdjPValue = t(apply(aovPValue,1,p.adjust, method=method))
91 } 101 }
92 rownames(aovAdjPValue) = paste("pvalueadjusted.",method,".",manovaRownames,sep="") 102 rownames(aovAdjPValue) = paste("pval.",method,".",manovaRownames,sep="")
93 103
94 # selection 104 # selection
95 colSumThreshold = colSums(aovAdjPValue <= threshold) 105 colSumThreshold = colSums(aovAdjPValue <= threshold)
96 if (selection_method == "intersection") { 106 if (selection_method == "intersection") {
97 datafiltered = data[,colSumThreshold == nrow(aovAdjPValue )] 107 datafiltered = data[,colSumThreshold == nrow(aovAdjPValue ), drop=FALSE]
98 } else { 108 } else {
99 datafiltered = data[,colSumThreshold != 0] 109 datafiltered = data[,colSumThreshold != 0, drop=FALSE]
100 } 110 }
101 111 selected.var = rep("no",ncol(data))
112 selected.var[colnames(data)%in%colnames(datafiltered)] = "yes"
113
102 #data=rbind(data, aovPValue, aovAdjPValue) 114 #data=rbind(data, aovPValue, aovAdjPValue)
103 data=rbind(data, aovAdjPValue) 115 varinfoTab=cbind(varinfoTab, round(t(aovAdjPValue),10), selected.var)
104 116
105 117 # group means
106 if (mode == "row") { 118 for (i in 1:length(condition)) {
107 data=t(data) 119 for(j in levels(grps[[i]])){
108 datafiltered=t(datafiltered) 120 subgp = rownames(sampleinfoTab[which(sampleinfoTab[,condition[i]]==j),])
121 modmean = colMeans(data[which(rownames(data)%in%subgp),],na.rm=TRUE)
122 varinfoTab=cbind(varinfoTab, modmean)
123 colnames(varinfoTab)[ncol(varinfoTab)] = paste0("Mean_",condition[i],".",j)
124 }
109 } 125 }
110 126 colnames(varinfoTab) = make.unique(colnames(varinfoTab))
127
128 # pdf for significant variables
129 pdf(outputdatasignif)
130 ### Venn diagram
131 if(nrow(aovAdjPValue)>5){
132 pie(100,labels=NA,main=paste0("Venn diagram only available for maximum 5 terms\n",
133 "(your analysis concerns ",nrow(aovAdjPValue)," terms)\n\n",
134 "Number of significant variables relatively to\nyour chosen threshold and ",
135 "selection method: ",ncol(datafiltered)),cex.main=0.8)
136 }else{
137 vennlist = list(NULL)
138 names(vennlist) = rownames(aovAdjPValue)[1]
139 if(length(colnames(aovAdjPValue))==0){colnames(aovAdjPValue)=varinfoTab[,1]}
140 for(i in 1:nrow(aovAdjPValue)){
141 vennlist[[rownames(aovAdjPValue)[i]]]=colnames(aovAdjPValue[i,which(aovAdjPValue[i,]<=threshold),drop=FALSE])
142 }
143 if(length(vennlist)==0){ pie(100,labels=NA,main="No significant ions was found.")
144 }else{ library(venn) ; venn(vennlist, zcolor="style", cexil=2, cexsn=1.5) }
145 }
146 ### Boxplot
147 par(mfrow=c(2,2),mai=rep(0.5,4))
148 data <- as.data.frame(data)
149 for(i in 1:nrow(aovAdjPValue)){
150 factmain = gsub(paste0("pval.",method,"."),"",rownames(aovAdjPValue)[i])
151 factsignif = aovAdjPValue[i,which(aovAdjPValue[i,]<=threshold),drop=FALSE]
152 if((ncol(factsignif)!=0)&(factmain%in%colnames(sampleinfoTab))){
153 for(j in 1:ncol(factsignif)){
154 varsignif = gsub(" Response ","",colnames(factsignif)[j])
155 boxplot(as.formula(paste0("data$",varsignif," ~ sampleinfoTab$",factmain)),
156 main=paste0(factmain,"\n",varsignif), col="grey", mai=7)
157 }
158 }
159 }
160 dev.off()
161
162 # summary for significant variables
163 cat("\n\n- - - - - - - number of significant variables - - - - - - -\n\n")
164 for(i in 1:nrow(aovAdjPValue)){
165 cat(rownames(aovAdjPValue)[i],"-",
166 sum(aovAdjPValue[i,]<=threshold),"significant variable(s)\n")
167 }
168 cat("\nIf some of your factors are missing here, this may be due to\neffects",
169 "not estimable; your design may not be balanced enough.\n")
170
111 # -- output / return -- 171 # -- output / return --
112 write.table(data, outputdatapvalue, sep=sep, quote=F, col.names = NA) 172 write.table(varinfoTab, outputdatapvalue, sep=sep, quote=F, row.names=FALSE)
113 write.table(datafiltered, outputdatafiltered, sep=sep, quote=F, col.names = NA) 173
114 174 # log
115 # log
116 cat("\nthreshold:",threshold,"\n") 175 cat("\nthreshold:",threshold,"\n")
117 cat("result:",nrow(datafiltered),"/",nrow(data),"\n") 176 cat("result:",ncol(datafiltered),"/",ncol(data),"\n\n")
118 177
119 quit("no",status=0) 178 quit("no",status=0)
120 } 179 }
121 180
122 # log 181 # log
123 cat("ANOVA\n\n") 182 cat("ANOVA\n\n")
125 args <- commandArgs(trailingOnly = TRUE) 184 args <- commandArgs(trailingOnly = TRUE)
126 print(args) 185 print(args)
127 186
128 listArguments = parseCommandArgs(evaluate=FALSE) 187 listArguments = parseCommandArgs(evaluate=FALSE)
129 do.call(anova, listArguments) 188 do.call(anova, listArguments)
130
131
132