Mercurial > repos > lecorguille > anova
comparison abims_anova.r @ 10:b147b17759a6 draft
planemo upload for repository https://github.com/workflow4metabolomics/anova commit 28838bb8dafd6d286157db77f181ed8a1b586664
author | lecorguille |
---|---|
date | Wed, 28 Feb 2018 07:44:13 -0500 |
parents | 8dd2a438bfba |
children | 102049093b7d |
comparison
equal
deleted
inserted
replaced
9:4f06e1796334 | 10:b147b17759a6 |
---|---|
1 #!/usr/local/public/bin/Rscript | 1 #!/usr/local/public/bin/Rscript |
2 # version="1.1" | 2 # version="1.1" |
3 | 3 |
4 # date: 06-06-2012 | 4 # date: 06-06-2012 |
5 # update: 18-02-2014 | 5 # update: 18-02-2014 |
6 # **Authors** Gildas Le Corguille ABiMS - UPMC/CNRS - Station Biologique de Roscoff - gildas.lecorguille|at|sb-roscoff.fr | 6 # **Authors** Gildas Le Corguille ABiMS - UPMC/CNRS - Station Biologique de Roscoff - gildas.lecorguille|at|sb-roscoff.fr |
7 | 7 |
8 # abims_anova.r version 20140218 | 8 # abims_anova.r version 20140218 |
9 | 9 |
10 library(batch) | 10 library(batch) |
11 | 11 |
12 | 12 |
13 # function avova | 13 # function avova |
14 anova = function (file, sampleinfo, mode="column", condition=1, interaction=F, method="BH", threshold=0.01, selection_method="intersection", sep=";", dec=".", outputdatapvalue="anova.data.output", outputdatafiltered="anova.datafiltered.output") { | 14 anova = function (file, sampleinfo, varinfo, mode="column", condition=1, interaction=F, method="BH", threshold=0.01, selection_method="intersection", sep=";", dec=".", outputdatapvalue="anova.data.output", outputdatasignif="anova.datasignif.output") { |
15 | 15 |
16 | 16 |
17 if (sep=="tabulation") sep="\t" | 17 if (sep=="tabulation") sep="\t" |
18 if (sep=="semicolon") sep=";" | 18 if (sep=="semicolon") sep=";" |
19 if (sep=="comma") sep="," | 19 if (sep=="comma") sep="," |
20 | 20 |
21 anova_formula_operator = "+" | 21 anova_formula_operator = "+" |
22 if (interaction) anova_formula_operator = "*" | 22 if (interaction) anova_formula_operator = "*" |
23 | 23 |
24 # -- import -- | 24 # -- import -- |
25 data=read.table(file, header = TRUE, row.names=1, sep = sep, quote="\"", dec = dec, fill = TRUE, comment.char="",na.strings = "NA") | 25 data=read.table(file, header = TRUE, row.names=1, sep = sep, quote="\"", dec = dec, fill = TRUE, comment.char="",na.strings = "NA", check.names=FALSE) |
26 | 26 |
27 if (mode == "row") data=t(data) | 27 if (mode == "row") data=t(data) |
28 | 28 |
29 sampleinfoTab=read.table(sampleinfo, header = TRUE, row.names=1, sep = sep, quote="\"") | 29 sampleinfoTab=read.table(sampleinfo, header = TRUE, row.names=1, sep = sep, quote="\"") |
30 rownames(sampleinfoTab) = make.names(rownames(sampleinfoTab)) | 30 rownames(sampleinfoTab) = make.names(rownames(sampleinfoTab)) |
31 | 31 |
32 | 32 varinfoTab=read.table(varinfo, header = TRUE, sep = sep, quote="\"") |
33 if(sum(colnames(data)!=varinfoTab[,1])!=0){ # if ID not exactly identical | |
34 if(sum(colnames(data)[order(colnames(data))]!=varinfoTab[order(varinfoTab[,1]),1])==0){ | |
35 # reorder data matrix to match variable metadata order | |
36 data = data[,match(varinfoTab[,1],colnames(data))] | |
37 }else{ | |
38 stop(paste0("\nVariables' ID do not match between your data matrix and your variable", | |
39 "metadata file. \nPlease check your data.")) | |
40 } | |
41 } | |
42 | |
33 # -- group -- | 43 # -- group -- |
34 match_data_sampleinfoTab = match(rownames(data),rownames(sampleinfoTab)) | 44 match_data_sampleinfoTab = match(rownames(data),rownames(sampleinfoTab)) |
35 if (sum(is.na(match_data_sampleinfoTab)) > 0) { | 45 if (sum(is.na(match_data_sampleinfoTab)) > 0) { |
36 write("ERROR: There is a problem during to match sample names from the data matrix and from the sample info (presence of NA).", stderr()) | 46 write("ERROR: There is a problem during to match sample names from the data matrix and from the sample info (presence of NA).", stderr()) |
37 write("You may need to use change the mode (column/row)", stderr()) | 47 write("You may need to use change the mode (column/row)", stderr()) |
39 write(head(colnames(data)), stderr()) | 49 write(head(colnames(data)), stderr()) |
40 write("10 first sample names in the sample info:", stderr()) | 50 write("10 first sample names in the sample info:", stderr()) |
41 write(head(rownames(sampleinfoTab)), stderr()) | 51 write(head(rownames(sampleinfoTab)), stderr()) |
42 quit("no",status=10) | 52 quit("no",status=10) |
43 } | 53 } |
44 | 54 |
45 | 55 |
46 # -- anova -- | 56 # -- anova -- |
47 | 57 |
48 # formula | 58 # formula |
49 grps=list() | 59 grps=list() |
50 anova_formula_s = "data ~ " | 60 anova_formula_s = "data ~ " |
51 cat("\ncontrasts:\n") | 61 cat("\ncontrasts:\n") |
52 for (i in 1:length(condition)) { | 62 for (i in 1:length(condition)) { |
58 } | 68 } |
59 anova_formula_s = substr(anova_formula_s, 1, nchar(anova_formula_s)-1) | 69 anova_formula_s = substr(anova_formula_s, 1, nchar(anova_formula_s)-1) |
60 anova_formula = as.formula(anova_formula_s) | 70 anova_formula = as.formula(anova_formula_s) |
61 | 71 |
62 | 72 |
63 | 73 |
64 # anova | 74 # anova |
65 manovaObjectList = manova(anova_formula) | 75 manovaObjectList = manova(anova_formula) |
66 manovaList = summary.aov(manovaObjectList) | 76 manovaList = summary.aov(manovaObjectList) |
67 | 77 |
68 # condition renaming | 78 # condition renaming |
69 manovaRownames = gsub(" ","",rownames(manovaList[[1]])) | 79 manovaRownames = gsub(" ","",rownames(manovaList[[1]])) |
70 manovaNbrPvalue = length(manovaRownames)-1 | 80 manovaNbrPvalue = length(manovaRownames)-1 |
71 manovaRownames = manovaRownames[-(manovaNbrPvalue+1)] | 81 manovaRownames = manovaRownames[-(manovaNbrPvalue+1)] |
72 | 82 |
73 for (i in 1:length(condition)) { | 83 for (i in 1:length(condition)) { |
74 manovaRownames = sub(paste("grps\\[\\[",i,"\\]\\]",sep=""),condition[i],manovaRownames) | 84 manovaRownames = sub(paste("grps\\[\\[",i,"\\]\\]",sep=""),condition[i],manovaRownames) |
75 anova_formula_s = sub(paste("grps\\[\\[",i,"\\]\\]",sep=""),condition[i],anova_formula_s) | 85 anova_formula_s = sub(paste("grps\\[\\[",i,"\\]\\]",sep=""),condition[i],anova_formula_s) |
76 } | 86 } |
77 | 87 |
78 # log | 88 # log |
79 cat("\nanova_formula",anova_formula_s,"\n") | 89 cat("\nanova_formula",anova_formula_s,"\n") |
80 | 90 |
81 # p-value | 91 # p-value |
82 aovPValue = sapply(manovaList,function(x){x[-(manovaNbrPvalue+1),5]}) | 92 aovPValue = sapply(manovaList,function(x){x[-(manovaNbrPvalue+1),5]}) |
83 if(length(condition) == 1) aovPValue = t(aovPValue) | 93 if(length(condition) == 1) aovPValue = t(aovPValue) |
84 rownames(aovPValue) = paste("pvalue_",manovaRownames,sep="") | 94 rownames(aovPValue) = paste("pvalue_",manovaRownames,sep="") |
85 | 95 |
86 # p-value adjusted | 96 # p-value adjusted |
87 if(length(condition) == 1) { | 97 if(length(condition) == 1) { |
88 aovAdjPValue = t(p.adjust(aovPValue,method=method)) | 98 aovAdjPValue = t(p.adjust(aovPValue,method=method)) |
89 } else { | 99 } else { |
90 aovAdjPValue = apply(aovPValue,2,p.adjust, method=method) | 100 aovAdjPValue = t(apply(aovPValue,1,p.adjust, method=method)) |
91 } | 101 } |
92 rownames(aovAdjPValue) = paste("pvalueadjusted.",method,".",manovaRownames,sep="") | 102 rownames(aovAdjPValue) = paste("pval.",method,".",manovaRownames,sep="") |
93 | 103 |
94 # selection | 104 # selection |
95 colSumThreshold = colSums(aovAdjPValue <= threshold) | 105 colSumThreshold = colSums(aovAdjPValue <= threshold) |
96 if (selection_method == "intersection") { | 106 if (selection_method == "intersection") { |
97 datafiltered = data[,colSumThreshold == nrow(aovAdjPValue )] | 107 datafiltered = data[,colSumThreshold == nrow(aovAdjPValue ), drop=FALSE] |
98 } else { | 108 } else { |
99 datafiltered = data[,colSumThreshold != 0] | 109 datafiltered = data[,colSumThreshold != 0, drop=FALSE] |
100 } | 110 } |
101 | 111 selected.var = rep("no",ncol(data)) |
112 selected.var[colnames(data)%in%colnames(datafiltered)] = "yes" | |
113 | |
102 #data=rbind(data, aovPValue, aovAdjPValue) | 114 #data=rbind(data, aovPValue, aovAdjPValue) |
103 data=rbind(data, aovAdjPValue) | 115 varinfoTab=cbind(varinfoTab, round(t(aovAdjPValue),10), selected.var) |
104 | 116 |
105 | 117 # group means |
106 if (mode == "row") { | 118 for (i in 1:length(condition)) { |
107 data=t(data) | 119 for(j in levels(grps[[i]])){ |
108 datafiltered=t(datafiltered) | 120 subgp = rownames(sampleinfoTab[which(sampleinfoTab[,condition[i]]==j),]) |
121 modmean = colMeans(data[which(rownames(data)%in%subgp),],na.rm=TRUE) | |
122 varinfoTab=cbind(varinfoTab, modmean) | |
123 colnames(varinfoTab)[ncol(varinfoTab)] = paste0("Mean_",condition[i],".",j) | |
124 } | |
109 } | 125 } |
110 | 126 colnames(varinfoTab) = make.unique(colnames(varinfoTab)) |
127 | |
128 # pdf for significant variables | |
129 pdf(outputdatasignif) | |
130 ### Venn diagram | |
131 if(nrow(aovAdjPValue)>5){ | |
132 pie(100,labels=NA,main=paste0("Venn diagram only available for maximum 5 terms\n", | |
133 "(your analysis concerns ",nrow(aovAdjPValue)," terms)\n\n", | |
134 "Number of significant variables relatively to\nyour chosen threshold and ", | |
135 "selection method: ",ncol(datafiltered)),cex.main=0.8) | |
136 }else{ | |
137 vennlist = list(NULL) | |
138 names(vennlist) = rownames(aovAdjPValue)[1] | |
139 if(length(colnames(aovAdjPValue))==0){colnames(aovAdjPValue)=varinfoTab[,1]} | |
140 for(i in 1:nrow(aovAdjPValue)){ | |
141 vennlist[[rownames(aovAdjPValue)[i]]]=colnames(aovAdjPValue[i,which(aovAdjPValue[i,]<=threshold),drop=FALSE]) | |
142 } | |
143 if(length(vennlist)==0){ pie(100,labels=NA,main="No significant ions was found.") | |
144 }else{ library(venn) ; venn(vennlist, zcolor="style", cexil=2, cexsn=1.5) } | |
145 } | |
146 ### Boxplot | |
147 par(mfrow=c(2,2),mai=rep(0.5,4)) | |
148 data <- as.data.frame(data) | |
149 for(i in 1:nrow(aovAdjPValue)){ | |
150 factmain = gsub(paste0("pval.",method,"."),"",rownames(aovAdjPValue)[i]) | |
151 factsignif = aovAdjPValue[i,which(aovAdjPValue[i,]<=threshold),drop=FALSE] | |
152 if((ncol(factsignif)!=0)&(factmain%in%colnames(sampleinfoTab))){ | |
153 for(j in 1:ncol(factsignif)){ | |
154 varsignif = gsub(" Response ","",colnames(factsignif)[j]) | |
155 boxplot(as.formula(paste0("data$",varsignif," ~ sampleinfoTab$",factmain)), | |
156 main=paste0(factmain,"\n",varsignif), col="grey", mai=7) | |
157 } | |
158 } | |
159 } | |
160 dev.off() | |
161 | |
162 # summary for significant variables | |
163 cat("\n\n- - - - - - - number of significant variables - - - - - - -\n\n") | |
164 for(i in 1:nrow(aovAdjPValue)){ | |
165 cat(rownames(aovAdjPValue)[i],"-", | |
166 sum(aovAdjPValue[i,]<=threshold),"significant variable(s)\n") | |
167 } | |
168 cat("\nIf some of your factors are missing here, this may be due to\neffects", | |
169 "not estimable; your design may not be balanced enough.\n") | |
170 | |
111 # -- output / return -- | 171 # -- output / return -- |
112 write.table(data, outputdatapvalue, sep=sep, quote=F, col.names = NA) | 172 write.table(varinfoTab, outputdatapvalue, sep=sep, quote=F, row.names=FALSE) |
113 write.table(datafiltered, outputdatafiltered, sep=sep, quote=F, col.names = NA) | 173 |
114 | 174 # log |
115 # log | |
116 cat("\nthreshold:",threshold,"\n") | 175 cat("\nthreshold:",threshold,"\n") |
117 cat("result:",nrow(datafiltered),"/",nrow(data),"\n") | 176 cat("result:",ncol(datafiltered),"/",ncol(data),"\n\n") |
118 | 177 |
119 quit("no",status=0) | 178 quit("no",status=0) |
120 } | 179 } |
121 | 180 |
122 # log | 181 # log |
123 cat("ANOVA\n\n") | 182 cat("ANOVA\n\n") |
125 args <- commandArgs(trailingOnly = TRUE) | 184 args <- commandArgs(trailingOnly = TRUE) |
126 print(args) | 185 print(args) |
127 | 186 |
128 listArguments = parseCommandArgs(evaluate=FALSE) | 187 listArguments = parseCommandArgs(evaluate=FALSE) |
129 do.call(anova, listArguments) | 188 do.call(anova, listArguments) |
130 | |
131 | |
132 |