Mercurial > repos > ecology > pampa_glmcomm
comparison FunctExeCalcGLMGalaxy.r @ 0:f0dc3958e65d draft
"planemo upload for repository https://github.com/ColineRoyaux/PAMPA-Galaxy commit 07f1028cc764f920b1e6419c151f04ab4e3600fa"
author | ecology |
---|---|
date | Tue, 21 Jul 2020 06:00:31 -0400 |
parents | |
children | 44b9069775ca |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f0dc3958e65d |
---|---|
1 #Rscript | |
2 | |
3 ##################################################################################################################### | |
4 ##################################################################################################################### | |
5 ################################# Compute a Generalized Linear Model from your data ################################# | |
6 ##################################################################################################################### | |
7 ##################################################################################################################### | |
8 | |
9 ###################### Packages | |
10 suppressMessages(library(multcomp)) | |
11 suppressMessages(library(glmmTMB)) ###Version: 0.2.3 | |
12 suppressMessages(library(gap)) | |
13 | |
14 ###################### Load arguments and declaring variables | |
15 | |
16 args = commandArgs(trailingOnly=TRUE) | |
17 #options(encoding = "UTF-8") | |
18 | |
19 if (length(args) < 10) { | |
20 stop("At least 4 arguments must be supplied : \n- two input dataset files (.tabular) : metrics table and unitobs table \n- Interest variable field from metrics table \n- Response variable from unitobs table.", call.=FALSE) #si pas d'arguments -> affiche erreur et quitte / if no args -> error and exit1 | |
21 | |
22 } else { | |
23 Importdata <- args[1] ###### file name : metrics table | |
24 ImportUnitobs <- args[2] ###### file name : unitobs informations | |
25 colmetric <- as.numeric(args[3]) ###### Selected interest metric for GLM | |
26 listFact <- strsplit(args [4],",")[[1]] ###### Selected response factors for GLM | |
27 listRand <- strsplit(args [5],",")[[1]] ###### Selected randomized response factors for GLM | |
28 colFactAna <- args[6] ####### (optional) Selected splitting factors for GLMs | |
29 Distrib <- args[7] ###### (optional) Selected distribution for GLM | |
30 log <- args[8] ###### (Optional) Log on interest metric ? | |
31 aggreg <- args[9] ###### Aggregation level of the data table | |
32 source(args[10]) ###### Import functions | |
33 | |
34 } | |
35 #### Data must be a dataframe with at least 3 variables : unitobs representing location and year ("observation.unit"), species code ("species.code") and abundance ("number") | |
36 | |
37 | |
38 #Import des données / Import data | |
39 obs<- read.table(Importdata,sep="\t",dec=".",header=TRUE,encoding="UTF-8") # | |
40 obs[obs == -999] <- NA | |
41 metric <- colnames(obs)[colmetric] | |
42 tabUnitobs <- read.table(ImportUnitobs,sep="\t",dec=".",header=TRUE,encoding="UTF-8") | |
43 tabUnitobs[tabUnitobs == -999] <- NA | |
44 | |
45 if (colFactAna != "None") | |
46 { | |
47 FactAna <- colnames(tabUnitobs)[as.numeric(colFactAna)] | |
48 if (class(tabUnitobs[FactAna]) == "numeric" || FactAna == "observation.unit"){stop("Wrong chosen separation factor : Analysis can't be separated by observation unit or numeric factor")} | |
49 }else{ | |
50 FactAna <- colFactAna | |
51 } | |
52 | |
53 | |
54 #factors <- fact.det.f(Obs=obs) | |
55 | |
56 vars_data1<- NULL | |
57 err_msg_data1<-"The input metrics dataset doesn't have the right format. It needs to have at least the following 2 variables :\n- observation.unit (or year and site)\n- numeric or integer metric\n" | |
58 check_file(obs,err_msg_data1,vars_data1,2) | |
59 | |
60 vars_data2 <- c(listFact,listRand) | |
61 err_msg_data2<-"The input unitobs dataset doesn't have the right format. It needs to have at least the following 2 variables :\n- observation.unit (or year and site)\n- factors used in GLM (habitat, year and/or site)\n" | |
62 check_file(tabUnitobs,err_msg_data2,vars_data2[vars_data2 != "None"],2) | |
63 | |
64 #################################################################################################### | |
65 ########## Computing Generalized Linear Model ## Function : modeleLineaireWP2.unitobs.f ############ | |
66 #################################################################################################### | |
67 | |
68 modeleLineaireWP2.unitobs.f <- function(metrique, listFact, listRand, FactAna, Distrib, log=FALSE, tabMetrics, tableMetrique, tabUnitobs, unitobs="observation.unit", nbName="number") | |
69 { | |
70 ## Purpose: Monitoring steps for GLM on unitobs | |
71 ## ---------------------------------------------------------------------- | |
72 ## Arguments: metrique : selected metric | |
73 ## listFact : Factors for GLM | |
74 ## listRand : Random factors for GLM | |
75 ## factAna : Separation factor for GLMs | |
76 ## Distrib : selected distribution for model | |
77 ## log : log transformation on data ? boolean | |
78 ## tabMetrics : data table metrics | |
79 ## tableMetrique : data table's name | |
80 ## tabUnitobs : data table unitobs | |
81 ## ---------------------------------------------------------------------- | |
82 ## Author: Yves Reecht, Date: 18 août 2010, 15:59 modified by Coline ROYAUX 04 june 2020 | |
83 | |
84 tmpData <- tabMetrics | |
85 | |
86 if (listRand[1] != "None") | |
87 { | |
88 if (all(is.element(listFact,listRand)) || listFact[1] == "None") | |
89 { | |
90 RespFact <- paste("(1|",paste(listRand,collapse=") + (1|"),")") | |
91 listF <- NULL | |
92 listFact <- listRand | |
93 }else{ | |
94 listF <- listFact[!is.element(listFact,listRand)] | |
95 RespFact <- paste(paste(listF, collapse=" + ")," + (1|",paste(listRand,collapse=") + (1|"),")") | |
96 listFact <- c(listF,listRand) | |
97 } | |
98 }else{ | |
99 listF <- listFact | |
100 RespFact <- paste(listFact, collapse=" + ") | |
101 } | |
102 | |
103 ##Creating model's expression : | |
104 | |
105 #if (log == FALSE) { | |
106 exprML <- eval(parse(text=paste(metrique, "~", RespFact))) | |
107 #}else{ | |
108 # exprML <- eval(parse(text=paste("log(",metrique,")", "~", RespFact))) | |
109 #} | |
110 | |
111 ##Creating analysis table : | |
112 | |
113 listFactTab <- c(listFact, FactAna) | |
114 listFactTab <- listFactTab[listFactTab != "None"] | |
115 | |
116 if (all(is.na(match(tmpData[,unitobs],tabUnitobs[,unitobs])))) {stop("Observation units doesn't match in the two input tables")} | |
117 | |
118 if(! is.element("species.code",colnames(tmpData))) | |
119 { | |
120 col <- c(unitobs,metrique) | |
121 tmpData <- cbind(tmpData[,col], tabUnitobs[match(tmpData[,unitobs],tabUnitobs[,unitobs]),listFactTab]) | |
122 colnames(tmpData) <- c(col,listFactTab) | |
123 | |
124 for (i in listFactTab) { | |
125 switch(i, | |
126 tmpData[,i] <- as.factor(tmpData[,i])) | |
127 } | |
128 }else{ | |
129 stop("Warning : wrong data frame, data frame should be aggregated by observation unit (year and site)") | |
130 } | |
131 | |
132 ## Suppress unsed 'levels' : | |
133 tmpData <- dropLevels.f(tmpData) | |
134 | |
135 ## Automatic choice of distribution if none is selected by user : | |
136 if (Distrib == "None") | |
137 { | |
138 switch(class(tmpData[,metrique]), | |
139 "integer"={loiChoisie <- "poisson"}, | |
140 "numeric"={loiChoisie <- "gaussian"}, | |
141 stop("Selected metric class doesn't fit, you should select an integer or a numeric variable")) | |
142 }else{ | |
143 loiChoisie <- Distrib | |
144 } | |
145 | |
146 | |
147 ## Compute Model(s) : | |
148 | |
149 if (FactAna != "None" && nlevels(tmpData[,FactAna]) > 1) | |
150 { | |
151 Anacut <- levels(tmpData[,FactAna]) | |
152 }else{ | |
153 Anacut <- NULL | |
154 } | |
155 | |
156 ##Create results table : | |
157 lev <- unlist(lapply(listF,FUN=function(x){levels(tmpData[,x])})) | |
158 | |
159 if (listRand[1] != "None") ## if random effects | |
160 { | |
161 TabSum <- data.frame(analysis=c("global", Anacut),AIC=NA,BIC=NA,logLik=NA, deviance=NA,df.resid=NA) | |
162 colrand <- unlist(lapply(listRand, | |
163 FUN=function(x){lapply(c("Std.Dev","NbObservation","NbLevels"), | |
164 FUN=function(y){paste(x,y,collapse = ":") | |
165 }) | |
166 })) | |
167 TabSum[,colrand] <- NA | |
168 | |
169 if (! is.null(lev)) ## if fixed effects + random effects | |
170 { | |
171 colcoef <- unlist(lapply(c("(Intercept)",lev), | |
172 FUN=function(x){lapply(c("Estimate","Std.Err","Zvalue","Pvalue","signif"), | |
173 FUN=function(y){paste(x,y,collapse = ":") | |
174 }) | |
175 })) | |
176 }else{ ## if no fixed effects | |
177 colcoef <- NULL | |
178 } | |
179 | |
180 }else{ ## if no random effects | |
181 TabSum <- data.frame(analysis=c("global", Anacut),AIC=NA,Resid.deviance=NA,df.resid=NA,Null.deviance=NA,df.null=NA) | |
182 | |
183 switch(loiChoisie, | |
184 "gaussian"={colcoef <- unlist(lapply(c("(Intercept)",lev), | |
185 FUN=function(x){lapply(c("Estimate","Std.Err","Tvalue","Pvalue","signif"), | |
186 FUN=function(y){paste(x,y,collapse = ":") | |
187 }) | |
188 }))}, | |
189 "quasipoisson"={colcoef <- unlist(lapply(c("(Intercept)",lev), | |
190 FUN=function(x){lapply(c("Estimate","Std.Err","Tvalue","Pvalue","signif"), | |
191 FUN=function(y){paste(x,y,collapse = ":") | |
192 }) | |
193 }))}, | |
194 colcoef <- unlist(lapply(c("(Intercept)",lev), | |
195 FUN=function(x){lapply(c("Estimate","Std.Err","Zvalue","Pvalue","signif"), | |
196 FUN=function(y){paste(x,y,collapse = ":") | |
197 }) | |
198 }))) | |
199 | |
200 } | |
201 | |
202 TabSum[,colcoef] <- NA | |
203 | |
204 ### creating rate table | |
205 TabRate <- data.frame(analysis=c("global", Anacut), complete_plan=NA, balanced_plan=NA, NA_proportion_OK=NA, no_residual_dispersion=NA, uniform_residuals=NA, outliers_proportion_OK=NA, no_zero_inflation=NA, observation_factor_ratio_OK=NA, enough_levels_random_effect=NA, rate=NA) | |
206 | |
207 for (cut in Anacut) | |
208 { | |
209 cutData <- tmpData[grep(cut,tmpData[,FactAna]),] | |
210 cutData <- dropLevels.f(cutData) | |
211 | |
212 res <-"" | |
213 | |
214 if (listRand[1] != "None") | |
215 { | |
216 res <- tryCatch(glmmTMB(exprML,family=loiChoisie, data=cutData), error=function(e){}) | |
217 }else{ | |
218 res <- tryCatch(glm(exprML,data=cutData,family=loiChoisie), error=function(e){}) | |
219 } | |
220 | |
221 ## Write results : | |
222 if (! is.null(res)) | |
223 { | |
224 TabSum <- sortiesLM.f(objLM=res, TabSum=TabSum, metrique=metrique, | |
225 factAna=factAna, cut=cut, colAna="analysis", lev=lev, #modSel=iFactGraphSel, listFactSel=listFactSel, | |
226 listFact=listFact, | |
227 Data=cutData, #Log=Log, | |
228 type=ifelse(tableMetrique == "unitSpSz" && factAna != "size.class", | |
229 "CL_unitobs", | |
230 "unitobs")) | |
231 | |
232 TabRate[TabRate[,"analysis"]==cut,c(2:11)] <- noteGLM.f(data=cutData, objLM=res, metric=metrique, listFact=listFact, details=TRUE) | |
233 | |
234 }else{ | |
235 cat("\nCannot compute GLM for level",cut,"Check if one or more factor(s) have only one level, or try with another distribution for the model in advanced settings \n\n") | |
236 } | |
237 | |
238 } | |
239 | |
240 ## Global analysis : | |
241 | |
242 if (listRand[1] != "None") | |
243 { | |
244 resG <- glmmTMB(exprML,family=loiChoisie, data=tmpData) | |
245 }else{ | |
246 resG <- glm(exprML,data=tmpData,family=loiChoisie) | |
247 } | |
248 | |
249 ## write results : | |
250 TabSum <- sortiesLM.f(objLM=resG, TabSum=TabSum, metrique=metrique, | |
251 factAna=factAna, cut="global", colAna="analysis", lev=lev, #modSel=iFactGraphSel, listFactSel=listFactSel, | |
252 listFact=listFact, | |
253 Data=tmpData, #Log=Log, | |
254 type=ifelse(tableMetrique == "unitSpSz" && factAna != "size.class", | |
255 "CL_unitobs", | |
256 "unitobs")) | |
257 | |
258 TabRate[TabRate[,"analysis"]=="global",c(2:11)] <- noteGLM.f(data=tmpData, objLM=resG, metric=metrique, listFact=listFact, details=TRUE) | |
259 noteGLMs.f(tabRate=TabRate,exprML=exprML,objLM=resG, file_out=TRUE) | |
260 ## simple statistics and infos : | |
261 filename <- "GLMSummaryFull.txt" | |
262 | |
263 ## Save data on model : | |
264 | |
265 infoStats.f(filename=filename, Data=tmpData, agregLevel=aggreg, type="stat", | |
266 metrique=metrique, factGraph=factAna, #factGraphSel=modSel, | |
267 listFact=listFact)#, listFactSel=listFactSel) | |
268 | |
269 return(TabSum) | |
270 | |
271 } | |
272 | |
273 ################# Analysis | |
274 | |
275 Tab <- modeleLineaireWP2.unitobs.f(metrique=metric, listFact=listFact, listRand=listRand, FactAna=FactAna, Distrib=Distrib, log=log, tabMetrics=obs, tableMetrique=aggreg, tabUnitobs=tabUnitobs, nbName="number") | |
276 | |
277 write.table(Tab,"GLMSummary.tabular", row.names=FALSE, sep="\t", dec=".",fileEncoding="UTF-8") |