comparison FunctExeCalcGLMGalaxy.r @ 0:f0dc3958e65d draft

"planemo upload for repository https://github.com/ColineRoyaux/PAMPA-Galaxy commit 07f1028cc764f920b1e6419c151f04ab4e3600fa"
author ecology
date Tue, 21 Jul 2020 06:00:31 -0400
parents
children 44b9069775ca
comparison
equal deleted inserted replaced
-1:000000000000 0:f0dc3958e65d
1 #Rscript
2
3 #####################################################################################################################
4 #####################################################################################################################
5 ################################# Compute a Generalized Linear Model from your data #################################
6 #####################################################################################################################
7 #####################################################################################################################
8
9 ###################### Packages
10 suppressMessages(library(multcomp))
11 suppressMessages(library(glmmTMB)) ###Version: 0.2.3
12 suppressMessages(library(gap))
13
14 ###################### Load arguments and declaring variables
15
16 args = commandArgs(trailingOnly=TRUE)
17 #options(encoding = "UTF-8")
18
19 if (length(args) < 10) {
20 stop("At least 4 arguments must be supplied : \n- two input dataset files (.tabular) : metrics table and unitobs table \n- Interest variable field from metrics table \n- Response variable from unitobs table.", call.=FALSE) #si pas d'arguments -> affiche erreur et quitte / if no args -> error and exit1
21
22 } else {
23 Importdata <- args[1] ###### file name : metrics table
24 ImportUnitobs <- args[2] ###### file name : unitobs informations
25 colmetric <- as.numeric(args[3]) ###### Selected interest metric for GLM
26 listFact <- strsplit(args [4],",")[[1]] ###### Selected response factors for GLM
27 listRand <- strsplit(args [5],",")[[1]] ###### Selected randomized response factors for GLM
28 colFactAna <- args[6] ####### (optional) Selected splitting factors for GLMs
29 Distrib <- args[7] ###### (optional) Selected distribution for GLM
30 log <- args[8] ###### (Optional) Log on interest metric ?
31 aggreg <- args[9] ###### Aggregation level of the data table
32 source(args[10]) ###### Import functions
33
34 }
35 #### Data must be a dataframe with at least 3 variables : unitobs representing location and year ("observation.unit"), species code ("species.code") and abundance ("number")
36
37
38 #Import des données / Import data
39 obs<- read.table(Importdata,sep="\t",dec=".",header=TRUE,encoding="UTF-8") #
40 obs[obs == -999] <- NA
41 metric <- colnames(obs)[colmetric]
42 tabUnitobs <- read.table(ImportUnitobs,sep="\t",dec=".",header=TRUE,encoding="UTF-8")
43 tabUnitobs[tabUnitobs == -999] <- NA
44
45 if (colFactAna != "None")
46 {
47 FactAna <- colnames(tabUnitobs)[as.numeric(colFactAna)]
48 if (class(tabUnitobs[FactAna]) == "numeric" || FactAna == "observation.unit"){stop("Wrong chosen separation factor : Analysis can't be separated by observation unit or numeric factor")}
49 }else{
50 FactAna <- colFactAna
51 }
52
53
54 #factors <- fact.det.f(Obs=obs)
55
56 vars_data1<- NULL
57 err_msg_data1<-"The input metrics dataset doesn't have the right format. It needs to have at least the following 2 variables :\n- observation.unit (or year and site)\n- numeric or integer metric\n"
58 check_file(obs,err_msg_data1,vars_data1,2)
59
60 vars_data2 <- c(listFact,listRand)
61 err_msg_data2<-"The input unitobs dataset doesn't have the right format. It needs to have at least the following 2 variables :\n- observation.unit (or year and site)\n- factors used in GLM (habitat, year and/or site)\n"
62 check_file(tabUnitobs,err_msg_data2,vars_data2[vars_data2 != "None"],2)
63
64 ####################################################################################################
65 ########## Computing Generalized Linear Model ## Function : modeleLineaireWP2.unitobs.f ############
66 ####################################################################################################
67
68 modeleLineaireWP2.unitobs.f <- function(metrique, listFact, listRand, FactAna, Distrib, log=FALSE, tabMetrics, tableMetrique, tabUnitobs, unitobs="observation.unit", nbName="number")
69 {
70 ## Purpose: Monitoring steps for GLM on unitobs
71 ## ----------------------------------------------------------------------
72 ## Arguments: metrique : selected metric
73 ## listFact : Factors for GLM
74 ## listRand : Random factors for GLM
75 ## factAna : Separation factor for GLMs
76 ## Distrib : selected distribution for model
77 ## log : log transformation on data ? boolean
78 ## tabMetrics : data table metrics
79 ## tableMetrique : data table's name
80 ## tabUnitobs : data table unitobs
81 ## ----------------------------------------------------------------------
82 ## Author: Yves Reecht, Date: 18 août 2010, 15:59 modified by Coline ROYAUX 04 june 2020
83
84 tmpData <- tabMetrics
85
86 if (listRand[1] != "None")
87 {
88 if (all(is.element(listFact,listRand)) || listFact[1] == "None")
89 {
90 RespFact <- paste("(1|",paste(listRand,collapse=") + (1|"),")")
91 listF <- NULL
92 listFact <- listRand
93 }else{
94 listF <- listFact[!is.element(listFact,listRand)]
95 RespFact <- paste(paste(listF, collapse=" + ")," + (1|",paste(listRand,collapse=") + (1|"),")")
96 listFact <- c(listF,listRand)
97 }
98 }else{
99 listF <- listFact
100 RespFact <- paste(listFact, collapse=" + ")
101 }
102
103 ##Creating model's expression :
104
105 #if (log == FALSE) {
106 exprML <- eval(parse(text=paste(metrique, "~", RespFact)))
107 #}else{
108 # exprML <- eval(parse(text=paste("log(",metrique,")", "~", RespFact)))
109 #}
110
111 ##Creating analysis table :
112
113 listFactTab <- c(listFact, FactAna)
114 listFactTab <- listFactTab[listFactTab != "None"]
115
116 if (all(is.na(match(tmpData[,unitobs],tabUnitobs[,unitobs])))) {stop("Observation units doesn't match in the two input tables")}
117
118 if(! is.element("species.code",colnames(tmpData)))
119 {
120 col <- c(unitobs,metrique)
121 tmpData <- cbind(tmpData[,col], tabUnitobs[match(tmpData[,unitobs],tabUnitobs[,unitobs]),listFactTab])
122 colnames(tmpData) <- c(col,listFactTab)
123
124 for (i in listFactTab) {
125 switch(i,
126 tmpData[,i] <- as.factor(tmpData[,i]))
127 }
128 }else{
129 stop("Warning : wrong data frame, data frame should be aggregated by observation unit (year and site)")
130 }
131
132 ## Suppress unsed 'levels' :
133 tmpData <- dropLevels.f(tmpData)
134
135 ## Automatic choice of distribution if none is selected by user :
136 if (Distrib == "None")
137 {
138 switch(class(tmpData[,metrique]),
139 "integer"={loiChoisie <- "poisson"},
140 "numeric"={loiChoisie <- "gaussian"},
141 stop("Selected metric class doesn't fit, you should select an integer or a numeric variable"))
142 }else{
143 loiChoisie <- Distrib
144 }
145
146
147 ## Compute Model(s) :
148
149 if (FactAna != "None" && nlevels(tmpData[,FactAna]) > 1)
150 {
151 Anacut <- levels(tmpData[,FactAna])
152 }else{
153 Anacut <- NULL
154 }
155
156 ##Create results table :
157 lev <- unlist(lapply(listF,FUN=function(x){levels(tmpData[,x])}))
158
159 if (listRand[1] != "None") ## if random effects
160 {
161 TabSum <- data.frame(analysis=c("global", Anacut),AIC=NA,BIC=NA,logLik=NA, deviance=NA,df.resid=NA)
162 colrand <- unlist(lapply(listRand,
163 FUN=function(x){lapply(c("Std.Dev","NbObservation","NbLevels"),
164 FUN=function(y){paste(x,y,collapse = ":")
165 })
166 }))
167 TabSum[,colrand] <- NA
168
169 if (! is.null(lev)) ## if fixed effects + random effects
170 {
171 colcoef <- unlist(lapply(c("(Intercept)",lev),
172 FUN=function(x){lapply(c("Estimate","Std.Err","Zvalue","Pvalue","signif"),
173 FUN=function(y){paste(x,y,collapse = ":")
174 })
175 }))
176 }else{ ## if no fixed effects
177 colcoef <- NULL
178 }
179
180 }else{ ## if no random effects
181 TabSum <- data.frame(analysis=c("global", Anacut),AIC=NA,Resid.deviance=NA,df.resid=NA,Null.deviance=NA,df.null=NA)
182
183 switch(loiChoisie,
184 "gaussian"={colcoef <- unlist(lapply(c("(Intercept)",lev),
185 FUN=function(x){lapply(c("Estimate","Std.Err","Tvalue","Pvalue","signif"),
186 FUN=function(y){paste(x,y,collapse = ":")
187 })
188 }))},
189 "quasipoisson"={colcoef <- unlist(lapply(c("(Intercept)",lev),
190 FUN=function(x){lapply(c("Estimate","Std.Err","Tvalue","Pvalue","signif"),
191 FUN=function(y){paste(x,y,collapse = ":")
192 })
193 }))},
194 colcoef <- unlist(lapply(c("(Intercept)",lev),
195 FUN=function(x){lapply(c("Estimate","Std.Err","Zvalue","Pvalue","signif"),
196 FUN=function(y){paste(x,y,collapse = ":")
197 })
198 })))
199
200 }
201
202 TabSum[,colcoef] <- NA
203
204 ### creating rate table
205 TabRate <- data.frame(analysis=c("global", Anacut), complete_plan=NA, balanced_plan=NA, NA_proportion_OK=NA, no_residual_dispersion=NA, uniform_residuals=NA, outliers_proportion_OK=NA, no_zero_inflation=NA, observation_factor_ratio_OK=NA, enough_levels_random_effect=NA, rate=NA)
206
207 for (cut in Anacut)
208 {
209 cutData <- tmpData[grep(cut,tmpData[,FactAna]),]
210 cutData <- dropLevels.f(cutData)
211
212 res <-""
213
214 if (listRand[1] != "None")
215 {
216 res <- tryCatch(glmmTMB(exprML,family=loiChoisie, data=cutData), error=function(e){})
217 }else{
218 res <- tryCatch(glm(exprML,data=cutData,family=loiChoisie), error=function(e){})
219 }
220
221 ## Write results :
222 if (! is.null(res))
223 {
224 TabSum <- sortiesLM.f(objLM=res, TabSum=TabSum, metrique=metrique,
225 factAna=factAna, cut=cut, colAna="analysis", lev=lev, #modSel=iFactGraphSel, listFactSel=listFactSel,
226 listFact=listFact,
227 Data=cutData, #Log=Log,
228 type=ifelse(tableMetrique == "unitSpSz" && factAna != "size.class",
229 "CL_unitobs",
230 "unitobs"))
231
232 TabRate[TabRate[,"analysis"]==cut,c(2:11)] <- noteGLM.f(data=cutData, objLM=res, metric=metrique, listFact=listFact, details=TRUE)
233
234 }else{
235 cat("\nCannot compute GLM for level",cut,"Check if one or more factor(s) have only one level, or try with another distribution for the model in advanced settings \n\n")
236 }
237
238 }
239
240 ## Global analysis :
241
242 if (listRand[1] != "None")
243 {
244 resG <- glmmTMB(exprML,family=loiChoisie, data=tmpData)
245 }else{
246 resG <- glm(exprML,data=tmpData,family=loiChoisie)
247 }
248
249 ## write results :
250 TabSum <- sortiesLM.f(objLM=resG, TabSum=TabSum, metrique=metrique,
251 factAna=factAna, cut="global", colAna="analysis", lev=lev, #modSel=iFactGraphSel, listFactSel=listFactSel,
252 listFact=listFact,
253 Data=tmpData, #Log=Log,
254 type=ifelse(tableMetrique == "unitSpSz" && factAna != "size.class",
255 "CL_unitobs",
256 "unitobs"))
257
258 TabRate[TabRate[,"analysis"]=="global",c(2:11)] <- noteGLM.f(data=tmpData, objLM=resG, metric=metrique, listFact=listFact, details=TRUE)
259 noteGLMs.f(tabRate=TabRate,exprML=exprML,objLM=resG, file_out=TRUE)
260 ## simple statistics and infos :
261 filename <- "GLMSummaryFull.txt"
262
263 ## Save data on model :
264
265 infoStats.f(filename=filename, Data=tmpData, agregLevel=aggreg, type="stat",
266 metrique=metrique, factGraph=factAna, #factGraphSel=modSel,
267 listFact=listFact)#, listFactSel=listFactSel)
268
269 return(TabSum)
270
271 }
272
273 ################# Analysis
274
275 Tab <- modeleLineaireWP2.unitobs.f(metrique=metric, listFact=listFact, listRand=listRand, FactAna=FactAna, Distrib=Distrib, log=log, tabMetrics=obs, tableMetrique=aggreg, tabUnitobs=tabUnitobs, nbName="number")
276
277 write.table(Tab,"GLMSummary.tabular", row.names=FALSE, sep="\t", dec=".",fileEncoding="UTF-8")