Mercurial > repos > ecology > pampa_glmcomm
comparison FunctPAMPAGalaxy.r @ 0:f0dc3958e65d draft
"planemo upload for repository https://github.com/ColineRoyaux/PAMPA-Galaxy commit 07f1028cc764f920b1e6419c151f04ab4e3600fa"
author | ecology |
---|---|
date | Tue, 21 Jul 2020 06:00:31 -0400 |
parents | |
children | 44b9069775ca |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:f0dc3958e65d |
---|---|
1 #Rscript | |
2 | |
3 | |
4 ################################################################################################################################## | |
5 ####################### PAMPA Galaxy tools functions : Calculate metrics, compute GLM and plot ################################# | |
6 ################################################################################################################################## | |
7 | |
8 #### Based on Yves Reecht R script | |
9 #### Modified by Coline ROYAUX for integrating within Galaxy-E | |
10 | |
11 ######################################### start of the function fact.def.f called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r | |
12 ####### Define the finest aggregation with the observation table | |
13 | |
14 fact.det.f <- function (Obs, | |
15 size.class="size.class", | |
16 code.especes="species.code", | |
17 unitobs="observation.unit") | |
18 { | |
19 if (any(is.element(c(size.class), colnames(obs))) && all(! is.na(obs[, size.class]))) | |
20 { | |
21 factors <- c(unitobs, code.especes, size.class) | |
22 }else{ | |
23 factors <- c(unitobs, code.especes) | |
24 } | |
25 return(factors) | |
26 } | |
27 | |
28 ######################################### end of the function fact.def.f | |
29 | |
30 ######################################### start of the function def.typeobs.f called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r | |
31 ####### Define observation type from colnames | |
32 | |
33 def.typeobs.f <- function(Obs) | |
34 { | |
35 if (any(is.element(c("rotation","rot","rotate"),colnames(obs)))) | |
36 { | |
37 ObsType <- "SVR" | |
38 }else{ | |
39 ObsType <- "other" | |
40 } | |
41 return(ObsType) | |
42 } | |
43 ######################################### end of the function fact.def.f | |
44 | |
45 ######################################### start of the function create.unitobs called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r | |
46 ####### Create unitobs column when inexistant | |
47 create.unitobs <- function(data,year="year",point="point", unitobs="observation.unit") | |
48 { | |
49 if (is.element(paste(unitobs),colnames(data)) && all(grepl("[1-2][0|8|9][0-9]{2}_.*",data[,unitobs])==FALSE)) | |
50 { | |
51 unitab <- data | |
52 | |
53 }else{ | |
54 | |
55 unitab <- unite(data,col="observation.unit",c(year,point)) | |
56 } | |
57 return(unitab) | |
58 } | |
59 ######################################### start of the function create.unitobs | |
60 | |
61 ######################################### start of the function create.year.point called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r | |
62 ####### separate unitobs column when existant | |
63 create.year.point <- function(data,year="year",point="point", unitobs="observation.unit") | |
64 { | |
65 if (all(grepl("[1-2][0|8|9][0-9]{2}_.*",data[,unitobs]))==TRUE) | |
66 { | |
67 tab <- separate(data,col=unitobs,into=c(year,point),sep="_") | |
68 }else{ | |
69 tab <- separate(data,col=unitobs,into=c("site1", year,"obs"),sep=c(2,4)) | |
70 tab <- unite(tab, col=point, c("site1","obs")) | |
71 | |
72 } | |
73 | |
74 tab <- cbind(tab,observation.unit = data[,unitobs]) | |
75 | |
76 return(tab) | |
77 } | |
78 ######################################### start of the function create.unitobs | |
79 | |
80 ######################################### start of the function check_file called by every Galaxy Rscripts | |
81 | |
82 check_file<-function(dataset,err_msg,vars,nb_vars){ | |
83 | |
84 ## Purpose: General function to check integrity of input file. Will | |
85 ## check numbers and contents of variables(colnames). | |
86 ## return an error message and exit if mismatch detected | |
87 ## ---------------------------------------------------------------------- | |
88 ## Arguments: dataset : dataset name | |
89 ## err_msg : output error | |
90 ## vars : expected name of variables | |
91 ## nb_vars : expected number of variables | |
92 ## ---------------------------------------------------------------------- | |
93 ## Author: Alan Amosse, Benjamin Yguel | |
94 | |
95 if(ncol(dataset) < nb_vars){ #checking for right number of columns in the file if not = error message | |
96 cat("\nerr nb var\n") | |
97 stop(err_msg, call.=FALSE) | |
98 } | |
99 | |
100 for(i in vars){ | |
101 if(!(i %in% names(dataset))){ #checking colnames | |
102 stop(err_msg,call.=FALSE) | |
103 } | |
104 } | |
105 } | |
106 | |
107 ######################################### end of the function check_file | |
108 | |
109 | |
110 ######################################### start of the function statRotationsNumber.f called by calc.numbers.f | |
111 | |
112 statRotationsNumber.f <- function(factors, obs) | |
113 { | |
114 ## Purpose: Computing abundance statistics by rotation (max, sd) | |
115 ## on SVR data | |
116 ## ---------------------------------------------------------------------- | |
117 ## Arguments: factors : Names of aggregation factors | |
118 ## obs : observation data | |
119 ## ---------------------------------------------------------------------- | |
120 ## Author: Yves Reecht, Date: 29 oct. 2012, 16:01 modified by Coline ROYAUX 04 june 2020 | |
121 | |
122 ## Identification of valid rotations : | |
123 if (is.element("observation.unit", factors)) | |
124 { | |
125 ## valid rotations (empty must be there as well) : | |
126 rotations <- tapply(obs$rotation, | |
127 as.list(obs[ , c("observation.unit", "rotation"), drop=FALSE]), | |
128 function(x)length(x) > 0) | |
129 | |
130 ## Changing NA rotations in FALSE : | |
131 rotations[is.na(rotations)] <- FALSE | |
132 }else{ | |
133 #stop(mltext("statRotations.err.1")) | |
134 } | |
135 | |
136 ## ########################################################### | |
137 ## Abundance per rotation at chosen aggregation factors : | |
138 nombresR <- tapply(obs$number, | |
139 as.list(obs[ , c(factors, "rotation"), drop=FALSE]), | |
140 function(x,...){ifelse(all(is.na(x)), NA, sum(x,...))}, | |
141 na.rm = TRUE) | |
142 | |
143 ## If valid rotation NA are considered 0 : | |
144 nombresR <- sweep(nombresR, | |
145 match(names(dimnames(rotations)), names(dimnames(nombresR)), nomatch=NULL), | |
146 rotations, # Tableau des secteurs valides (booléens). | |
147 function(x, y) | |
148 { | |
149 x[is.na(x) & y] <- 0 # Lorsque NA et secteur valide => 0. | |
150 return(x) | |
151 }) | |
152 | |
153 ## ################################################## | |
154 ## Statistics : | |
155 | |
156 ## Means : | |
157 nombresMean <- apply(nombresR, which(is.element(names(dimnames(nombresR)), factors)), | |
158 function(x,...){ifelse(all(is.na(x)), NA, mean(x,...))}, na.rm=TRUE) | |
159 | |
160 ## Maxima : | |
161 nombresMax <- apply(nombresR, which(is.element(names(dimnames(nombresR)), factors)), | |
162 function(x,...){ifelse(all(is.na(x)), NA, max(x,...))}, na.rm=TRUE) | |
163 | |
164 ## SD : | |
165 nombresSD <- apply(nombresR, which(is.element(names(dimnames(nombresR)), factors)), | |
166 function(x,...){ifelse(all(is.na(x)), NA, sd(x,...))}, na.rm=TRUE) | |
167 | |
168 ## Valid rotations count : | |
169 nombresRotations <- apply(rotations, 1, sum, na.rm=TRUE) | |
170 | |
171 ## Results returned as list : | |
172 return(list(nombresMean=nombresMean, nombresMax=nombresMax, nombresSD=nombresSD, | |
173 nombresRotations=nombresRotations, nombresTot=nombresR)) | |
174 } | |
175 | |
176 ######################################### end of the function statRotationsNumber.f | |
177 | |
178 ######################################### start of the function calcNumber.default.f called by calc.numbers.f | |
179 | |
180 calcNumber.default.f <- function(obs, | |
181 factors=c("observation.unit", "species.code", "size.class"), | |
182 nbName="number") | |
183 { | |
184 ## Purpose : Compute abundances at finest aggregation | |
185 ## --------------------------------------------------------------------- | |
186 ## Arguments: obs : observation table | |
187 ## factors : aggregation factors | |
188 ## nbName : name of abundance column. | |
189 ## | |
190 ## Output: array with ndimensions = nfactors. | |
191 ## ---------------------------------------------------------------------- | |
192 ## Author: Yves Reecht, Date: 19 déc. 2011, 13:38 modified by Coline ROYAUX 04 june 2020 | |
193 | |
194 ## Sum individuals number : | |
195 nbr <- tapply(obs[ , nbName], | |
196 as.list(obs[ , factors]), | |
197 sum, na.rm = TRUE) | |
198 | |
199 ## Absences as "true zero" : | |
200 nbr[is.na(nbr)] <- 0 | |
201 | |
202 return(nbr) | |
203 } | |
204 | |
205 ######################################### end of the function calcNumber.default.f | |
206 | |
207 ######################################### start of the function calc.numbers.f | |
208 | |
209 calc.numbers.f <- function(obs, ObsType="", factors=c("observation.unit", "species.code", "size.class"), nbName="number") | |
210 { | |
211 ## Purpose: Produce data.frame used as table from output of calcNumber.default.f(). | |
212 ## ---------------------------------------------------------------------- | |
213 ## Arguments: obs : observation table | |
214 ## ObsType : Type of observation (SVR, LIT, ...) | |
215 ## factors : aggregation factors | |
216 ## nbName : name of abundance column | |
217 ## | |
218 ## Output: data.frame with (N aggregation factors + 1) columns | |
219 ## ---------------------------------------------------------------------- | |
220 ## Author: Yves Reecht, Date: 19 déc. 2011, 13:46 modified by Coline ROYAUX 04 june 2020 | |
221 | |
222 if (ObsType == "SVR") | |
223 { | |
224 ## Compute SVR abundances statistics : | |
225 statRotations <- statRotationsNumber.f(factors=factors, | |
226 obs=obs) | |
227 | |
228 ## Mean for rotating videos (3 rotations at most times) : | |
229 nbr <- statRotations[["nombresMean"]] | |
230 | |
231 }else{ | |
232 | |
233 nbr <- calcNumber.default.f(obs, factors, nbName) | |
234 } | |
235 | |
236 res <- as.data.frame(as.table(nbr), responseName=nbName) | |
237 | |
238 if (is.element("size.class", colnames(res))) | |
239 { | |
240 res$size.class[res$size.class == ""] <- NA | |
241 }else{} | |
242 | |
243 ## If integer abundances : | |
244 if (isTRUE(all.equal(res[ , nbName], as.integer(res[ , nbName])))) | |
245 { | |
246 res[ , nbName] <- as.integer(res[ , nbName]) | |
247 }else{} | |
248 | |
249 if (ObsType == "SVR") | |
250 { | |
251 ## statistics on abundances : | |
252 res$number.max <- as.vector(statRotations[["nombresMax"]]) | |
253 res$number.sd <- as.vector(statRotations[["nombresSD"]]) | |
254 | |
255 }else{} | |
256 | |
257 return(res) | |
258 } | |
259 | |
260 ######################################### end of the function calc.numbers.f | |
261 | |
262 ######################################### start of the function presAbs.f called by calcBiodiv.f | |
263 | |
264 presAbs.f <- function(nombres, logical=FALSE) | |
265 { | |
266 ## Purpose: Compute presence absence from abundances | |
267 ## ---------------------------------------------------------------------- | |
268 ## Arguments: nombres : vector of individuals count. | |
269 ## logical : (boolean) results as boolean or 0/1 ? | |
270 ## ---------------------------------------------------------------------- | |
271 ## Author: Yves Reecht, Date: 29 oct. 2010, 10:20 modified by Coline ROYAUX 04 june 2020 | |
272 | |
273 if (any(nombres < 0, na.rm=TRUE)) | |
274 { | |
275 stop("Negative abundances!") | |
276 }else{} | |
277 | |
278 if (logical) | |
279 { | |
280 return(nombres > 0) | |
281 }else{ | |
282 nombres[nombres > 0] <- 1 | |
283 return(nombres) | |
284 } | |
285 } | |
286 | |
287 ######################################### end of the function presAbs.f | |
288 | |
289 ######################################### start of the function betterCbind called by agregations.generic.f | |
290 | |
291 betterCbind <- function(..., dfList=NULL, deparse.level = 1) | |
292 { | |
293 ## Purpose: Apply cbind to data frame with mathcing columns but without | |
294 ## redundancies. | |
295 ## ---------------------------------------------------------------------- | |
296 ## Arguments: same as cbind... | |
297 ## dfList : data.frames list | |
298 ## ---------------------------------------------------------------------- | |
299 ## Author: Yves Reecht, Date: 17 janv. 2012, 21:10 modified by Coline ROYAUX 04 june 2020 | |
300 | |
301 if (is.null(dfList)) | |
302 { | |
303 dfList <- list(...) | |
304 }else{} | |
305 | |
306 return(do.call(cbind, | |
307 c(list(dfList[[1]][ , c(tail(colnames(dfList[[1]]), -1), | |
308 head(colnames(dfList[[1]]), 1))]), | |
309 lapply(dfList[-1], | |
310 function(x, colDel) | |
311 { | |
312 return(x[ , !is.element(colnames(x), | |
313 colDel), | |
314 drop=FALSE]) | |
315 }, | |
316 colDel=colnames(dfList[[1]])), | |
317 deparse.level=deparse.level))) | |
318 } | |
319 | |
320 ######################################### end of the function betterCbind | |
321 | |
322 ######################################### start of the function agregation.f called by agregations.generic.f | |
323 | |
324 agregation.f <- function(metric, Data, factors, casMetrique, | |
325 nbName="number") | |
326 { | |
327 ## Purpose: metric aggregation | |
328 ## ---------------------------------------------------------------------- | |
329 ## Arguments: metric: colnames of chosen metric | |
330 ## Data: Unaggregated data table | |
331 ## factors: aggregation factors vector | |
332 ## casMetrique: named vector of observation types depending | |
333 ## on chosen metric | |
334 ## nbName : abundance column name | |
335 ## ---------------------------------------------------------------------- | |
336 ## Author: Yves Reecht, Date: 20 déc. 2011, 14:29 modified by Coline ROYAUX 04 june 2020 | |
337 | |
338 switch(casMetrique[metric], | |
339 "sum"={ | |
340 res <- tapply(Data[ , metric], | |
341 as.list(Data[ , factors, drop=FALSE]), | |
342 function(x) | |
343 { | |
344 ifelse(all(is.na(x)), | |
345 NA, | |
346 sum(x, na.rm=TRUE)) | |
347 }) | |
348 }, | |
349 "w.mean"={ | |
350 res <- tapply(1:nrow(Data), | |
351 as.list(Data[ , factors, drop=FALSE]), | |
352 function(ii) | |
353 { | |
354 ifelse(all(is.na(Data[ii, metric])), | |
355 NA, | |
356 weighted.mean(Data[ii, metric], | |
357 Data[ii, nbName], | |
358 na.rm=TRUE)) | |
359 }) | |
360 }, | |
361 "w.mean.colonies"={ | |
362 res <- tapply(1:nrow(Data), | |
363 as.list(Data[ , factors, drop=FALSE]), | |
364 function(ii) | |
365 { | |
366 ifelse(all(is.na(Data[ii, metric])), | |
367 NA, | |
368 weighted.mean(Data[ii, metric], | |
369 Data[ii, "colonies"], | |
370 na.rm=TRUE)) | |
371 }) | |
372 }, | |
373 "w.mean.prop"={ | |
374 res <- tapply(1:nrow(Data), | |
375 as.list(Data[ , factors, drop=FALSE]), | |
376 function(ii) | |
377 { | |
378 ifelse(all(is.na(Data[ii, metric])) || sum(Data[ii, "nombre.tot"], na.rm=TRUE) == 0, | |
379 NA, | |
380 ifelse(all(na.omit(Data[ii, metric]) == 0), # Pour ne pas avoir NaN. | |
381 0, | |
382 (sum(Data[ii, nbName][ !is.na(Data[ii, metric])], na.rm=TRUE) / | |
383 sum(Data[ii, "nombre.tot"], na.rm=TRUE)) * | |
384 ## Correction if size class isn't an aggregation factor | |
385 ## (otherwise value divided by number of present classes) : | |
386 ifelse(is.element("size.class", factors), | |
387 100, | |
388 100 * length(unique(Data$size.class))))) | |
389 }) | |
390 | |
391 }, | |
392 "w.mean.prop.bio"={ | |
393 res <- tapply(1:nrow(Data), | |
394 as.list(Data[ , factors, drop=FALSE]), | |
395 function(ii) | |
396 { | |
397 ifelse(all(is.na(Data[ii, metric])) || sum(Data[ii, "tot.biomass"], na.rm=TRUE) == 0, | |
398 NA, | |
399 ifelse(all(na.omit(Data[ii, metric]) == 0), # Pour ne pas avoir NaN. | |
400 0, | |
401 (sum(Data[ii, "biomass"][ !is.na(Data[ii, metric])], na.rm=TRUE) / | |
402 sum(Data[ii, "tot.biomass"], na.rm=TRUE)) * | |
403 ## Correction if size class isn't an aggregation factor | |
404 ## (otherwise value divided by number of present classes) : | |
405 ifelse(is.element("size.class", factors), | |
406 100, | |
407 100 * length(unique(Data$size.class))))) | |
408 }) | |
409 | |
410 }, | |
411 "pres"={ | |
412 res <- tapply(Data[ , metric], | |
413 as.list(Data[ , factors, drop=FALSE]), | |
414 function(x) | |
415 { | |
416 ifelse(all(is.na(x)), # When only NAs. | |
417 NA, | |
418 ifelse(any(x > 0, na.rm=TRUE), # Otherwise... | |
419 1, # ... presence if at least one observation in the group. | |
420 0)) | |
421 }) | |
422 }, | |
423 "nbMax"={ | |
424 ## Recuperation of raw abundances with selections : | |
425 nbTmp <- getReducedSVRdata.f(dataName=".NombresSVR", data=Data) | |
426 | |
427 ## Sum by factor cross / rotation : | |
428 nbTmp2 <- apply(nbTmp, | |
429 which(is.element(names(dimnames(nbTmp)), c(factors, "rotation"))), | |
430 function(x) | |
431 { | |
432 ifelse(all(is.na(x)), NA, sum(x, na.rm=TRUE)) | |
433 }) | |
434 | |
435 ## Sum by factor cross : | |
436 res <- as.array(apply(nbTmp2, | |
437 which(is.element(names(dimnames(nbTmp)), factors)), | |
438 function(x) | |
439 { | |
440 ifelse(all(is.na(x)), NA, max(x, na.rm=TRUE)) | |
441 })) | |
442 }, | |
443 "nbSD"={ | |
444 ## Recuperation of raw abundances with selections : | |
445 nbTmp <- getReducedSVRdata.f(dataName=".NombresSVR", data=Data) | |
446 | |
447 ## Sum by factor cross / rotation : | |
448 nbTmp2 <- apply(nbTmp, | |
449 which(is.element(names(dimnames(nbTmp)), c(factors, "rotation"))), | |
450 function(x) | |
451 { | |
452 ifelse(all(is.na(x)), NA, sum(x, na.rm=TRUE)) | |
453 }) | |
454 | |
455 ## Sum by factor cross : | |
456 res <- as.array(apply(nbTmp2, | |
457 which(is.element(names(dimnames(nbTmp)), factors)), | |
458 function(x) | |
459 { | |
460 ifelse(all(is.na(x)), NA, sd(x, na.rm=TRUE)) | |
461 })) | |
462 }, | |
463 "densMax"={ | |
464 ## Recuperation of raw abundances with selections : | |
465 densTmp <- getReducedSVRdata.f(dataName=".DensitesSVR", data=Data) | |
466 | |
467 ## Sum by factor cross / rotation : | |
468 densTmp2 <- apply(densTmp, | |
469 which(is.element(names(dimnames(densTmp)), c(factors, "rotation"))), | |
470 function(x) | |
471 { | |
472 ifelse(all(is.na(x)), NA, sum(x, na.rm=TRUE)) | |
473 }) | |
474 | |
475 ## Sum by factor cross : | |
476 res <- as.array(apply(densTmp2, | |
477 which(is.element(names(dimnames(densTmp)), factors)), | |
478 function(x) | |
479 { | |
480 ifelse(all(is.na(x)), NA, max(x, na.rm=TRUE)) | |
481 })) | |
482 }, | |
483 "densSD"={ | |
484 ## Recuperation of raw abundances with selections : | |
485 densTmp <- getReducedSVRdata.f(dataName=".DensitesSVR", data=Data) | |
486 | |
487 ## Sum by factor cross / rotation : | |
488 densTmp2 <- apply(densTmp, | |
489 which(is.element(names(dimnames(densTmp)), c(factors, "rotation"))), | |
490 function(x) | |
491 { | |
492 ifelse(all(is.na(x)), NA, sum(x, na.rm=TRUE)) | |
493 }) | |
494 | |
495 ## Sum by factor cross : | |
496 res <- as.array(apply(densTmp2, | |
497 which(is.element(names(dimnames(densTmp)), factors)), | |
498 function(x) | |
499 { | |
500 ifelse(all(is.na(x)), NA, sd(x, na.rm=TRUE)) | |
501 })) | |
502 }, | |
503 "%.nesting"={ | |
504 res <- tapply(1:nrow(Data), | |
505 as.list(Data[ , factors, drop=FALSE]), | |
506 function(ii) | |
507 { | |
508 ifelse(all(is.na(Data[ii, metric])), | |
509 NA, | |
510 weighted.mean(Data[ii, metric], | |
511 Data[ii, "readable.tracks"], | |
512 na.rm=TRUE)) | |
513 }) | |
514 }, | |
515 stop("Not implemented!") | |
516 ) | |
517 | |
518 ## dimension names | |
519 names(dimnames(res)) <- c(factors) | |
520 | |
521 ## Transformation to long format : | |
522 reslong <- as.data.frame(as.table(res), responseName=metric) | |
523 reslong <- reslong[ , c(tail(colnames(reslong), 1), head(colnames(reslong), -1))] # metric first | |
524 | |
525 return(reslong) | |
526 } | |
527 | |
528 ######################################### end of the function agregation.f | |
529 | |
530 ######################################### start of the function agregations.generic.f called y calcBiodiv.f in FucntExeCalcCommIndexesGalaxy.r | |
531 | |
532 agregations.generic.f <- function(Data, metrics, factors, listFact=NULL, unitSpSz=NULL, unitSp=NULL, | |
533 nbName="number") | |
534 { | |
535 ## Purpose: Aggregate data | |
536 ## ---------------------------------------------------------------------- | |
537 ## Arguments: Data : data set | |
538 ## metrics : aggregated metric | |
539 ## factors : aggregation factors | |
540 ## listFact : other factors to aggregate and add to output | |
541 ## unitSpSz : Metrics table by unitobs/species/Size Class | |
542 ## unitSp : Metrics table by unitobs/species | |
543 ## nbName : abundance colname | |
544 ## | |
545 ## Output : aggregated data frame | |
546 ## ---------------------------------------------------------------------- | |
547 ## Author: Yves Reecht, Date: 18 oct. 2010, 15:47 modified by Coline ROYAUX 04 june 2020 | |
548 | |
549 ## trt depending on metric type : | |
550 casMetrique <- c("number"="sum", | |
551 "mean.length"="w.mean", | |
552 "taille_moy"="w.mean", | |
553 "biomass"="sum", | |
554 "Biomass"="sum", | |
555 "weight"="sum", | |
556 "mean.weight"="w.mean", | |
557 "density"="sum", | |
558 "Density"="sum", | |
559 "CPUE"="sum", | |
560 "CPUE.biomass"="sum", | |
561 "pres.abs"="pres", | |
562 "abundance.prop.SC"="w.mean.prop", # Not OK [!!!] ? | |
563 "biomass.prop.SC"="w.mean.prop.bio", # Not OK [!!!] ? | |
564 ## Benthos : | |
565 "colonies"="sum", | |
566 "coverage"="sum", | |
567 "mean.size.colonies"="w.mean.colonies", | |
568 ## SVR (expérimental) : | |
569 "number.max"="nbMax", | |
570 "number.sd"="nbSD", | |
571 "density.max"="densMax", | |
572 "density.sd"="densSD", | |
573 "biomass.max"="sum", | |
574 "spawning.success"="%.nesting", | |
575 "spawnings"="sum", | |
576 "readable.tracks"="sum", | |
577 "tracks.number"="sum") | |
578 | |
579 ## add "readable.tracks" for egg laying percentage : | |
580 if (any(casMetrique[metrics] == "%.nesting")) | |
581 { | |
582 if (is.element("size.class", colnames(Data))) | |
583 { | |
584 if (is.null(unitSpSz)) stop("unitSpSz doit être défini") | |
585 | |
586 Data <- merge(Data, | |
587 unitSpSz[ , c("species.code", "observation.unit", "size.class", "readable.tracks")], | |
588 by=c("species.code", "observation.unit", "size.class"), | |
589 suffixes=c("", ".y")) | |
590 }else{ | |
591 if (is.null(unitSp)) stop("unitSp must be defined") | |
592 | |
593 Data <- merge(Data, | |
594 unitSp[ , c("species.code", "observation.unit", "readable.tracks")], | |
595 by=c("species.code", "observation.unit"), | |
596 suffixes=c("", ".y")) | |
597 } | |
598 }else{} | |
599 | |
600 ## Add "number" field for computing ponderate means if absent : | |
601 if (any(casMetrique[metrics] == "w.mean" | casMetrique[metrics] == "w.mean.prop")) | |
602 { | |
603 if (is.element("size.class", colnames(Data))) | |
604 { | |
605 if (is.null(unitSpSz)) stop("unitSpSz must be defined") | |
606 | |
607 Data <- merge(Data, | |
608 unitSpSz[ , c("species.code", "observation.unit", "size.class", nbName)], | |
609 by=c("species.code", "observation.unit", "size.class"), | |
610 suffixes=c("", ".y")) | |
611 | |
612 ## add tot abundance / species / observation unit : | |
613 nbTot <- tapply(unitSpSz[ , nbName], | |
614 as.list(unitSpSz[ , c("species.code", "observation.unit")]), | |
615 sum, na.rm=TRUE) | |
616 | |
617 Data <- merge(Data, | |
618 as.data.frame(as.table(nbTot), responseName="nombre.tot")) | |
619 }else{ | |
620 if (is.null(unitSp)) stop("unitSp must be defined") | |
621 | |
622 Data <- merge(Data, | |
623 unitSp[ , c("species.code", "observation.unit", nbName)], # [!!!] unitSpSz ? | |
624 by=c("species.code", "observation.unit"), | |
625 suffixes=c("", ".y")) | |
626 } | |
627 }else{} | |
628 | |
629 ## Add biomass field of biomass proportion by size class : | |
630 if (any(casMetrique[metrics] == "w.mean.prop.bio")) | |
631 { | |
632 if (is.null(unitSpSz)) stop("unitSpSz doit être défini") | |
633 | |
634 Data <- merge(Data, | |
635 unitSpSz[ , c("species.code", "observation.unit", "size.class", "biomass")], | |
636 by=c("species.code", "observation.unit", "size.class"), | |
637 suffixes=c("", ".y")) | |
638 | |
639 ## add tot biomass / species / observation unit : | |
640 biomTot <- tapply(unitSpSz$biomass, | |
641 as.list(unitSpSz[ , c("species.code", "observation.unit")]), | |
642 function(x) | |
643 { | |
644 ifelse(all(is.na(x)), | |
645 NA, | |
646 sum(x, na.rm=TRUE)) | |
647 }) | |
648 | |
649 Data <- merge(Data, | |
650 as.data.frame(as.table(biomTot), responseName="tot.biomass")) | |
651 } | |
652 | |
653 ## add colony field for ponderate means pondérées if absent : | |
654 if (any(casMetrique[metrics] == "w.mean.colonies" & ! is.element("colonies", colnames(Data)))) | |
655 { | |
656 Data$colonies <- unitSp[match(apply(Data[ , c("species.code", "observation.unit")], | |
657 1, paste, collapse="*"), | |
658 apply(unitSp[ , c("species.code", "observation.unit")], | |
659 1, paste, collapse="*")), "colonies"] | |
660 }else{} | |
661 | |
662 | |
663 ## Aggregation of metric depending on factors : | |
664 reslong <- betterCbind(dfList=lapply(metrics, # sapply used to have names | |
665 agregation.f, | |
666 Data=Data, factors=factors, casMetrique=casMetrique, | |
667 nbName=nbName)) | |
668 | |
669 ## Aggregation and add other factors : | |
670 if ( ! (is.null(listFact) || length(listFact) == 0)) | |
671 { | |
672 reslong <- cbind(reslong, | |
673 sapply(Data[ , listFact, drop=FALSE], | |
674 function(fact) | |
675 { | |
676 tapply(fact, | |
677 as.list(Data[ , factors, drop=FALSE]), | |
678 function(x) | |
679 { | |
680 if (length(x) > 1 && length(unique(x)) > 1) # must be one modality | |
681 { | |
682 return(NULL) # otherwise it is NULL | |
683 }else{ | |
684 unique(as.character(x)) | |
685 } | |
686 }) | |
687 })) | |
688 }else{} | |
689 | |
690 ## If some factors aren't at the right class : | |
691 if (any(tmp <- sapply(reslong[ , listFact, drop=FALSE], class) != sapply(Data[ , listFact, drop=FALSE], class))) | |
692 { | |
693 for (i in which(tmp)) | |
694 { | |
695 switch(sapply(Data[ , listFact, drop=FALSE], class)[i], | |
696 "integer"={ | |
697 reslong[ , listFact[i]] <- as.integer(as.character(reslong[ , listFact[i]])) | |
698 }, | |
699 "numeric"={ | |
700 reslong[ , listFact[i]] <- as.numeric(as.character(reslong[ , listFact[i]])) | |
701 }, | |
702 reslong[ , listFact[i]] <- eval(call(paste("as", sapply(Data[ , listFact, drop=FALSE], class)[i], sep="."), | |
703 reslong[ , listFact[i]])) | |
704 ) | |
705 } | |
706 }else{} | |
707 | |
708 ## Initial order of factors levels : | |
709 reslong <- as.data.frame(sapply(colnames(reslong), | |
710 function(x) | |
711 { | |
712 if (is.factor(reslong[ , x])) | |
713 { | |
714 return(factor(reslong[ , x], levels=levels(Data[ , x]))) | |
715 }else{ | |
716 return(reslong[ , x]) | |
717 } | |
718 }, simplify=FALSE)) | |
719 | |
720 | |
721 ## Check of other aggregated factors supplémentaires. There must be no NULL elements : | |
722 if (any(sapply(reslong[ , listFact], function(x){any(is.null(unlist(x)))}))) | |
723 { | |
724 warning(paste("One of the suppl. factors is probably a subset", | |
725 " of the observations grouping factor(s).", sep="")) | |
726 return(NULL) | |
727 }else{ | |
728 return(reslong) | |
729 } | |
730 } | |
731 | |
732 ######################################### end of the function agregations.generic.f | |
733 | |
734 ######################################### start of the function dropLevels.f called y calcBiodiv.f in FucntExeCalcCommIndexesGalaxy.r and modeleLineaireWP2.unitobs.f in FunctExeCalcGLMGalaxy.r | |
735 dropLevels.f <- function(df, which=NULL) | |
736 { | |
737 ## Purpose: Suppress unused levels of factors | |
738 ## ---------------------------------------------------------------------- | |
739 ## Arguments: df : a data.frame | |
740 ## which : included columns index (all by default) | |
741 ## ---------------------------------------------------------------------- | |
742 ## Author: Yves Reecht, Date: 10 août 2010, 13:29 modified by Coline ROYAUX 04 june 2020 | |
743 | |
744 if (class(df) != "data.frame") | |
745 { | |
746 stop("'df' must be a data.frame") | |
747 }else{ | |
748 if (is.null(which)) | |
749 { | |
750 x <- as.data.frame(sapply(df, function(x) | |
751 { | |
752 return(x[ ,drop=TRUE]) | |
753 }, simplify=FALSE), | |
754 stringsAsFactors=FALSE) | |
755 }else{ # Only some columns used | |
756 x <- df | |
757 | |
758 x[ , which] <- as.data.frame(sapply(df[ , which, drop=FALSE], | |
759 function(x) | |
760 { | |
761 return(x[ , drop=TRUE]) | |
762 }, simplify=FALSE), | |
763 stringsAsFactors=FALSE) | |
764 } | |
765 | |
766 return(x) | |
767 } | |
768 } | |
769 ######################################### end of the function dropLevels.f | |
770 | |
771 ######################################### start of the function subsetToutesTables.f called by modeleLineaireWP2.unitobs.f in FunctExeCalcGLMGalaxy.r | |
772 | |
773 subsetToutesTables.f <- function(metrique, tabMetrics, facteurs, selections, | |
774 tabUnitobs, refesp, tableMetrique="", nbName="number", ObsType = "", | |
775 exclude=NULL, add=c("species.code", "observation.unit")) | |
776 { | |
777 ## Purpose: Extract useful data only from chosen metrics and factors | |
778 ## ---------------------------------------------------------------------- | |
779 ## Arguments: metrique : chosen metric | |
780 ## facteurs : all chosen factors | |
781 ## selections : corresponding modality selected | |
782 ## tableMetrique : metrics table name | |
783 ## exclude : factors levels to exclude | |
784 ## add : field to add to data table | |
785 ## ---------------------------------------------------------------------- | |
786 ## Author: Yves Reecht, Date: 6 août 2010, 16:46 modified by Coline ROYAUX 04 june 2020 | |
787 | |
788 ## If no metrics table available : | |
789 if (is.element(tableMetrique, c("", "TableOccurrences", "TablePresAbs"))) | |
790 { | |
791 tableMetrique <- "unitSp" | |
792 }else{} | |
793 | |
794 casTables <- c("unitSp"="unitSp", | |
795 "TablePresAbs"="unitSp", | |
796 "unitSpSz"="unitSpSz") | |
797 | |
798 ## Recuperation of metrics table : | |
799 dataMetrique <- tabMetrics | |
800 unitobs <- tabUnitobs | |
801 refesp <- refesp | |
802 | |
803 ## If no metrics available or already computed : | |
804 if (is.element(metrique, c("", "occurrence.frequency"))) | |
805 { | |
806 metrique <- "tmp" | |
807 dataMetrique$tmp <- 0 | |
808 dataMetrique$tmp[dataMetrique[ , nbName] > 0] <- 1 | |
809 }else{} | |
810 | |
811 if (!is.null(add)) | |
812 { | |
813 metriques <- c(metrique, add[is.element(add, colnames(dataMetrique))]) | |
814 }else{ | |
815 metriques <- metrique | |
816 } | |
817 | |
818 ## Subset depending on metrics table | |
819 switch(casTables[tableMetrique], | |
820 ## Observation table by unitobs and species : | |
821 unitSp={ | |
822 restmp <- cbind(dataMetrique[!is.na(dataMetrique[ , metrique]) , metriques, drop=FALSE], | |
823 unitobs[match(dataMetrique$observation.unit[!is.na(dataMetrique[ , metrique])], | |
824 unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs | |
825 facteurs[is.element(facteurs, colnames(unitobs))], drop=FALSE], | |
826 refesp[match(dataMetrique$species.code[!is.na(dataMetrique[ , metrique])], | |
827 refesp$species.code), # ajout des colonnes sélectionnées d'especes | |
828 facteurs[is.element(facteurs, colnames(refesp))], drop=FALSE]) | |
829 }, | |
830 ## Observation table by unitobs, species and size class : | |
831 unitSpSz={ | |
832 restmp <- cbind(dataMetrique[!is.na(dataMetrique[ , metrique]) , | |
833 c(metriques, "size.class"), drop=FALSE], | |
834 unitobs[match(dataMetrique$observation.unit[!is.na(dataMetrique[ , metrique])], | |
835 unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs | |
836 facteurs[is.element(facteurs, colnames(unitobs))], drop=FALSE], | |
837 refesp[match(dataMetrique$species.code[!is.na(dataMetrique[ , metrique])], | |
838 refesp$species.code), # ajout des colonnes sélectionnées d'especes | |
839 facteurs[is.element(facteurs, colnames(refesp))], drop=FALSE]) | |
840 }, | |
841 ## Other cases : | |
842 restmp <- cbind(dataMetrique[!is.na(dataMetrique[ , metrique]) , metriques, drop=FALSE], | |
843 unitobs[match(dataMetrique$observation.unit[!is.na(dataMetrique[ , metrique])], | |
844 unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs. | |
845 facteurs[is.element(facteurs, colnames(unitobs))], drop=FALSE]) | |
846 ) | |
847 | |
848 selCol <- which(!is.na(selections)) | |
849 if (!is.null(exclude)) | |
850 { | |
851 selCol <- selCol[selCol != exclude] | |
852 } | |
853 | |
854 ## Particular case of size classes : | |
855 if (is.element("size.class", colnames(restmp))) | |
856 { | |
857 if (length(grep("^[[:digit:]]*[-_][[:digit:]]*$", unique(as.character(restmp$size.class)), perl=TRUE)) == | |
858 length(unique(as.character(restmp$size.class)))) | |
859 { | |
860 restmp$size.class <- | |
861 factor(as.character(restmp$size.class), | |
862 levels=unique(as.character(restmp$size.class))[ | |
863 order(as.numeric(sub("^([[:digit:]]*)[-_][[:digit:]]*$", | |
864 "\\1", | |
865 unique(as.character(restmp$size.class)), | |
866 perl=TRUE)), | |
867 na.last=FALSE)]) | |
868 }else{ | |
869 restmp$size.class <- factor(restmp$size.class) | |
870 } | |
871 }else{} | |
872 | |
873 ## Biomass and density conversion -> /100m² : | |
874 if (any(is.element(colnames(restmp), c("biomass", "density", | |
875 "biomass.max", "density.max", | |
876 "biomass.sd", "density.sd"))) && ObsType != "fishing") | |
877 { | |
878 restmp[ , is.element(colnames(restmp), | |
879 c("biomass", "density", | |
880 "biomass.max", "density.max", | |
881 "biomass.sd", "density.sd"))] <- 100 * | |
882 restmp[, is.element(colnames(restmp), | |
883 c("biomass", "density", | |
884 "biomass.max", "density.max", | |
885 "biomass.sd", "density.sd"))] | |
886 }else{} | |
887 | |
888 return(restmp) | |
889 } | |
890 | |
891 ######################################### end of the function subsetToutesTables.f | |
892 | |
893 | |
894 ######################################### start of the function sortiesLM.f called by modeleLineaireWP2.unitobs.f in FunctExeCalcGLMGalaxy.r | |
895 sortiesLM.f <- function(objLM, TabSum, #formule, | |
896 metrique, factAna, cut, colAna, listFact, lev = NULL, Data, | |
897 Log=FALSE, sufixe=NULL, type="espece") | |
898 { | |
899 ## Purpose: Form GLM and LM results | |
900 ## ---------------------------------------------------------------------- | |
901 ## Arguments: objLM : lm object | |
902 ## TabSum : output summary table | |
903 ## formule : LM formula | |
904 ## metrique : Chosen metric | |
905 ## factAna : separation factor | |
906 ## cut : level of separation factor | |
907 ## colAna : colname for separation factor in output summary table | |
908 ## listFact : Analysis factors list | |
909 ## levels : Levels of analysis factors list | |
910 ## Data : Data used for analysis | |
911 ## Log : put log on metric ? (boolean) | |
912 ## sufixe : sufix for file name | |
913 ## type : analysis type | |
914 ## ---------------------------------------------------------------------- | |
915 ## Author: Yves Reecht, Date: 25 août 2010, 16:19 modified by Coline ROYAUX 04 june 2020 | |
916 | |
917 sumLM <- summary(objLM) | |
918 if (length(grep("^glmmTMB", objLM$call)) > 0) #if random effects | |
919 { | |
920 TabSum[TabSum[,colAna]==cut,"AIC"] <- sumLM$AICtab[1] | |
921 TabSum[TabSum[,colAna]==cut,"BIC"] <- sumLM$AICtab[2] | |
922 TabSum[TabSum[,colAna]==cut,"logLik"] <- sumLM$AICtab[3] | |
923 TabSum[TabSum[,colAna]==cut,"deviance"] <- sumLM$AICtab[4] | |
924 TabSum[TabSum[,colAna]==cut,"df.resid"] <- sumLM$AICtab[5] | |
925 | |
926 if (! is.null(lev)) ## if fixed effects + random effects | |
927 { | |
928 TabCoef <- as.data.frame(sumLM$coefficients$cond) | |
929 TabCoef$signif <- lapply(TabCoef[,"Pr(>|z|)"],FUN=function(x){if(!is.na(x) && x < 0.05){"yes"}else{"no"}}) | |
930 | |
931 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Zvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"z value"] | |
932 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Pvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"Pr(>|z|)"] | |
933 | |
934 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Zvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"z value"]}else{NA}})) | |
935 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Pvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"Pr(>|z|)"]}else{NA}})) | |
936 }else{} | |
937 | |
938 switch(as.character(length(sumLM$varcor$cond)), | |
939 "1"={StdD <- c(sumLM$varcor$cond[[1]])}, | |
940 "2"={StdD <- c(sumLM$varcor$cond[[1]],sumLM$varcor$cond[[2]])}, | |
941 StdD <- NULL) | |
942 TabSum[TabSum[,colAna]==cut,grepl(paste(listRand,"Std.Dev",collapse="|"),colnames(TabSum))] <- StdD | |
943 TabSum[TabSum[,colAna]==cut,grepl(paste(listRand,"NbObservation",collapse="|"),colnames(TabSum))] <- sumLM$nobs | |
944 TabSum[TabSum[,colAna]==cut,grepl(paste(listRand,"NbLevels",collapse="|"),colnames(TabSum))] <- unlist(lapply(listRand,FUN=function(x){nlevels(Data[,x])})) | |
945 | |
946 }else{ ## if fixed effects only | |
947 | |
948 TabSum[TabSum[,colAna]==cut,"AIC"] <- sumLM$aic | |
949 TabSum[TabSum[,colAna]==cut,"Resid.deviance"] <- sumLM$deviance | |
950 TabSum[TabSum[,colAna]==cut,"df.resid"] <- sumLM$df.residual | |
951 TabSum[TabSum[,colAna]==cut,"Null.deviance"] <- sumLM$null.deviance | |
952 TabSum[TabSum[,colAna]==cut,"df.null"] <- sumLM$df.null | |
953 TabCoef <- as.data.frame(sumLM$coefficients) | |
954 | |
955 if (sumLM$family[1] == "gaussian" || sumLM$family[1] == "quasipoisson") | |
956 { | |
957 | |
958 TabCoef$signif <- lapply(TabCoef[,"Pr(>|t|)"],FUN=function(x){if(!is.na(x) && x < 0.05){"yes"}else{"no"}}) | |
959 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Tvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"t value"] | |
960 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Pvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"Pr(>|t|)"] | |
961 | |
962 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Tvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"t value"]}else{NA}})) | |
963 | |
964 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Pvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"Pr(>|t|)"]}else{NA}})) | |
965 }else{ | |
966 TabCoef$signif <- lapply(TabCoef[,"Pr(>|z|)"],FUN=function(x){if(!is.na(x) && x < 0.05){"yes"}else{"no"}}) | |
967 | |
968 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Zvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"z value"] | |
969 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Pvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"Pr(>|z|)"] | |
970 | |
971 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Zvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"z value"]}else{NA}})) | |
972 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Pvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"Pr(>|z|)"]}else{NA}})) | |
973 } | |
974 } | |
975 | |
976 if (! is.null(lev)) ## if fixed effects | |
977 { | |
978 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Estimate",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"Estimate"] | |
979 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Std.Err",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"Std. Error"] | |
980 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*signif",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"signif"] | |
981 | |
982 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Estimate",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"Estimate"]}else{NA}})) | |
983 | |
984 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Std.Err",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"Std. Error"]}else{NA}})) | |
985 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"signif",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"signif"]}else{NA}})) | |
986 }else{} | |
987 | |
988 return(TabSum) | |
989 | |
990 } | |
991 | |
992 | |
993 ######################################### end of the function sortiesLM.f | |
994 | |
995 ######################################### start of the function graphTitle.f called by sortiesLM.f | |
996 | |
997 graphTitle.f <- function(metrique, modGraphSel, factGraph, listFact, model=NULL, type="espece", | |
998 lang = getOption("P.lang")) | |
999 { | |
1000 ## Purpose: Automatically write a name for a graph | |
1001 ## ---------------------------------------------------------------------- | |
1002 ## Arguments: | |
1003 ## ---------------------------------------------------------------------- | |
1004 ## Author: Yves Reecht, Date: 14 oct. 2010, 15:44 modified by Coline ROYAUX 04 june 2020 | |
1005 return(paste(ifelse(is.null(model), | |
1006 "Values of ", | |
1007 paste(model, | |
1008 " for", | |
1009 sep="")), | |
1010 metrique, | |
1011 ifelse(is.element(type, c("espece", "unitobs", "CL_espece", "unitobs(CL)")), | |
1012 paste("aggregated"), | |
1013 ""), | |
1014 switch(type, | |
1015 "espece"=" per species and station", | |
1016 "CL_espece"=" per size class, species and station", | |
1017 "unitobs"=" per station", | |
1018 "unitobs(CL)"=" per station", | |
1019 "CL_unitobs"=" per size class and station", | |
1020 "biodiv"=" per station", | |
1021 ""), | |
1022 switch(type, | |
1023 "espece"={ | |
1024 ifelse(modGraphSel == "", # Only separation factor if defined | |
1025 "", | |
1026 paste("\nfor the field", | |
1027 " '", factGraph, "' = ", modGraphSel, sep="")) | |
1028 }, | |
1029 "CL_espece"={ | |
1030 ifelse(modGraphSel == "", # Only separation factor if defined | |
1031 "", | |
1032 paste("\nfor the field", | |
1033 " '", factGraph, "' = ", modGraphSel, sep="")) | |
1034 }, | |
1035 "unitobs"={ | |
1036 ifelse(modGraphSel[1] == "", # Only separation factor if defined | |
1037 "\nfor all species", | |
1038 paste("\nfor all species matching", | |
1039 " '", factGraph, "' = (", | |
1040 paste(modGraphSel, collapse=", "), ")", sep="")) | |
1041 }, | |
1042 "unitobs(CL)"={ | |
1043 ifelse(modGraphSel[1] == "", # Only separation factor if defined | |
1044 "\nfor all size classes", | |
1045 paste("\nfor size classes matching", | |
1046 " '", factGraph, "' = (", | |
1047 paste(modGraphSel, collapse=", "), ")", sep="")) | |
1048 }, | |
1049 "CL_unitobs"={ | |
1050 ifelse(modGraphSel[1] == "", # Only separation factor if defined | |
1051 "\nfor all species", | |
1052 paste("\nfor all species matching", | |
1053 " '", factGraph, "' = (", | |
1054 paste(modGraphSel, collapse=", "), ")", sep="")) | |
1055 }, | |
1056 "biodiv"={ | |
1057 ifelse(modGraphSel[1] == "", # Only separation factor if defined | |
1058 "", | |
1059 paste("\nfor stations matching", | |
1060 " '", factGraph, "' = (", | |
1061 paste(modGraphSel, collapse=", "), ")", sep="")) | |
1062 }, | |
1063 ""), | |
1064 "\n by ", | |
1065 paste(sapply(listFact[length(listFact):1], | |
1066 function(x)paste(c(## varNames.f(x, "article"), | |
1067 "", | |
1068 x, collapse="")), | |
1069 collapse=" and"), | |
1070 "\n", sep=""))) | |
1071 } | |
1072 | |
1073 ######################################### end of the function graphTitle.f | |
1074 | |
1075 ######################################### start of the function noteGLM.f called by modeleLineaireWP2.species.f and modeleLineaireWP2.unitobs.f | |
1076 | |
1077 noteGLM.f <- function(data, objLM, metric, listFact, details = FALSE) | |
1078 { | |
1079 ## Purpose: Note your GLM analysis | |
1080 ## ---------------------------------------------------------------------- | |
1081 ## Arguments: data : Dataframe used for analysis | |
1082 ## objLM : GLM assessed | |
1083 ## metric : selected metric | |
1084 ## listFact : Analysis factors list | |
1085 ## ---------------------------------------------------------------------- | |
1086 ## Author: Coline ROYAUX, 26 june 2020 | |
1087 | |
1088 rate <- 0 | |
1089 detres <- list(complete_plan=NA, balanced_plan=NA, NA_proportion_OK=NA, no_residual_dispersion=NA, uniform_residuals=NA, outliers_proportion_OK=NA, no_zero_inflation=NA, observation_factor_ratio_OK=NA, enough_levels_random_effect=NA, rate=NA) | |
1090 | |
1091 #### Data criterions #### | |
1092 | |
1093 ## Plan | |
1094 | |
1095 plan <- as.data.frame(table(data[,listFact])) | |
1096 | |
1097 if (nrow(plan[plan$Freq==0,]) < nrow(plan)*0.1) # +0.5 if less than 10% of possible factor's level combinations aren't represented in the sampling scheme | |
1098 { | |
1099 rate <- rate + 0.5 | |
1100 detres$complete_plan <- TRUE | |
1101 | |
1102 if (summary(as.factor(plan$Freq))[1] > nrow(plan)*0.9) # +0.5 if the frequency of the most represented frequency of possible factor's levels combinations is superior to 90% of the total number of possible factor's levels combinations | |
1103 { | |
1104 rate <- rate + 0.5 | |
1105 detres$balanced_plan <- TRUE | |
1106 }else{} | |
1107 | |
1108 }else{ | |
1109 detres$complete_plan <- FALSE | |
1110 detres$balanced_plan <- FALSE | |
1111 } | |
1112 | |
1113 if (nrow(data) - nrow(na.omit(data)) < nrow(data)*0.1) # +1 if less than 10% of the lines in the dataframe bares a NA | |
1114 { | |
1115 rate <- rate + 1 | |
1116 detres$NA_proportion_OK <- TRUE | |
1117 }else{ | |
1118 detres$NA_proportion_OK <- FALSE | |
1119 } | |
1120 | |
1121 #### Model criterions #### | |
1122 | |
1123 if (length(grep("quasi",objLM$family)) == 0) #DHARMa doesn't work with quasi distributions | |
1124 { | |
1125 | |
1126 Residuals <- simulateResiduals(objLM) | |
1127 | |
1128 capture.output(testRes <- testResiduals(Residuals)) | |
1129 testZero <- testZeroInflation(Residuals) | |
1130 | |
1131 ## dispersion of residuals | |
1132 | |
1133 if (testRes$dispersion$p.value > 0.05) # +1.5 if dispersion tests not significative | |
1134 { | |
1135 rate <- rate + 1.5 | |
1136 detres$no_residual_dispersion <- TRUE | |
1137 }else{ | |
1138 detres$no_residual_dispersion <- FALSE | |
1139 } | |
1140 | |
1141 ## uniformity of residuals | |
1142 | |
1143 if (testRes$uniformity$p.value > 0.05) # +1 if uniformity tests not significative | |
1144 { | |
1145 rate <- rate + 1.5 | |
1146 detres$uniform_residuals <- TRUE | |
1147 }else{ | |
1148 detres$uniform_residuals <- FALSE | |
1149 } | |
1150 | |
1151 ## residuals outliers | |
1152 | |
1153 if (testRes$outliers$p.value > 0.05) # +0.5 if outliers tests not significative | |
1154 { | |
1155 rate <- rate + 0.5 | |
1156 detres$outliers_proportion_OK <- TRUE | |
1157 }else{ | |
1158 detres$outliers_proportion_OK <- FALSE | |
1159 } | |
1160 | |
1161 ## Zero inflation test | |
1162 | |
1163 if (testZero$p.value > 0.05) # +1 if zero inflation tests not significative | |
1164 { | |
1165 rate <- rate + 1.5 | |
1166 detres$no_zero_inflation <- TRUE | |
1167 }else{ | |
1168 detres$no_zero_inflation <- FALSE | |
1169 } | |
1170 | |
1171 ## Factors/observations ratio | |
1172 | |
1173 if (length(listFact)/nrow(na.omit(data)) < 0.1) # +1 if quantity of factors is less than 10% of the quantity of observations | |
1174 { | |
1175 rate <- rate + 1 | |
1176 detres$observation_factor_ratio_OK <- TRUE | |
1177 }else{ | |
1178 detres$observation_factor_ratio_OK <- FALSE | |
1179 } | |
1180 | |
1181 ## less than 10 factors' level on random effect | |
1182 | |
1183 if (length(grep("^glmmTMB", objLM$call)) > 0) | |
1184 { | |
1185 nlevRand <- c() | |
1186 for(fact in names(summary(objLM)$varcor$cond)) | |
1187 { | |
1188 nlevRand <- c(nlevRand,length(unlist(unique(data[,fact])))) | |
1189 } | |
1190 | |
1191 if (all(nlevRand > 10)) # +1 if more than 10 levels in one random effect | |
1192 { | |
1193 rate <- rate + 1 | |
1194 detres$enough_levels_random_effect <- TRUE | |
1195 }else{ | |
1196 detres$enough_levels_random_effect <- FALSE | |
1197 } | |
1198 }else{} | |
1199 | |
1200 detres$rate <- rate | |
1201 | |
1202 if (details) | |
1203 { | |
1204 return(detres) | |
1205 }else{ | |
1206 return(rate) | |
1207 } | |
1208 | |
1209 }else{ | |
1210 return(NA) | |
1211 cat("Models with quasi distributions can't be rated for now") | |
1212 } | |
1213 } | |
1214 | |
1215 ######################################### end of the function noteGLM.f | |
1216 | |
1217 ######################################### start of the function noteGLMs.f called by modeleLineaireWP2.species.f and modeleLineaireWP2.unitobs.f | |
1218 | |
1219 noteGLMs.f <- function(tabRate, exprML, objLM, file_out=FALSE) | |
1220 { | |
1221 ## Purpose: Note your GLM analysis | |
1222 ## ---------------------------------------------------------------------- | |
1223 ## Arguments: data : rates table from noteGLM.f | |
1224 ## objLM : GLM assessed | |
1225 ## metric : selected metric | |
1226 ## listFact : Analysis factors list | |
1227 ## ---------------------------------------------------------------------- | |
1228 ## Author: Coline ROYAUX, 26 june 2020 | |
1229 | |
1230 RateM <- mean(na.omit(tabRate[,"rate"])) | |
1231 sum <- summary(objLM) | |
1232 | |
1233 if (length(grep("^glmmTMB", objLM$call)) > 0) | |
1234 { | |
1235 if (median(na.omit(tabRate[,"rate"])) >= 6) # if 50% has a rate superior or equal to 6 +1 | |
1236 { | |
1237 RateM <- RateM + 1 | |
1238 } | |
1239 | |
1240 if (quantile(na.omit(tabRate[,"rate"]), probs=0.9) >= 6) # if 90% has a rate superior or equal to 6 +1 | |
1241 { | |
1242 RateM <- RateM + 1 | |
1243 } | |
1244 }else{ | |
1245 if (median(na.omit(tabRate[,"rate"])) >= 5) # if 50% has a rate superior or equal to 5 +1 | |
1246 { | |
1247 RateM <- RateM + 1 | |
1248 } | |
1249 | |
1250 if (quantile(na.omit(tabRate[,"rate"]), probs=0.9) >= 5) # if 90% has a rate superior or equal to 5 +1 | |
1251 { | |
1252 RateM <- RateM + 1 | |
1253 } | |
1254 } | |
1255 | |
1256 if (file_out) | |
1257 { | |
1258 namefile <- "RatingGLM.txt" | |
1259 | |
1260 cat("###########################################################################", | |
1261 "\n########################### Analysis evaluation ###########################", | |
1262 "\n###########################################################################", file=namefile, fill=1,append=TRUE) | |
1263 | |
1264 ## Informations on model : | |
1265 cat("\n\n######################################### \nFitted model:", file=namefile, fill=1,append=TRUE) | |
1266 cat("\t", deparse(exprML), "\n\n", file=namefile, sep="",append=TRUE) | |
1267 cat("Family: ", sum$family[[1]], | |
1268 file=namefile,append=TRUE) | |
1269 cat("\n\nNumber of analysis: ", nrow(tabRate), file=namefile, append=TRUE) | |
1270 | |
1271 ## Global rate : | |
1272 cat("\n\n######################################### \nGlobal rate for all analysis:", | |
1273 "\n\n", RateM, "out of 10", file=namefile, append=TRUE) | |
1274 | |
1275 ## details on every GLM : | |
1276 #NA_proportion_OK=NA, no_residual_dispersion=NA, uniform_residuals=NA, outliers_proportion_OK=NA, no_zero_inflation=NA, observation_factor_ratio_OK=NA, enough_levels_random_effect=NA, rate=NA | |
1277 cat("\n\n######################################### \nDetails on every analysis:\n\n", file=namefile, append=TRUE) | |
1278 cat("Analysis\tC1\tC2\tC3\tC4\tC5\tC6\tC7\tC8\tC9\tFinal rate", file=namefile, append=TRUE) | |
1279 apply(tabRate, 1, FUN=function(x) | |
1280 { | |
1281 | |
1282 if (!is.na(x["complete_plan"]) && x["complete_plan"]==TRUE) | |
1283 { | |
1284 cat("\n",x[1],"\tyes", file=namefile, append=TRUE) | |
1285 }else{ | |
1286 cat("\n",x[1],"\tno", file=namefile, append=TRUE) | |
1287 } | |
1288 | |
1289 for (i in c("balanced_plan","NA_proportion_OK", "no_residual_dispersion", "uniform_residuals", "outliers_proportion_OK", "no_zero_inflation", "observation_factor_ratio_OK", "enough_levels_random_effect")) | |
1290 { | |
1291 if (!is.na(x[i]) && x[i]==TRUE) | |
1292 { | |
1293 cat("\tyes", file=namefile, append=TRUE) | |
1294 }else{ | |
1295 cat("\tno", file=namefile, append=TRUE) | |
1296 } | |
1297 } | |
1298 | |
1299 cat("\t",x["rate"], "/ 8", file=namefile, append=TRUE) | |
1300 | |
1301 | |
1302 }) | |
1303 cat("\n\nC1: Complete plan?\nC2: Balanced plan?\nC3: Few NA?\nC4: Regular dispersion?\nC5: Uniform residuals?\nC6: Regular outliers proportion?\nC7: No zero-inflation?\nC8: Enough observations for the amount of factors?\nC9: Enough levels on random effect?", file=namefile, append=TRUE) | |
1304 | |
1305 ## Red flags - advice : | |
1306 cat("\n\n######################################### \nRed flags - advice:\n\n", file=namefile, append=TRUE) | |
1307 if (all(na.omit(tabRate["NA_proportion_OK"]) == FALSE)) | |
1308 { | |
1309 cat("\n","\t- More than 10% of your dataset bares NAs", file=namefile, append=TRUE) | |
1310 }else{} | |
1311 | |
1312 if (length(grep("FALSE",tabRate["no_residual_dispersion"])) / length(na.omit(tabRate["no_residual_dispersion"])) > 0.5) | |
1313 { | |
1314 cat("\n","\t- More than 50% of your analyses are over- or under- dispersed : Try with another distribution family", file=namefile, append=TRUE) | |
1315 }else{} | |
1316 | |
1317 if (length(grep("FALSE",tabRate["uniform_residuals"])) / length(na.omit(tabRate["uniform_residuals"])) > 0.5) | |
1318 { | |
1319 cat("\n","\t- More than 50% of your analyses haven't an uniform distribution of residuals : Try with another distribution family", file=namefile, append=TRUE) | |
1320 }else{} | |
1321 | |
1322 if (length(grep("FALSE",tabRate["outliers_proportion_OK"])) / length(na.omit(tabRate["outliers_proportion_OK"])) > 0.5) | |
1323 { | |
1324 cat("\n","\t- More than 50% of your analyses have too much outliers : Try with another distribution family or try to select your data", file=namefile, append=TRUE) | |
1325 }else{} | |
1326 | |
1327 if (length(grep("FALSE",tabRate["no_zero_inflation"])) / length(na.omit(tabRate["no_zero_inflation"])) > 0.5) | |
1328 { | |
1329 cat("\n","\t- More than 50% of your analyses have zero inflation : Try to select your data", file=namefile, append=TRUE) | |
1330 }else{} | |
1331 | |
1332 if (length(grep("FALSE",tabRate["observation_factor_ratio_OK"])) / length(na.omit(tabRate["observation_factor_ratio_OK"])) > 0.5) | |
1333 { | |
1334 cat("\n","\t- More than 50% of your analyses have not enough observations for the amount of factors : Try to use less factors in your analysis or try to use another separation factor", file=namefile, append=TRUE) | |
1335 }else{} | |
1336 | |
1337 if (any(tabRate["enough_levels_random_effect"] == FALSE, na.rm=TRUE) && length(grep("^glmmTMB", objLM$call)) > 0) | |
1338 { | |
1339 cat("\n","\t- Random effect hasn't enough levels to be robust : If it has less than ten levels remove the random effect", file=namefile, append=TRUE) | |
1340 }else{} | |
1341 }else{ | |
1342 | |
1343 return(RateM) | |
1344 | |
1345 } | |
1346 } | |
1347 | |
1348 ######################################### end of the function noteGLM.f | |
1349 | |
1350 ######################################### start of the function infoStats.f called by modeleLineaireWP2.species.f and modeleLineaireWP2.unitobs.f | |
1351 | |
1352 infoStats.f <- function(filename, Data, agregLevel=c("species", "unitobs"), type=c("graph", "stat"), | |
1353 metrique, factGraph, factGraphSel, listFact, listFactSel) | |
1354 { | |
1355 ## Purpose: Écrire les infos et statistic sur les données associées à | |
1356 ## un graphique ou analyse. | |
1357 ## ---------------------------------------------------------------------- | |
1358 ## Arguments: filename : chemin du fichier de résultats. | |
1359 ## Data : données du graphique/de l'analyse. | |
1360 ## agregLevel : niveau d'agrégation de la fonction appelante. | |
1361 ## type : type de fonction appelante (grapique ou analyse). | |
1362 ## metrique : la métrique choisie. | |
1363 ## factGraph : le facteur sélection des espèces. | |
1364 ## factGraphSel : la sélection de modalités pour ce dernier | |
1365 ## listFact : liste du (des) facteur(s) de regroupement | |
1366 ## listFactSel : liste des modalités sélectionnées pour ce(s) | |
1367 ## dernier(s) | |
1368 ## ---------------------------------------------------------------------- | |
1369 ## Author: Yves Reecht, Date: 10 sept. 2012, 15:26 modified by Coline ROYAUX 04 june 2020 | |
1370 | |
1371 ## Open file : | |
1372 File <- file(description=filename, | |
1373 open="w", encoding="latin1") | |
1374 | |
1375 ## if error : | |
1376 on.exit(if (exists("filename") && | |
1377 tryCatch(isOpen(File), | |
1378 error=function(e)return(FALSE))) close(File)) | |
1379 | |
1380 ## Metrics and factors infos : | |
1381 printSelectionInfo.f(metrique=metrique, #factGraph=factGraph, factGraphSel=factGraphSel, | |
1382 listFact=listFact, #listFactSel=listFactSel, | |
1383 File=File, | |
1384 agregLevel=agregLevel, type=type) | |
1385 | |
1386 ## statistics : | |
1387 if (class(Data) == "list") | |
1388 { | |
1389 cat("\n###################################################", | |
1390 "\nStatistics per level of splitting factor:\n", | |
1391 sep="", file=File,append=TRUE) | |
1392 | |
1393 invisible(sapply(1:length(Data), | |
1394 function(i) | |
1395 { | |
1396 printStats.f(Data=Data[[i]], metrique=metrique, listFact=listFact, File=File, | |
1397 headline=factGraphSel[i]) | |
1398 })) | |
1399 }else{ | |
1400 printStats.f(Data=Data, metrique=metrique, listFact=listFact, File=File, | |
1401 headline=NULL) | |
1402 } | |
1403 | |
1404 ## Close file : | |
1405 close(File) | |
1406 | |
1407 } | |
1408 | |
1409 ######################################### end of the function infoStats.f | |
1410 | |
1411 | |
1412 ######################################### start of the function printSelectionInfo.f called by infoStats.f | |
1413 | |
1414 printSelectionInfo.f <- function(metrique, listFact, | |
1415 File, | |
1416 agregLevel=c("species", "unitobs"), type=c("graph", "stat")) | |
1417 { | |
1418 ## Purpose: Write data informations | |
1419 ## ---------------------------------------------------------------------- | |
1420 ## Arguments: metrique : chosen metric | |
1421 ## listFact : factor's list | |
1422 ## File : Results file name | |
1423 ## agregLevel : aggregation level | |
1424 ## type : function type | |
1425 ## ---------------------------------------------------------------------- | |
1426 ## Author: Yves Reecht, Date: 11 sept. 2012, 10:41 modified by Coline ROYAUX 04 june 2020 | |
1427 | |
1428 cat("\n##################################################\n", | |
1429 "Metrics and factors (and possible units/selections):\n", | |
1430 sep="", file=File,append=TRUE) | |
1431 | |
1432 ## metric info : | |
1433 cat("\n Metrics:", metrique, | |
1434 "\n", file=File,append=TRUE) | |
1435 | |
1436 ## aggregation level : | |
1437 cat(" aggregated per ", | |
1438 switch(agregLevel, | |
1439 "CL_espece"=,"CL_unitobs"=,"spCL_unitobs"=,"spCL_espece"={ | |
1440 "size class / " | |
1441 }), | |
1442 switch(agregLevel, | |
1443 "CL_espece"=,"spCL_espece"=,"species"=,"spSpecies"=,"spEspece"={ | |
1444 "species / " | |
1445 }), | |
1446 switch(agregLevel, | |
1447 "spUnitobs"=,"spCL_unitobs"=,"spCL_espece"=,"spUnitobs(CL)"=,"spSpecies"=,"spEspece"={ | |
1448 paste(listFact, " (mean over ", sep="") | |
1449 }), | |
1450 "observation units", | |
1451 switch(agregLevel, | |
1452 "spUnitobs"=,"spCL_unitobs"=,"spCL_espece"=,"spUnitobs(CL)"=,"spSpecies"=,"spEspece"={ | |
1453 ")" | |
1454 }), | |
1455 ".\n", | |
1456 sep="", file=File,append=TRUE) | |
1457 | |
1458 ## Separation factors : | |
1459 # switch(agregLevel, | |
1460 # "species"=,"CL_espece"=,"espece"={ # Adapté également pour les LMs. | |
1461 # cat("\n", | |
1462 # switch(type, | |
1463 # "graph"="Graphics separation factor", | |
1464 # "stat"="Analyses separation factor"), | |
1465 # " : ", | |
1466 # ifelse(factGraph == "", "printSelectionInfo.f.11", | |
1467 # ifelse(is.na(factGraphSel[1]), | |
1468 # paste(varNames.f(factGraph, "nom"), "none!"), | |
1469 # paste(varNames.f(factGraph, "nom"), " (", | |
1470 # paste(factGraphSel, collapse=", "), ")", sep=""))), "\n", | |
1471 # sep="", file=File,append=TRUE) | |
1472 # }, | |
1473 # "unitobs"=,"CL_unitobs"=,"unitobs(CL)"=,"spUnitobs"={ | |
1474 # cat("(warning: no selection!!!)", | |
1475 # ifelse(factGraph == "", "\nSelection factor for aggregation of observations: ", | |
1476 # ifelse(is.na(factGraphSel[1]), | |
1477 # paste(varNames.f(factGraph, "nom"), "none (all species/size classes)!"), | |
1478 # paste(varNames.f(factGraph, "nom"), " (", | |
1479 # paste(factGraphSel, collapse=", "), ")", sep=""))), "\n", | |
1480 # sep="", file=File,append=TRUE) | |
1481 # }) | |
1482 | |
1483 ## Clustering factors : | |
1484 if (is.element(agregLevel, c("spCL_unitobs", "spCL_espece", "spSpecies", "spEspece", | |
1485 "spUnitobs", "spUnitobs(CL)"))) {type <- "spatialGraph"} | |
1486 | |
1487 cat(switch(type, | |
1488 "graph"="\nGrouping factor(s): \n * ", | |
1489 "stat"="\nAnalyses factor(s): \n * ", | |
1490 "spatialGraph"="\nSpatial aggregation factor(s): \n * "), | |
1491 paste(listFact,collaspe="\n * "),"\n",file=File,append=TRUE) | |
1492 | |
1493 # invisible(sapply(1:length(listFact), | |
1494 # function(i) | |
1495 # { | |
1496 # cat("\n * ", | |
1497 # ifelse(is.na(listFactSel[[i]][1]), | |
1498 # paste(varNames.f(listFact[i], "nom"), "(no selection)"), | |
1499 # paste(varNames.f(listFact[i], "nom"), " (", | |
1500 # paste(listFactSel[[i]], collapse=", "), ")", sep="")), "\n", | |
1501 # sep="", file=File,append=TRUE) | |
1502 # })) | |
1503 } | |
1504 | |
1505 ######################################### end of the function printSelectionInfo.f | |
1506 | |
1507 | |
1508 ######################################### start of the function printStats.f called by infoStats.f | |
1509 | |
1510 printStats.f <- function(Data, metrique, listFact, File, headline=NULL) | |
1511 { | |
1512 ## Purpose: Write general statistics table | |
1513 ## ---------------------------------------------------------------------- | |
1514 ## Arguments: Data : Analysis data | |
1515 ## metrique : metric's name | |
1516 ## listFact : Factor's list | |
1517 ## File : Simple statistics file name | |
1518 ## ---------------------------------------------------------------------- | |
1519 ## Author: Yves Reecht, Date: 11 sept. 2012, 10:09 modified by Coline ROYAUX 04 june 2020 | |
1520 | |
1521 ## Header : | |
1522 if ( ! is.null(headline)) | |
1523 { | |
1524 cat("\n", rep("#", nchar(headline) + 3), "\n", | |
1525 "## ", headline, "\n", | |
1526 sep="", file=File,append=TRUE) | |
1527 }else{} | |
1528 | |
1529 cat("\n########################\nBase statistics:\n\n", file=File,append=TRUE) | |
1530 | |
1531 capture.output(print(summary.fr(Data[ , metrique])), file=File, append=TRUE) | |
1532 | |
1533 if ( ! is.null(listFact)) | |
1534 { | |
1535 cat("\n#########################################", | |
1536 "\nStatistics per combination of factor levels:\n\n", file=File, sep="",append=TRUE) | |
1537 | |
1538 ## Compute summary for each existing factor's cross : | |
1539 res <- with(Data, | |
1540 tapply(eval(parse(text=metrique)), | |
1541 INDEX=do.call(paste, | |
1542 c(lapply(listFact, | |
1543 function(y)eval(parse(text=y))), | |
1544 sep=".")), | |
1545 FUN=summary.fr)) | |
1546 | |
1547 ## results in table | |
1548 capture.output(print(do.call(rbind, res)), | |
1549 file=File, append=TRUE) | |
1550 }else{} | |
1551 | |
1552 ## empty line : | |
1553 cat("\n", file=File,append=TRUE) | |
1554 } | |
1555 | |
1556 ######################################### end of the function printStats.f | |
1557 | |
1558 | |
1559 ######################################### start of the function summary.fr called by printStats.f | |
1560 summary.fr <- function(object, digits = max(3, getOption("digits") - 3),...) | |
1561 { | |
1562 ## Purpose: Adding SD and N to summary | |
1563 ## ---------------------------------------------------------------------- | |
1564 ## Arguments: object : Object to summarise | |
1565 ## ---------------------------------------------------------------------- | |
1566 ## Author: Yves Reecht, Date: 13 sept. 2012, 15:47 modified by Coline ROYAUX 04 june 2020 | |
1567 | |
1568 if ( ! is.numeric(object)) stop("Programming error") | |
1569 | |
1570 ## Compute summary : | |
1571 res <- c(summary(object=object, digits, ...), "sd"=signif(sd(x=object), digits=digits), "N"=length(object)) | |
1572 | |
1573 return(res) | |
1574 } | |
1575 | |
1576 ######################################### start of the function summary.fr | |
1577 | |
1578 ######################################### Package DHARMa | |
1579 | |
1580 ################ simulateResiduals.R | |
1581 | |
1582 #' Create simulated residuals | |
1583 #' | |
1584 #' The function creates scaled residuals by simulating from the fitted model. Residuals can be extracted with \code{\link{residuals.DHARMa}}. See \code{\link{testResiduals}} for an overview of residual tests, \code{\link{plot.DHARMa}} for an overview of available plots. | |
1585 #' | |
1586 #' @param fittedModel a fitted model of a class supported by DHARMa | |
1587 #' @param n number of simulations. Default is 100. A more save value would be 250 or even 1000. The smaller the number, the higher the stochastic error on the residuals. Also, for very small n, discretization artefacts can influence the tests. | |
1588 #' @param refit if FALSE, new data will be simulated and scaled residuals will be created by comparing observed data with new data. If TRUE, the model will be refit on the simulated data (parametric bootstrap), and scaled residuals will be created by comparing observed with refitted residuals. | |
1589 #' @param integerResponse if TRUE, noise will be added at to the residuals to maintain a uniform expectations for integer responses (such as Poisson or Binomial). Usually, the model will automatically detect the appropriate setting, so there is no need to adjust this setting. | |
1590 #' @param plot if TRUE, \code{\link{plotResiduals}} will be directly run after the residuals have been calculated | |
1591 #' @param ... parameters to pass to the simulate function of the model object. An important use of this is to specify whether simulations should be conditional on the current random effect estimates, e.g. via re.form. Note that not all models support syntax to specify conditionao or unconditional simulations. See also details | |
1592 #' @param seed the random seed to be used within DHARMa. The default setting, recommended for most users, is keep the random seed on a fixed value 123. This means that you will always get the same randomization and thus teh same result when running the same code. NULL = no new seed is set, but previous random state will be restored after simulation. FALSE = no seed is set, and random state will not be restored. The latter two options are only recommended for simulation experiments. See vignette for details. | |
1593 #' @param method the quantile randomization method used. The two options implemented at the moment are probability integral transform (PIT-) residuals (current default), and the "traditional" randomization procedure, that was used in DHARMa until version 0.3.0. For details, see \code{\link{getQuantile}} | |
1594 #' @return An S3 class of type "DHARMa", essentially a list with various elements. Implemented S3 functions include plot, print and \code{\link{residuals.DHARMa}}. Residuals returns the calculated scaled residuals. | |
1595 #' | |
1596 #' @details There are a number of important considerations when simulating from a more complex (hierarchical) model: | |
1597 #' | |
1598 #' \strong{Re-simulating random effects / hierarchical structure}: in a hierarchical model, we have several stochastic processes aligned on top of each other. Specifically, in a GLMM, we have a lower level stochastic process (random effect), whose result enters into a higher level (e.g. Poisson distribution). For other hierarchical models such as state-space models, similar considerations apply. | |
1599 #' | |
1600 #' In such a situation, we have to decide if we want to re-simulate all stochastic levels, or only a subset of those. For example, in a GLMM, it is common to only simulate the last stochastic level (e.g. Poisson) conditional on the fitted random effects. This is often referred to as a conditional simuation. For controlling how many levels should be re-simulated, the simulateResidual function allows to pass on parameters to the simulate function of the fitted model object. Please refer to the help of the different simulate functions (e.g. ?simulate.merMod) for details. For merMod (lme4) model objects, the relevant parameters are parameters are use.u and re.form | |
1601 #' | |
1602 #' If the model is correctly specified, the simulated residuals should be flat regardless how many hierarchical levels we re-simulate. The most thorough procedure would therefore be to test all possible options. If testing only one option, I would recommend to re-simulate all levels, because this essentially tests the model structure as a whole. This is the default setting in the DHARMa package. A potential drawback is that re-simulating the lower-level random effects creates more variability, which may reduce power for detecting problems in the upper-level stochastic processes. In particular dispersion tests may produce different results when switching from conditional to unconditional simulations, and often the conditional simulation is more sensitive. | |
1603 #' | |
1604 #' \strong{Integer responses}: a second complication is the treatment of inter responses. Imaging we have observed a 0, and we predict 30\% zeros - what is the quantile that we should display for the residual? To deal with this problem and maintain a uniform response, the option integerResponse adds a uniform noise from -0.5 to 0.5 on the simulated and observed response, which creates a uniform distribution - you can see this via hist(ecdf(runif(10000))(runif(10000))). | |
1605 #' | |
1606 #' DHARMa will try to automatically if the fitted model has an integer or discrete distribution via the family argument. However, in some cases the family does not allow to uniquely identify the distribution type. For example, a tweedie distribution can be inter or continuous. Therefore, DHARMa will additionally check the simulation results for repeated values, and will change the distribution type if repeated values are found (a message is displayed in this case). | |
1607 #' | |
1608 #' \strong{Refitting or not}: a third issue is how residuals are calculated. simulateResiduals has two options that are controlled by the refit parameter: | |
1609 #' | |
1610 #' 1. if refit = FALSE (default), new data is simulated from the fitted model, and residuals are calculated by comparing the observed data to the new data | |
1611 #' | |
1612 #' 2. if refit = TRUE, a parametric bootstrap is performed, meaning that the model is refit on the new data, and residuals are created by comparing observed residuals against refitted residuals. I advise against using this method per default (see more comments in the vignette), unless you are really sure that you need it. | |
1613 #' | |
1614 #' \strong{Residuals per group}: In many situations, it can be useful to look at residuals per group, e.g. to see how much the model over / underpredicts per plot, year or subject. To do this, use \code{\link{recalculateResiduals}}, together with a grouping variable (see also help) | |
1615 #' | |
1616 #' \strong{Transformation to other distributions}: DHARMa calculates residuals for which the theoretical expectation (assuming a correctly specified model) is uniform. To transfor this residuals to another distribution (e.g. so that a correctly specified model will have normal residuals) see \code{\link{residuals.DHARMa}}. | |
1617 #' | |
1618 #' @seealso \code{\link{testResiduals}}, \code{\link{plot.DHARMa}}, \code{\link{plotResiduals}}, \code{\link{print.DHARMa}}, \code{\link{residuals.DHARMa}}, \code{\link{recalculateResiduals}} | |
1619 #' | |
1620 #' | |
1621 #' @example inst/examples/simulateResidualsHelp.R | |
1622 #' @import stats | |
1623 #' @export | |
1624 simulateResiduals <- function(fittedModel, n = 250, refit = F, integerResponse = NULL, plot = F, seed = 123, method = c("PIT", "traditional"), ...){ | |
1625 | |
1626 ######## general assertions and startup calculations ########## | |
1627 | |
1628 if (n < 2) stop("error in DHARMa::simulateResiduals: n > 1 is required to calculate scaled residuals") | |
1629 checkModel(fittedModel) | |
1630 match.arg(method) | |
1631 randomState <-getRandomState(seed) | |
1632 on.exit({randomState$restoreCurrent()}) | |
1633 ptm <- proc.time() | |
1634 | |
1635 ####### extract model info ############ | |
1636 | |
1637 out = list() | |
1638 | |
1639 family = family(fittedModel) | |
1640 out$fittedModel = fittedModel | |
1641 out$modelClass = class(fittedModel)[1] | |
1642 | |
1643 out$nObs = nobs(fittedModel) | |
1644 out$nSim = n | |
1645 out$refit = refit | |
1646 out$observedResponse = getObservedResponse(fittedModel) | |
1647 | |
1648 if(is.null(integerResponse)){ | |
1649 if (family$family %in% c("binomial", "poisson", "quasibinomial", "quasipoisson", "Negative Binom", "nbinom2", "nbinom1", "genpois", "compois", "truncated_poisson", "truncated_nbinom2", "truncated_nbinom1", "betabinomial", "Poisson", "Tpoisson", "COMPoisson", "negbin", "Tnegbin") | grepl("Negative Binomial",family$family) ) integerResponse = TRUE | |
1650 else integerResponse = FALSE | |
1651 } | |
1652 out$integerResponse = integerResponse | |
1653 | |
1654 out$problems = list() | |
1655 | |
1656 # re-form should be set to ~0 to avoid spurious residual patterns, see https://github.com/florianhartig/DHARMa/issues/43 | |
1657 | |
1658 if(out$modelClass %in% c("HLfit")){ | |
1659 out$fittedPredictedResponse = predict(fittedModel, type = "response", re.form = ~0)[,1L] | |
1660 }else{ | |
1661 out$fittedPredictedResponse = predict(fittedModel, type = "response", re.form = ~0) | |
1662 } | |
1663 | |
1664 out$fittedFixedEffects = getFixedEffects(fittedModel) | |
1665 out$fittedResiduals = residuals(fittedModel, type = "response") | |
1666 | |
1667 ######## refit = F ################## | |
1668 | |
1669 if (refit == FALSE){ | |
1670 | |
1671 out$simulatedResponse = getSimulations(fittedModel, nsim = n, type = "normal", ...) | |
1672 | |
1673 checkSimulations(out$simulatedResponse, out$nObs, out$nSim) | |
1674 | |
1675 out$scaledResiduals = getQuantile(simulations = out$simulatedResponse , observed = out$observedResponse , integerResponse = integerResponse, method = method) | |
1676 | |
1677 ######## refit = T ################## | |
1678 } else { | |
1679 | |
1680 # Adding new outputs | |
1681 | |
1682 out$refittedPredictedResponse <- matrix(nrow = out$nObs, ncol = n ) | |
1683 out$refittedFixedEffects <- matrix(nrow = length(out$fittedFixedEffects), ncol = n ) | |
1684 #out$refittedRandomEffects <- matrix(nrow = length(out$fittedRandomEffects), ncol = n ) | |
1685 out$refittedResiduals = matrix(nrow = out$nObs, ncol = n) | |
1686 out$refittedPearsonResiduals = matrix(nrow = out$nObs, ncol = n) | |
1687 | |
1688 out$simulatedResponse = getSimulations(fittedModel, nsim = n, type = "refit", ...) | |
1689 | |
1690 for (i in 1:n){ | |
1691 | |
1692 simObserved = out$simulatedResponse[[i]] | |
1693 | |
1694 try({ | |
1695 | |
1696 # for testing | |
1697 # if (i==3) stop("x") | |
1698 # Note: also set silent = T for production | |
1699 | |
1700 refittedModel = getRefit(fittedModel, simObserved) | |
1701 | |
1702 out$refittedPredictedResponse[,i] = predict(refittedModel, type = "response") | |
1703 out$refittedFixedEffects[,i] = getFixedEffects(refittedModel) | |
1704 out$refittedResiduals[,i] = residuals(refittedModel, type = "response") | |
1705 out$refittedPearsonResiduals[,i] = residuals(refittedModel, type = "pearson") | |
1706 #out$refittedRandomEffects[,i] = ranef(refittedModel) | |
1707 }, silent = TRUE) | |
1708 } | |
1709 | |
1710 ######### residual checks ########### | |
1711 | |
1712 if(anyNA(out$refittedResiduals)) warning("DHARMa::simulateResiduals warning: on refit = TRUE, at least one of the refitted models produced an error. Inspect the refitted model values. Results may not be reliable.") | |
1713 | |
1714 ## check for convergence problems | |
1715 | |
1716 dup = sum(duplicated(out$refittedFixedEffects, MARGIN = 2)) | |
1717 if (dup > 0){ | |
1718 if (dup < n/3){ | |
1719 warning(paste("There were", dup, "of", n ,"duplicate parameter estimates in the refitted models. This may hint towards a problem with optimizer convergence in the fitted models. Results may not be reliable. The suggested action is to not use the refitting procedure, and diagnose with tools available for the normal (not refitted) simulated residuals. If you absolutely require the refitting procedure, try changing tolerance / iterations in the optimizer settings.")) | |
1720 } else { | |
1721 warning(paste("There were", dup, "of", n ,"duplicate parameter estimates in the refitted models. This may hint towards a problem with optimizer convergence in the fitted models. Results are likely not reliable. The suggested action is to not use the refitting procedure, and diagnose with tools available for the normal (not refitted) simulated residuals. If you absolutely require the refitting procedure, try changing tolerance / iterations in the optimizer settings.")) | |
1722 out$problems[[length(out$problems)+ 1]] = "error in refit" | |
1723 } | |
1724 } | |
1725 | |
1726 ######### residual calculations ########### | |
1727 | |
1728 out$scaledResiduals = getQuantile(simulations = out$refittedResiduals, observed = out$fittedResiduals, integerResponse = integerResponse, method = method) | |
1729 } | |
1730 | |
1731 ########### Wrapup ############ | |
1732 | |
1733 out$time = proc.time() - ptm | |
1734 out$randomState = randomState | |
1735 | |
1736 class(out) = "DHARMa" | |
1737 | |
1738 if(plot == TRUE) plot(out) | |
1739 | |
1740 return(out) | |
1741 } | |
1742 | |
1743 getPossibleModels<-function()c("lm", "glm", "negbin", "lmerMod", "glmerMod", "gam", "bam", "glmmTMB", "HLfit") | |
1744 | |
1745 | |
1746 | |
1747 #' Check if the fitted model is supported by DHARMa | |
1748 #' | |
1749 #' The function checks if the fitted model is supported by DHARMa, and if there are other issues that could create problems | |
1750 #' | |
1751 #' @param fittedModel a fitted model | |
1752 #' @param stop whether to throw an error if the model is not supported by DHARMa | |
1753 #' | |
1754 #' @details The main purpose of this function os to check if the fitted model class is supported by DHARMa. The function additionally checks for properties of the fitted model that could create problems for calculating residuals or working with the resuls in DHARMa. | |
1755 #' | |
1756 #' | |
1757 #' @keywords internal | |
1758 checkModel <- function(fittedModel, stop = F){ | |
1759 | |
1760 out = T | |
1761 | |
1762 if(!(class(fittedModel)[1] %in% getPossibleModels())){ | |
1763 if(stop == FALSE) warning("DHARMa: fittedModel not in class of supported models. Absolutely no guarantee that this will work!") | |
1764 else stop("DHARMa: fittedModel not in class of supported models") | |
1765 } | |
1766 | |
1767 # if(hasNA(fittedModel)) message("It seems there were NA values in the data used for fitting the model. This can create problems if you supply additional data to DHARMa functions. See ?checkModel for details") | |
1768 | |
1769 # TODO: check as implemented does not work reliably, check if there is any other option to check for NA | |
1770 # #' @example inst/examples/checkModelHelp.R | |
1771 | |
1772 # NA values in the data: checkModel will detect if there were NA values in the data frame. For NA values, most regression models will remove the entire observation from the data. This is not a problem for DHARMa - residuals are then only calculated for non-NA rows in the data. However, if you provide additional predictors to DHARMa, for example to plot residuals against a predictor, you will have to remove all NA rows that were also removed in the model. For most models, you can get the rows of the data that were actually used in the fit via rownames(model.frame(fittedModel)) | |
1773 | |
1774 | |
1775 if (class(fittedModel)[1] == "gam" ) if (class(fittedModel$family)[1] == "extended.family") stop("It seems you are trying to fit a model from mgcv that was fit with an extended.family. Simulation functions for these families are not yet implemented in DHARMa. See issue https://github.com/florianhartig/DHARMa/issues/11 for updates about this") | |
1776 | |
1777 } | |
1778 | |
1779 | |
1780 | |
1781 #' Check simulated data | |
1782 #' | |
1783 #' The function checks if the simulated data seems fine | |
1784 #' | |
1785 #' @param simulatedResponse the simulated response | |
1786 #' @param nObs number of observations | |
1787 #' @param nSim number of simulations | |
1788 #' | |
1789 #' @keywords internal | |
1790 checkSimulations <- function(simulatedResponse, nObs, nSim){ | |
1791 | |
1792 if(!inherits(simulatedResponse, "matrix")) securityAssertion("Simulation from the model produced wrong class", stop = T) | |
1793 | |
1794 if(any(dim(simulatedResponse) != c(nObs, nSim) )) securityAssertion("Simulation from the model produced wrong dimension", stop = T) | |
1795 | |
1796 if(any(!is.finite(simulatedResponse))) message("Simulations from your fitted model produce infinite values. Consider if this is sensible") | |
1797 | |
1798 if(any(is.nan(simulatedResponse))) securityAssertion("Simulations from your fitted model produce NaN values. DHARMa cannot calculated residuals for this. This is nearly certainly an error of the regression package you are using", stop = T) | |
1799 if(any(is.na(simulatedResponse))) securityAssertion("Simulations from your fitted model produce NA values. DHARMa cannot calculated residuals for this. This is nearly certainly an error of the regression package you are using", stop = T) | |
1800 | |
1801 } | |
1802 | |
1803 | |
1804 | |
1805 | |
1806 #' Recalculate residuals with grouping | |
1807 #' | |
1808 #' The purpose of this function is to recalculate scaled residuals per group, based on the simulations done by \code{\link{simulateResiduals}} | |
1809 #' | |
1810 #' @param simulationOutput an object with simulated residuals created by \code{\link{simulateResiduals}} | |
1811 #' @param group group of each data point | |
1812 #' @param aggregateBy function for the aggregation. Default is sum. This should only be changed if you know what you are doing. Note in particular that the expected residual distribution might not be flat any more if you choose general functions, such as sd etc. | |
1813 #' @param seed the random seed to be used within DHARMa. The default setting, recommended for most users, is keep the random seed on a fixed value 123. This means that you will always get the same randomization and thus teh same result when running the same code. NULL = no new seed is set, but previous random state will be restored after simulation. FALSE = no seed is set, and random state will not be restored. The latter two options are only recommended for simulation experiments. See vignette for details. | |
1814 #' @param method the quantile randomization method used. The two options implemented at the moment are probability integral transform (PIT-) residuals (current default), and the "traditional" randomization procedure, that was used in DHARMa until version 0.3.0. For details, see \code{\link{getQuantile}} | |
1815 #' @return an object of class DHARMa, similar to what is returned by \code{\link{simulateResiduals}}, but with additional outputs for the new grouped calculations. Note that the relevant outputs are 2x in the object, the first is the grouped calculations (which is returned by $name access), and later another time, under identical name, the original output. Moreover, there is a function 'aggregateByGroup', which can be used to aggregate predictor variables in the same way as the variables calculated here | |
1816 #' | |
1817 #' @example inst/examples/simulateResidualsHelp.R | |
1818 #' @export | |
1819 recalculateResiduals <- function(simulationOutput, group = NULL, aggregateBy = sum, seed = 123, method = c("PIT", "traditional")){ | |
1820 | |
1821 randomState <-getRandomState(seed) | |
1822 on.exit({randomState$restoreCurrent()}) | |
1823 match.arg(method) | |
1824 | |
1825 if(!is.null(simulationOutput$original)) simulationOutput = simulationOutput$original | |
1826 | |
1827 out = list() | |
1828 out$original = simulationOutput | |
1829 | |
1830 if(is.null(group)) return(simulationOutput) | |
1831 else group =as.factor(group) | |
1832 out$nGroups = nlevels(group) | |
1833 | |
1834 aggregateByGroup <- function(x) aggregate(x, by=list(group), FUN=aggregateBy)[,2] | |
1835 | |
1836 out$observedResponse = aggregateByGroup(simulationOutput$observedResponse) | |
1837 out$fittedPredictedResponse = aggregateByGroup(simulationOutput$fittedPredictedResponse) | |
1838 | |
1839 if (simulationOutput$refit == F){ | |
1840 | |
1841 out$simulatedResponse = apply(simulationOutput$simulatedResponse, 2, aggregateByGroup) | |
1842 out$scaledResiduals = getQuantile(simulations = out$simulatedResponse , observed = out$observedResponse , integerResponse = simulationOutput$integerResponse, method = method) | |
1843 | |
1844 ######## refit = T ################## | |
1845 } else { | |
1846 | |
1847 out$refittedPredictedResponse <- apply(simulationOutput$refittedPredictedResponse, 2, aggregateByGroup) | |
1848 out$fittedResiduals = aggregateByGroup(simulationOutput$fittedResiduals) | |
1849 out$refittedResiduals = apply(simulationOutput$refittedResiduals, 2, aggregateByGroup) | |
1850 out$refittedPearsonResiduals = apply(simulationOutput$refittedPearsonResiduals, 2, aggregateByGroup) | |
1851 | |
1852 out$scaledResiduals = getQuantile(simulations = out$refittedResiduals , observed = out$fittedResiduals , integerResponse = simulationOutput$integerResponse, method = method) | |
1853 | |
1854 } | |
1855 | |
1856 # hack - the c here will result in both old and new outputs to be present resulting output, but a named access should refer to the new, grouped calculations | |
1857 # question to myself - what's the use of that, why not erase the old outputs? they are anyway saved in the old object | |
1858 | |
1859 out$aggregateByGroup = aggregateByGroup | |
1860 out = c(out, simulationOutput) | |
1861 out$randomState = randomState | |
1862 class(out) = "DHARMa" | |
1863 return(out) | |
1864 } | |
1865 | |
1866 ################ simulateResiduals.R | |
1867 | |
1868 ################ DHARMa.R | |
1869 | |
1870 #' @title DHARMa - Residual Diagnostics for HierArchical (Multi-level / Mixed) Regression Models | |
1871 #' @name DHARMa | |
1872 #' @docType package | |
1873 #' @description The 'DHARMa' package uses a simulation-based approach to create readily interpretable scaled (quantile) residuals for fitted (generalized) linear mixed models. Currently supported are linear and generalized linear (mixed) models from 'lme4' (classes 'lmerMod', 'glmerMod'), 'glmmTMB' and 'spaMM', generalized additive models ('gam' from 'mgcv'), 'glm' (including 'negbin' from 'MASS', but excluding quasi-distributions) and 'lm' model classes. Moreover, externally created simulations, e.g. posterior predictive simulations from Bayesian software such as 'JAGS', 'STAN', or 'BUGS' can be processed as well. The resulting residuals are standardized to values between 0 and 1 and can be interpreted as intuitively as residuals from a linear regression. The package also provides a number of plot and test functions for typical model misspecification problems, such as over/underdispersion, zero-inflation, and residual spatial and temporal autocorrelation. | |
1874 #' @details See index / vignette for details | |
1875 #' @seealso \code{\link{simulateResiduals}} | |
1876 #' @examples | |
1877 #' vignette("DHARMa", package="DHARMa") | |
1878 NULL | |
1879 | |
1880 | |
1881 #' Print simulated residuals | |
1882 #' | |
1883 #' @param x an object with simulated residuals created by \code{\link{simulateResiduals}} | |
1884 #' @param ... optional arguments for compatibility with the generic function, no function implemented | |
1885 #' @export | |
1886 print.DHARMa <- function(x, ...){ | |
1887 cat(paste("Object of Class DHARMa with simulated residuals based on", x$nSim, "simulations with refit =", x$refit , ". See ?DHARMa::simulateResiduals for help."), "\n", "\n") | |
1888 if (length(x$scaledResiduals) < 20) cat("Scaled residual values:", x$scaledResiduals) | |
1889 else { | |
1890 cat("Scaled residual values:", x$scaledResiduals[1:20], "...") | |
1891 } | |
1892 } | |
1893 | |
1894 #' Return residuals of a DHARMa simulation | |
1895 #' | |
1896 #' @param object an object with simulated residuals created by \code{\link{simulateResiduals}} | |
1897 #' @param quantileFunction optional - a quantile function to transform the uniform 0/1 scaling of DHARMa to another distribution | |
1898 #' @param outlierValues if a quantile function with infinite support (such as dnorm) is used, residuals that are 0/1 are mapped to -Inf / Inf. outlierValues allows to convert -Inf / Inf values to an optional min / max value. | |
1899 #' @param ... optional arguments for compatibility with the generic function, no function implemented | |
1900 #' @details the function accesses the slot $scaledResiduals in a fitted DHARMa object, and optionally transforms the standard DHARMa quantile residuals (which have a uniform distribution) to a particular pdf. | |
1901 #' | |
1902 #' @note some of the papers on simulated quantile residuals transforming the residuals (which are natively uniform) back to a normal distribution. I presume this is because of the larger familiarity of most users with normal residuals. Personally, I never considered this desirable, for the reasons explained in https://github.com/florianhartig/DHARMa/issues/39, but with this function, I wanted to give users the option to plot normal residuals if they so wish. | |
1903 #' | |
1904 #' @export | |
1905 #' @example inst/examples/simulateResidualsHelp.R | |
1906 #' | |
1907 residuals.DHARMa <- function(object, quantileFunction = NULL, outlierValues = NULL, ...){ | |
1908 | |
1909 if(is.null(quantileFunction)){ | |
1910 return(object$scaledResiduals) | |
1911 } else { | |
1912 res = quantileFunction(object$scaledResiduals) | |
1913 if(!is.null(outlierValues)){ | |
1914 res = ifelse(res == -Inf, outlierValues[1], res) | |
1915 res = ifelse(res == Inf, outlierValues[2], res) | |
1916 } | |
1917 return(res) | |
1918 } | |
1919 } | |
1920 | |
1921 | |
1922 | |
1923 #' Return outliers | |
1924 #' | |
1925 #' Returns the outliers of a DHARMa object | |
1926 #' | |
1927 #' @param object an object with simulated residuals created by \code{\link{simulateResiduals}} | |
1928 #' @param lowerQuantile lower threshold for outliers. Default is zero = outside simulation envelope | |
1929 #' @param upperQuantile upper threshold for outliers. Default is 1 = outside simulation envelope | |
1930 #' @param return wheter to return an indices of outliers or a logical vector | |
1931 #' | |
1932 #' @details First of all, note that the standard definition of outlier in the DHARMa plots and outlier tests is an observation that is outside the simulation envelope. How far outside that is depends a lot on how many simulations you do. If you have 100 data points and to 100 simulations, you would expect to have one "outlier" on average, even with a perfectly fitting model. This is in fact what the outlier test tests. | |
1933 #' | |
1934 #' Thus, keep in mind that for a small number of simulations, outliers are mostly a technical term: these are points that are outside our simulations, but we don't know how far away they are. | |
1935 #' | |
1936 #' If you are seriously interested in HOW FAR outside the expected distribution a data point is, you should increase the number of simulations in \code{\link{simulateResiduals}} to be sure to get the tail of the data distribution correctly. In this case, it may make sense to adjust lowerQuantile and upperQuantile, e.g. to 0.025, 0.975, which would define outliers as values outside the central 95% of the distribution. | |
1937 #' | |
1938 #' Also, note that outliers are particularly concerning if they have a strong influence on the model fit. One could test the influence, for example, by removing them from the data, or by some meausures of leverage, e.g. generalisations for Cook's distance as in Pinho, L. G. B., Nobre, J. S., & Singer, J. M. (2015). Cook’s distance for generalized linear mixed models. Computational Statistics & Data Analysis, 82, 126–136. doi:10.1016/j.csda.2014.08.008. At the moment, however, no such function is provided in DHARMa. | |
1939 #' | |
1940 #' @export | |
1941 #' | |
1942 outliers <- function(object, lowerQuantile = 0, upperQuantile = 1, return = c("index", "logical")){ | |
1943 | |
1944 return = match.arg(return) | |
1945 | |
1946 out = residuals(object) >= upperQuantile | residuals(object) <= lowerQuantile | |
1947 | |
1948 if(return == "logical") return(out) | |
1949 else(return(which(out))) | |
1950 } | |
1951 | |
1952 | |
1953 | |
1954 #' Create a DHARMa object from hand-coded simulations or Bayesian posterior predictive simulations | |
1955 #' | |
1956 #' @param simulatedResponse matrix of observations simulated from the fitted model - row index for observations and colum index for simulations | |
1957 #' @param observedResponse true observations | |
1958 #' @param fittedPredictedResponse optional fitted predicted response. For Bayesian posterior predictive simulations, using the median posterior prediction as fittedPredictedResponse is recommended. If not provided, the mean simulatedResponse will be used. | |
1959 #' @param integerResponse if T, noise will be added at to the residuals to maintain a uniform expectations for integer responses (such as Poisson or Binomial). Unlike in \code{\link{simulateResiduals}}, the nature of the data is not automatically detected, so this MUST be set by the user appropriately | |
1960 #' @param seed the random seed to be used within DHARMa. The default setting, recommended for most users, is keep the random seed on a fixed value 123. This means that you will always get the same randomization and thus teh same result when running the same code. NULL = no new seed is set, but previous random state will be restored after simulation. FALSE = no seed is set, and random state will not be restored. The latter two options are only recommended for simulation experiments. See vignette for details. | |
1961 #' @param method the quantile randomization method used. The two options implemented at the moment are probability integral transform (PIT-) residuals (current default), and the "traditional" randomization procedure, that was used in DHARMa until version 0.3.0. For details, see \code{\link{getQuantile}} | |
1962 #' @details The use of this function is to convert simulated residuals (e.g. from a point estimate, or Bayesian p-values) to a DHARMa object, to make use of the plotting / test functions in DHARMa | |
1963 #' @note Either scaled residuals or (simulatedResponse AND observed response) have to be provided | |
1964 #' @example inst/examples/createDharmaHelp.R | |
1965 #' @export | |
1966 createDHARMa <- function(simulatedResponse , observedResponse , fittedPredictedResponse = NULL, integerResponse = F, seed = 123, method = c("PIT", "traditional")){ | |
1967 | |
1968 randomState <-getRandomState(seed) | |
1969 on.exit({randomState$restoreCurrent()}) | |
1970 match.arg(method) | |
1971 | |
1972 out = list() | |
1973 out$simulatedResponse = simulatedResponse | |
1974 out$refit = F | |
1975 out$integerResponse = integerResponse | |
1976 out$observedResponse = observedResponse | |
1977 | |
1978 if(!is.matrix(simulatedResponse) & !is.null(observedResponse)) stop("either scaled residuals or simulations and observations have to be provided") | |
1979 if(ncol(simulatedResponse) < 2) stop("simulatedResponse with less than 2 simulations provided - cannot calculate residuals on that.") | |
1980 | |
1981 if(ncol(simulatedResponse) < 10) warning("simulatedResponse with less than 10 simulations provided. This rarely makes sense") | |
1982 | |
1983 out$nObs = length(observedResponse) | |
1984 | |
1985 if (out$nObs < 3) stop("warning - number of observations < 3 ... this rarely makes sense") | |
1986 | |
1987 if(! (out$nObs == nrow(simulatedResponse))) stop("dimensions of observedResponse and simulatedResponse do not match") | |
1988 | |
1989 out$nSim = ncol(simulatedResponse) | |
1990 | |
1991 out$scaledResiduals = getQuantile(simulations = simulatedResponse , observed = observedResponse , integerResponse = integerResponse, method = method) | |
1992 | |
1993 | |
1994 # makes sure that DHARM plots that rely on this vector won't crash | |
1995 if(is.null(fittedPredictedResponse)){ | |
1996 message("No fitted predicted response provided, using the mean of the simulations") | |
1997 fittedPredictedResponse = apply(simulatedResponse, 1, mean) | |
1998 } | |
1999 out$fittedPredictedResponse = fittedPredictedResponse | |
2000 out$randomState = randomState | |
2001 class(out) = "DHARMa" | |
2002 return(out) | |
2003 } | |
2004 | |
2005 | |
2006 #' Ensures that an object is of class DHARMa | |
2007 #' | |
2008 #' @param simulationOutput a DHARMa simulation output or an object that can be converted into a DHARMa simulation output | |
2009 #' @param convert if TRUE, attempts to convert model + numeric to DHARMa, if "Model", converts only supported models to DHARMa | |
2010 #' @details The | |
2011 #' @return an object of class DHARMa | |
2012 #' @keywords internal | |
2013 ensureDHARMa <- function(simulationOutput, | |
2014 convert = F){ | |
2015 | |
2016 if(inherits(simulationOutput, "DHARMa")){ | |
2017 return(simulationOutput) | |
2018 } else { | |
2019 | |
2020 if(convert == FALSE) stop("wrong argument to function, simulationOutput must be a DHARMa object!") | |
2021 else { | |
2022 | |
2023 if (class(simulationOutput)[1] %in% getPossibleModels()){ | |
2024 if (convert == "Model" | convert == T) return(simulateResiduals(simulationOutput)) | |
2025 } else if(is.vector(simulationOutput, mode = "numeric") & convert == T) { | |
2026 out = list() | |
2027 out$scaledResiduals = simulationOutput | |
2028 out$nObs = length(out$scaledResiduals) | |
2029 class(out) = "DHARMa" | |
2030 return(out) | |
2031 } | |
2032 } | |
2033 } | |
2034 stop("wrong argument to function, simulationOutput must be a DHARMa object or a numeric vector of quantile residuals!") | |
2035 } | |
2036 | |
2037 ####################### DHARMa.R | |
2038 | |
2039 ####################### tests.R | |
2040 | |
2041 #' DHARMa general residual test | |
2042 #' | |
2043 #' Calls both uniformity and dispersion test | |
2044 #' | |
2045 #' This function is a wrapper for the various test functions implemented in DHARMa. Currently, this function calls the \code{\link{testUniformity}} and the \code{\link{testDispersion}} functions. All other tests (see list below) have to be called by hand. | |
2046 #' | |
2047 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa | |
2048 #' @param plot if T, plots functions of the tests are called | |
2049 #' @author Florian Hartig | |
2050 #' @seealso \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}} | |
2051 #' @example inst/examples/testsHelp.R | |
2052 #' @export | |
2053 testResiduals <- function(simulationOutput, plot = T){ | |
2054 | |
2055 opar = par(mfrow = c(1,3)) | |
2056 on.exit(par(opar)) | |
2057 out = list() | |
2058 out$uniformity = testUniformity(simulationOutput, plot = plot) | |
2059 out$dispersion = testDispersion(simulationOutput, plot = plot) | |
2060 out$outliers = testOutliers(simulationOutput, plot = plot) | |
2061 | |
2062 print(out) | |
2063 return(out) | |
2064 } | |
2065 | |
2066 #' Residual tests | |
2067 #' | |
2068 #' @details Deprecated, switch your code to using the \code{\link{testResiduals}} function | |
2069 #' | |
2070 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa | |
2071 #' @author Florian Hartig | |
2072 #' @export | |
2073 testSimulatedResiduals <- function(simulationOutput){ | |
2074 message("testSimulatedResiduals is deprecated, switch your code to using the testResiduals function") | |
2075 testResiduals(simulationOutput) | |
2076 } | |
2077 | |
2078 | |
2079 #' Test for overall uniformity | |
2080 #' | |
2081 #' This function tests the overall uniformity of the simulated residuals in a DHARMa object | |
2082 #' | |
2083 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa | |
2084 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" compared to the simulated null hypothesis. See \code{\link[stats]{ks.test}} for details | |
2085 #' @param plot if T, plots calls \code{\link{plotQQunif}} as well | |
2086 #' @details The function applies a \code{\link[stats]{ks.test}} for uniformity on the simulated residuals. | |
2087 #' @author Florian Hartig | |
2088 #' @seealso \code{\link{testResiduals}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}} | |
2089 #' @example inst/examples/testsHelp.R | |
2090 #' @export | |
2091 testUniformity<- function(simulationOutput, alternative = c("two.sided", "less", "greater"), plot = T){ | |
2092 | |
2093 simulationOutput = ensureDHARMa(simulationOutput, convert = T) | |
2094 | |
2095 out <- suppressWarnings(ks.test(simulationOutput$scaledResiduals, 'punif', alternative = alternative)) | |
2096 if(plot == T) plotQQunif(simulationOutput = simulationOutput) | |
2097 return(out) | |
2098 } | |
2099 | |
2100 | |
2101 # Experimental | |
2102 testBivariateUniformity<- function(simulationOutput, alternative = c("two.sided", "less", "greater"), plot = T){ | |
2103 | |
2104 simulationOutput = ensureDHARMa(simulationOutput, convert = T) | |
2105 | |
2106 #out <- suppressWarnings(ks.test(simulationOutput$scaledResiduals, 'punif', alternative = alternative)) | |
2107 #if(plot == T) plotQQunif(simulationOutput = simulationOutput) | |
2108 out = NULL | |
2109 return(out) | |
2110 } | |
2111 | |
2112 | |
2113 | |
2114 #' Test for quantiles | |
2115 #' | |
2116 #' This function tests | |
2117 #' | |
2118 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa | |
2119 #' @param predictor an optional predictor variable to be used, instead of the predicted response (default) | |
2120 #' @param quantiles the quantiles to be tested | |
2121 #' @param plot if T, the function will create an additional plot | |
2122 #' @details The function fits quantile regressions (via package qgam) on the residuals, and compares their location to the expected location (because of the uniform distributionm, the expected location is 0.5 for the 0.5 quantile). | |
2123 #' | |
2124 #' A significant p-value for the splines means the fitted spline deviates from a flat line at the expected location (p-values of intercept and spline are combined via Benjamini & Hochberg adjustment to control the FDR) | |
2125 #' | |
2126 #' The p-values of the splines are combined into a total p-value via Benjamini & Hochberg adjustment to control the FDR. | |
2127 #' | |
2128 #' @author Florian Hartig | |
2129 #' @example inst/examples/testQuantilesHelp.R | |
2130 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testOutliers}} | |
2131 #' @export | |
2132 testQuantiles <- function(simulationOutput, predictor = NULL, quantiles = c(0.25,0.5,0.75), plot = T){ | |
2133 | |
2134 if(plot == F){ | |
2135 | |
2136 out = list() | |
2137 out$data.name = deparse(substitute(simulationOutput)) | |
2138 | |
2139 simulationOutput = ensureDHARMa(simulationOutput, convert = T) | |
2140 res = simulationOutput$scaledResiduals | |
2141 pred = ensurePredictor(simulationOutput, predictor) | |
2142 | |
2143 dat=data.frame(res = simulationOutput$scaledResiduals , pred = pred) | |
2144 | |
2145 quantileFits <- list() | |
2146 pval = rep(NA, length(quantiles)) | |
2147 predictions = data.frame(pred = sort(dat$pred)) | |
2148 predictions = cbind(predictions, matrix(ncol = 2 * length(quantiles), nrow = nrow(dat))) | |
2149 for(i in 1:length(quantiles)){ | |
2150 datTemp = dat | |
2151 datTemp$res = datTemp$res - quantiles[i] | |
2152 | |
2153 # settings for k = the dimension of the basis used to represent the smooth term. | |
2154 # see https://github.com/mfasiolo/qgam/issues/37 | |
2155 dimSmooth = min(length(unique(datTemp$pred)), 10) | |
2156 quantResult = try(capture.output(quantileFits[[i]] <- qgam::qgam(res ~ s(pred, k = dimSmooth) , data =datTemp, qu = quantiles[i])), silent = T) | |
2157 if(inherits(quantResult, "try-error")){ | |
2158 message("Unable to calculate quantile regression for quantile ", quantiles[i], ". Possibly to few (unique) data points / predictions. Will be ommited in plots and significance calculations.") | |
2159 } else { | |
2160 x = summary(quantileFits[[i]]) | |
2161 pval[i] = min(p.adjust(c(x$p.table[1,4], x$s.table[1,4]), method = "BH")) # correction for test on slope and intercept | |
2162 quantPre = predict(quantileFits[[i]], newdata = predictions, se = T) | |
2163 predictions[, 2*i] = quantPre$fit + quantiles[i] | |
2164 predictions[, 2*i + 1] = quantPre$se.fit | |
2165 } | |
2166 } | |
2167 | |
2168 out$method = "Test for location of quantiles via qgam" | |
2169 out$alternative = "both" | |
2170 out$pvals = pval | |
2171 out$p.value = min(p.adjust(pval, method = "BH")) # correction for multiple quantile tests | |
2172 out$predictions = predictions | |
2173 out$qgamFits = quantileFits | |
2174 | |
2175 class(out) = "htest" | |
2176 | |
2177 } else if(plot == T) { | |
2178 out <- plotResiduals(simulationOutput = simulationOutput, predictor = predictor, quantiles = quantiles) | |
2179 } | |
2180 return(out) | |
2181 } | |
2182 | |
2183 | |
2184 #unif.2017YMi(X, type = c("Q1", "Q2", "Q3"), lower = rep(0, ncol(X)),upper = rep(1, ncol(X))) | |
2185 | |
2186 #' Test for outliers | |
2187 #' | |
2188 #' This function tests if the number of observations outside the simulatio envelope are larger or smaller than expected | |
2189 #' | |
2190 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa | |
2191 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" (default) compared to the simulated null hypothesis | |
2192 #' @param margin whether to test for outliers only at the lower, only at the upper, or both sides (default) of the simulated data distribution | |
2193 #' @param plot if T, the function will create an additional plot | |
2194 #' @details DHARMa residuals are created by simulating from the fitted model, and comparing the simulated values to the observed data. It can occur that all simulated values are higher or smaller than the observed data, in which case they get the residual value of 0 and 1, respectively. I refer to these values as simulation outliers, or simply outliers. | |
2195 #' | |
2196 #' Because no data was simulated in the range of the observed value, we don't know "how strongly" these values deviate from the model expectation, so the term "outlier" should be used with a grain of salt - it's not a judgment about the magnitude of a deviation from an expectation, but simply that we are outside the simulated range, and thus cannot say anything more about the location of the residual. | |
2197 #' | |
2198 #' Note also that the number of outliers will decrease as we increase the number of simulations. Under the null hypothesis that the model is correct, we expect nData /(nSim +1) outliers at each margin of the distribution. For a reason, consider that if the data and the model distribution are identical, the probability that a given observation is higher than all simulations is 1/(nSim +1). | |
2199 #' | |
2200 #' Based on this null expectation, we can test for an excess or lack of outliers. Per default, testOutliers() looks for both, so if you get a significant p-value, you have to check if you have to many or too few outliers. An excess of outliers is to be interpreted as too many values outside the simulation envelope. This could be caused by overdispersion, or by what we classically call outliers. A lack of outliers would be caused, for example, by underdispersion. | |
2201 #' | |
2202 #' | |
2203 #' @author Florian Hartig | |
2204 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}} | |
2205 #' @example inst/examples/testOutliersHelp.R | |
2206 #' @export | |
2207 testOutliers <- function(simulationOutput, alternative = c("two.sided", "greater", "less"), margin = c("both", "upper", "lower"), plot = T){ | |
2208 | |
2209 # check inputs | |
2210 alternative = match.arg(alternative) | |
2211 margin = match.arg(margin) | |
2212 data.name = deparse(substitute(simulationOutput)) # remember: needs to be called before ensureDHARMa | |
2213 simulationOutput = ensureDHARMa(simulationOutput, convert = "Model") | |
2214 | |
2215 # calculation of outliers | |
2216 if(margin == "both") outliers = sum(simulationOutput$scaledResiduals %in% c(0, 1)) | |
2217 if(margin == "upper") outliers = sum(simulationOutput$scaledResiduals == 1) | |
2218 if(margin == "lower") outliers = sum(simulationOutput$scaledResiduals == 0) | |
2219 | |
2220 # calculations of trials and H0 | |
2221 outFreqH0 = 1/(simulationOutput$nSim +1) * ifelse(margin == "both", 2, 1) | |
2222 trials = simulationOutput$nObs | |
2223 | |
2224 out = binom.test(outliers, trials, p = outFreqH0, alternative = alternative) | |
2225 | |
2226 # overwrite information in binom.test | |
2227 | |
2228 out$data.name = data.name | |
2229 out$margin = margin | |
2230 out$method = "DHARMa outlier test based on exact binomial test" | |
2231 | |
2232 names(out$statistic) = paste("outliers at", margin, "margin(s)") | |
2233 names(out$parameter) = "simulations" | |
2234 names(out$estimate) = paste("frequency of outliers (expected:", out$null.value,")") | |
2235 | |
2236 out$interval = "tst" | |
2237 | |
2238 out$interval = | |
2239 | |
2240 if(plot == T) { | |
2241 | |
2242 hist(simulationOutput, main = "") | |
2243 | |
2244 main = ifelse(out$p.value <= 0.05, | |
2245 "Outlier test significant", | |
2246 "Outlier test n.s.") | |
2247 | |
2248 title(main = main, cex.main = 1, | |
2249 col.main = ifelse(out$p.value <= 0.05, "red", "black")) | |
2250 | |
2251 # legend("center", c(paste("p=", round(out$p.value, digits = 5)), paste("Deviation ", ifelse(out$p.value < 0.05, "significant", "n.s."))), text.col = ifelse(out$p.value < 0.05, "red", "black" )) | |
2252 | |
2253 } | |
2254 return(out) | |
2255 } | |
2256 | |
2257 | |
2258 #' DHARMa dispersion tests | |
2259 #' | |
2260 #' This function performs a simulation-based test for over/underdispersion | |
2261 #' | |
2262 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa | |
2263 #' @param plot whether to plot output | |
2264 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" compared to the simulated null hypothesis. Greater corresponds to overdispersion. | |
2265 #' @param ... arguments to pass on to \code{\link{testGeneric}} | |
2266 #' @details The function implements two tests, depending on whether it is applied on a simulation with refit = F, or refit = T. | |
2267 #' | |
2268 #' If refit = F, the function tests the sd of the data against the sd of the simulated data. | |
2269 #' | |
2270 #' If refit = T, the function compares the approximate deviance (via squared pearson residuals) with the same quantity from the models refitted with simulated data. Applying this is much slower than the previous alternative. Given the computational cost, I would suggest that most users will be satisfied with the standard dispersion test. | |
2271 #' | |
2272 #' @note The results of the dispersion test can can differ depending on whether it is evaluated on conditional (= conditional on fitted random effects) or unconditional (= REs are re-simulated) simulations. You can change between conditional or unconditional simulations in \code{\link{simulateResiduals}} if this is supported by the regression package that you use. The default in DHARMa is to use unconditional simulations, but I have often found that conditional simulations are more sensitive to dispersion problems. I recommend trying both, as neither test should be positive if the dispersion is correct. | |
2273 #' | |
2274 #' @author Florian Hartig | |
2275 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}} | |
2276 #' @example inst/examples/testsHelp.R | |
2277 #' @export | |
2278 testDispersion <- function(simulationOutput, alternative = c("two.sided", "greater", "less"), plot = T, ...){ | |
2279 | |
2280 out = list() | |
2281 out$data.name = deparse(substitute(simulationOutput)) | |
2282 | |
2283 alternative <- match.arg(alternative) | |
2284 | |
2285 simulationOutput = ensureDHARMa(simulationOutput, convert = "Model") | |
2286 | |
2287 if(simulationOutput$refit == F){ | |
2288 | |
2289 spread <- function(x) sd(x - simulationOutput$fittedPredictedResponse) | |
2290 out = testGeneric(simulationOutput, summary = spread, alternative = alternative, methodName = "DHARMa nonparametric dispersion test via sd of residuals fitted vs. simulated", plot = plot, ...) | |
2291 } else { | |
2292 | |
2293 observed = tryCatch(sum(residuals(simulationOutput$fittedModel, type = "pearson")^2), error = function(e) { | |
2294 message(paste("DHARMa: the requested tests requires pearson residuals, but your model does not implement these calculations. Test will return NA. Error message:", e)) | |
2295 return(NA) | |
2296 }) | |
2297 if(is.na(observed)) return(NA) | |
2298 expected = apply(simulationOutput$refittedPearsonResiduals^2 , 2, sum) | |
2299 out$statistic = c(dispersion = observed / mean(expected)) | |
2300 out$method = "DHARMa nonparametric dispersion test via mean deviance residual fitted vs. simulated-refitted" | |
2301 | |
2302 p = getP(simulated = expected, observed = observed, alternative = alternative) | |
2303 | |
2304 out$alternative = alternative | |
2305 out$p.value = p | |
2306 class(out) = "htest" | |
2307 | |
2308 if(plot == T) { | |
2309 #plotTitle = gsub('(.{1,50})(\\s|$)', '\\1\n', out$method) | |
2310 xLabel = paste("Simulated values, red line = fitted model. p-value (",out$alternative, ") = ", out$p.value, sep ="") | |
2311 | |
2312 hist(expected, xlim = range(expected, observed, na.rm=T ), col = "lightgrey", main = "", xlab = xLabel, breaks = 20, cex.main = 1) | |
2313 abline(v = observed, lwd= 2, col = "red") | |
2314 | |
2315 main = ifelse(out$p.value <= 0.05, | |
2316 "Dispersion test significant", | |
2317 "Dispersion test n.s.") | |
2318 | |
2319 title(main = main, cex.main = 1, | |
2320 col.main = ifelse(out$p.value <= 0.05, "red", "black")) | |
2321 } | |
2322 } | |
2323 | |
2324 return(out) | |
2325 } | |
2326 | |
2327 #' Simulated overdisperstion tests | |
2328 #' | |
2329 #' @details Deprecated, switch your code to using the \code{\link{testDispersion}} function | |
2330 #' | |
2331 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa | |
2332 #' @param ... additional arguments to \code{\link{testDispersion}} | |
2333 #' @export | |
2334 testOverdispersion <- function(simulationOutput, ...){ | |
2335 message("testOverdispersion is deprecated, switch your code to using the testDispersion function") | |
2336 testDispersion(simulationOutput, ...) | |
2337 } | |
2338 | |
2339 #' Parametric overdisperstion tests | |
2340 #' | |
2341 #' @details Deprecated, switch your code to using the \code{\link{testDispersion}} function. The function will do nothing, arguments will be ignored, the parametric tests is no longer recommend | |
2342 #' | |
2343 #' @param ... arguments will be ignored, the parametric tests is no longer recommend | |
2344 #' @export | |
2345 testOverdispersionParametric <- function(...){ | |
2346 message("testOverdispersionParametric is deprecated and no longer recommended, see release notes in DHARMA 0.2.0 - switch your code to using the testDispersion function") | |
2347 return(0) | |
2348 } | |
2349 | |
2350 | |
2351 #' Tests for zero-inflation | |
2352 #' | |
2353 #' This function compares the observed number of zeros with the zeros expected from simulations. | |
2354 #' | |
2355 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa | |
2356 #' @param ... further arguments to \code{\link{testGeneric}} | |
2357 #' @details The plot shows the expected distribution of zeros against the observed values, the ratioObsSim shows observed vs. simulated zeros. A value < 1 means that the observed data has less zeros than expected, a value > 1 means that it has more zeros than expected (aka zero-inflation). Per default, the function tests both sides. | |
2358 #' | |
2359 #' Some notes about common problems / questions: | |
2360 #' | |
2361 #' * Zero-inflation tests after fitting the model are crucial to see if you have zero-inflation. Just because there are a lot of zeros doesn't mean you have zero-inflation, see Warton, D. I. (2005). Many zeros does not mean zero inflation: comparing the goodness-of-fit of parametric models to multivariate abundance data. Environmetrics 16(3), 275-289. | |
2362 #' | |
2363 #' * That being said, zero-inflation tests are often not a reliable guide to decide wheter to add a zi term or not. In general, model structures should be decided on ideally a priori, if that is not possible via model selection techniques (AIC, BIC, WAIC, Bayes Factor). A zero-inflation test should only be run after that decision, and to validate the decision that was taken. | |
2364 #' | |
2365 #' @note This function is a wrapper for \code{\link{testGeneric}}, where the summary argument is set to function(x) sum(x == 0) | |
2366 #' @author Florian Hartig | |
2367 #' @example inst/examples/testsHelp.R | |
2368 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}} | |
2369 #' @export | |
2370 testZeroInflation <- function(simulationOutput, ...){ | |
2371 countZeros <- function(x) sum( x == 0) | |
2372 testGeneric(simulationOutput = simulationOutput, summary = countZeros, methodName = "DHARMa zero-inflation test via comparison to expected zeros with simulation under H0 = fitted model", ... ) | |
2373 } | |
2374 | |
2375 | |
2376 #' Generic simulation test of a summary statistic | |
2377 #' | |
2378 #' This function tests if a user-defined summary differs when applied to simulated / observed data. | |
2379 #' | |
2380 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa | |
2381 #' @param summary a function that can be applied to simulated / observed data. See examples below | |
2382 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" compared to the simulated null hypothesis | |
2383 #' @param plot whether to plot the simulated summary | |
2384 #' @param methodName name of the test (will be used in plot) | |
2385 #' | |
2386 #' @details This function tests if a user-defined summary differs when applied to simulated / observed data. the function can easily be remodeled to apply summaries on the residuals, by simply defining f = function(x) summary (x - predictions), as done in \code{\link{testDispersion}} | |
2387 #' | |
2388 #' @note The function that you supply is applied on the data as it is represented in your fitted model, which may not always correspond to how you think. This is important in particular when you use k/n binomial data, and want to test for 1-inflation. As an example, if have k/20 observations, and you provide your data via cbind (y, y-20), you have to test for 20-inflation (because this is how the data is represented in the model). However, if you provide data via y/20, and weights = 20, you should test for 1-inflation. In doubt, check how the data is internally represented in model.frame(model), or via simulate(model) | |
2389 #' | |
2390 #' @export | |
2391 #' @author Florian Hartig | |
2392 #' @example inst/examples/testsHelp.R | |
2393 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}} | |
2394 testGeneric <- function(simulationOutput, summary, alternative = c("two.sided", "greater", "less"), plot = T, methodName = "DHARMa generic simulation test"){ | |
2395 | |
2396 out = list() | |
2397 out$data.name = deparse(substitute(simulationOutput)) | |
2398 | |
2399 simulationOutput = ensureDHARMa(simulationOutput, convert = "Model") | |
2400 | |
2401 alternative <- match.arg(alternative) | |
2402 | |
2403 observed = summary(simulationOutput$observedResponse) | |
2404 | |
2405 simulated = apply(simulationOutput$simulatedResponse, 2, summary) | |
2406 | |
2407 p = getP(simulated = simulated, observed = observed, alternative = alternative) | |
2408 | |
2409 out$statistic = c(ratioObsSim = observed / mean(simulated)) | |
2410 out$method = methodName | |
2411 out$alternative = alternative | |
2412 out$p.value = p | |
2413 | |
2414 | |
2415 class(out) = "htest" | |
2416 | |
2417 if(plot == T) { | |
2418 plotTitle = gsub('(.{1,50})(\\s|$)', '\\1\n', methodName) | |
2419 xLabel = paste("Simulated values, red line = fitted model. p-value (",out$alternative, ") = ", out$p.value, sep ="") | |
2420 hist(simulated, xlim = range(simulated, observed, na.rm=T ), col = "lightgrey", main = plotTitle, xlab = xLabel, breaks = max(round(simulationOutput$nSim / 5), 20), cex.main = 0.8) | |
2421 abline(v = observed, lwd= 2, col = "red") | |
2422 } | |
2423 return(out) | |
2424 } | |
2425 | |
2426 | |
2427 #' Test for temporal autocorrelation | |
2428 #' | |
2429 #' This function performs a standard test for temporal autocorrelation on the simulated residuals | |
2430 #' | |
2431 #' @param simulationOutput an object with simulated residuals created by \code{\link{simulateResiduals}} | |
2432 #' @param time the time, in the same order as the data points. If not provided, random values will be created | |
2433 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" compared to the simulated null hypothesis | |
2434 #' @param plot whether to plot output | |
2435 #' @details The function performs a Durbin-Watson test on the uniformly scaled residuals, and plots the residuals against time. The DB test was originally be designed for normal residuals. In simulations, I didn't see a problem with this setting though. The alternative is to transform the uniform residuals to normal residuals and perform the DB test on those. | |
2436 #' | |
2437 #' If no time values are provided, random values will be created. The sense of being able to run the test with time = NULL (random values) is to test the rate of false positives under the current residual structure (random time corresponds to H0: no spatial autocorrelation), e.g. to check if the test has noninal error rates for particular residual structures (note that Durbin-Watson originally assumes normal residuals, error rates seem correct for uniform residuals, but may not be correct if there are still other residual problems). | |
2438 #' | |
2439 #' Testing for temporal autocorrelation requires unique time values - if you have several observations per time value, either use the recalculateResiduals function to aggregate residuals per time step, or extract the residuals from the fitted object, and plot / test each of them independently for temporally repeated subgroups (typical choices would be location / subject etc.). Note that the latter must be done by hand, outside testSpatialAutocorrelation. | |
2440 #' | |
2441 #' @note Important to note for all autocorrelation tests (spatial / temporal): the autocorrelation tests are valid to check for residual autocorrelation in models that don't assume such a correlation (in this case, you can use conditional or unconditional simulations), or if there is remaining residual autocorrelation after accounting for it in a spatial/temporal model (in that case, you have to use conditional simulations), but if checking unconditional simulations from a model with an autocorrelation structure on data that corresponds to this model, they will be significant, even if the model fully accounts for this structure. | |
2442 #' | |
2443 #' This behavior is not really a bug, but rather originates from the definition of the quantile residuals: quantile residuals are calculated independently per data point, i.e. without consideratin of any correlation structure between data points that may exist in the simulations. As a result, the simulated distributions from a unconditional simulaton will typically not reflect the correlation structure that is present in each single simulation, and the same is true for the subsequently calculated quantile residuals. | |
2444 #' | |
2445 #' The bottomline here is that spatial / temporal / other autoregressive models should either be tested based on conditional simulations, or (ideally) custom tests should be used that are not based on quantile residuals, but rather compare the correlation structure in the simulated data with the correlation structure in the observed data. | |
2446 #' | |
2447 #' @author Florian Hartig | |
2448 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}} | |
2449 #' @example inst/examples/testTemporalAutocorrelationHelp.R | |
2450 #' @export | |
2451 testTemporalAutocorrelation <- function(simulationOutput, time = NULL , alternative = c("two.sided", "greater", "less"), plot = T){ | |
2452 | |
2453 simulationOutput = ensureDHARMa(simulationOutput, convert = T) | |
2454 | |
2455 # actually not sure if this is neccessary for dwtest, but seems better to aggregate | |
2456 if(any(duplicated(time))) stop("testing for temporal autocorrelation requires unique time values - if you have several observations per time value, either use the recalculateResiduals function to aggregate residuals per time step, or extract the residuals from the fitted object, and plot / test each of them independently for temporally repeated subgroups (typical choices would be location / subject etc.). Note that the latter must be done by hand, outside testSpatialAutocorrelation.") | |
2457 | |
2458 alternative <- match.arg(alternative) | |
2459 | |
2460 if(is.null(time)){ | |
2461 time = sample.int(simulationOutput$nObs, simulationOutput$nObs) | |
2462 message("DHARMa::testTemporalAutocorrelation - no time argument provided, using random times for each data point") | |
2463 } | |
2464 | |
2465 out = lmtest::dwtest(simulationOutput$scaledResiduals ~ 1, order.by = time, alternative = alternative) | |
2466 | |
2467 if(plot == T) { | |
2468 oldpar <- par(mfrow = c(1,2)) | |
2469 on.exit(par(oldpar)) | |
2470 | |
2471 plot(simulationOutput$scaledResiduals[order(time)] ~ time[order(time)], | |
2472 type = "l", ylab = "Scaled residuals", xlab = "Time", main = "Residuals vs. time") | |
2473 acf(simulationOutput$scaledResiduals[order(time)], main = "Autocorrelation") | |
2474 legend("topright", | |
2475 c(paste(out$method, " p=", round(out$p.value, digits = 5)), | |
2476 paste("Deviation ", ifelse(out$p.value < 0.05, "significant", "n.s."))), | |
2477 text.col = ifelse(out$p.value < 0.05, "red", "black" ), bty="n") | |
2478 | |
2479 } | |
2480 | |
2481 return(out) | |
2482 } | |
2483 | |
2484 | |
2485 #' Test for spatial autocorrelation | |
2486 #' | |
2487 #' This function performs a standard test for spatial autocorrelation on the simulated residuals | |
2488 #' | |
2489 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa | |
2490 #' @param x the x coordinate, in the same order as the data points. If not provided, random values will be created | |
2491 #' @param y the y coordinate, in the same order as the data points. If not provided, random values will be created | |
2492 #' @param distMat optional distance matrix. If not provided, a distance matrix will be calculated based on x and y. See details for explanation | |
2493 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" compared to the simulated null hypothesis | |
2494 #' @param plot whether to plot output | |
2495 #' @details The function performs Moran.I test from the package ape, based on the provided distance matrix of the data points. | |
2496 #' | |
2497 #' There are several ways to specify this distance. If a distance matrix (distMat) is provided, calculations will be based on this distance matrix, and x,y coordinates will only used for the plotting (if provided) | |
2498 #' If distMat is not provided, the function will calculate the euclidian distances between x,y coordinates, and test Moran.I based on these distances. | |
2499 #' | |
2500 #' If no x/y values are provided, random values will be created. The sense of being able to run the test with x/y = NULL (random values) is to test the rate of false positives under the current residual structure (random x/y corresponds to H0: no spatial autocorrelation), e.g. to check if the test has nominal error rates for particular residual structures. | |
2501 #' | |
2502 #' Testing for spatial autocorrelation requires unique x,y values - if you have several observations per location, either use the recalculateResiduals function to aggregate residuals per location, or extract the residuals from the fitted object, and plot / test each of them independently for spatially repeated subgroups (a typical scenario would repeated spatial observation, in which case one could plot / test each time step separately for temporal autocorrelation). Note that the latter must be done by hand, outside testSpatialAutocorrelation. | |
2503 #' | |
2504 #' @note Important to note for all autocorrelation tests (spatial / temporal): the autocorrelation tests are valid to check for residual autocorrelation in models that don't assume such a correlation (in this case, you can use conditional or unconditional simulations), or if there is remaining residual autocorrelation after accounting for it in a spatial/temporal model (in that case, you have to use conditional simulations), but if checking unconditional simulations from a model with an autocorrelation structure on data that corresponds to this model, they will be significant, even if the model fully accounts for this structure. | |
2505 #' | |
2506 #' This behavior is not really a bug, but rather originates from the definition of the quantile residuals: quantile residuals are calculated independently per data point, i.e. without consideratin of any correlation structure between data points that may exist in the simulations. As a result, the simulated distributions from a unconditional simulaton will typically not reflect the correlation structure that is present in each single simulation, and the same is true for the subsequently calculated quantile residuals. | |
2507 #' | |
2508 #' The bottomline here is that spatial / temporal / other autoregressive models should either be tested based on conditional simulations, or (ideally) custom tests should be used that are not based on quantile residuals, but rather compare the correlation structure in the simulated data with the correlation structure in the observed data. | |
2509 #' | |
2510 #' @author Florian Hartig | |
2511 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testQuantiles}} | |
2512 #' @import grDevices | |
2513 #' @example inst/examples/testSpatialAutocorrelationHelp.R | |
2514 #' @export | |
2515 testSpatialAutocorrelation <- function(simulationOutput, x = NULL, y = NULL, distMat = NULL, alternative = c("two.sided", "greater", "less"), plot = T){ | |
2516 | |
2517 alternative <- match.arg(alternative) | |
2518 data.name = deparse(substitute(simulationOutput)) # needs to be before ensureDHARMa | |
2519 simulationOutput = ensureDHARMa(simulationOutput, convert = T) | |
2520 | |
2521 if(any(duplicated(cbind(x,y)))) stop("testing for spatial autocorrelation requires unique x,y values - if you have several observations per location, either use the recalculateResiduals function to aggregate residuals per location, or extract the residuals from the fitted object, and plot / test each of them independently for spatially repeated subgroups (a typical scenario would repeated spatial observation, in which case one could plot / test each time step separately for temporal autocorrelation). Note that the latter must be done by hand, outside testSpatialAutocorrelation.") | |
2522 | |
2523 if( (!is.null(x) | !is.null(y)) & !is.null(distMat) ) message("both coordinates and distMat provided, calculations will be done based on the distance matrix, coordinates will only be used for plotting") | |
2524 # if not provided, fill x and y with random numbers (Null model) | |
2525 if(is.null(x)){ | |
2526 x = runif(simulationOutput$nObs, -1,1) | |
2527 message("DHARMa::testSpatialAutocorrelation - no x coordinates provided, using random values for each data point") | |
2528 } | |
2529 | |
2530 if(is.null(y)){ | |
2531 y = runif(simulationOutput$nObs, -1,1) | |
2532 message("DHARMa::testSpatialAutocorrelation - no x coordinates provided, using random values for each data point") | |
2533 } | |
2534 | |
2535 # if not provided, create distance matrix based on x and y | |
2536 if(is.null(distMat)) distMat <- as.matrix(dist(cbind(x, y))) | |
2537 | |
2538 invDistMat <- 1/distMat | |
2539 diag(invDistMat) <- 0 | |
2540 | |
2541 MI = ape::Moran.I(simulationOutput$scaledResiduals, weight = invDistMat, alternative = alternative) | |
2542 | |
2543 out = list() | |
2544 out$statistic = c(observed = MI$observed, expected = MI$expected, sd = MI$sd) | |
2545 out$method = "DHARMa Moran's I test for spatial autocorrelation" | |
2546 out$alternative = "Spatial autocorrelation" | |
2547 out$p.value = MI$p.value | |
2548 out$data.name = data.name | |
2549 | |
2550 class(out) = "htest" | |
2551 | |
2552 | |
2553 | |
2554 if(plot == T) { | |
2555 opar <- par(mfrow = c(1,1)) | |
2556 on.exit(par(opar)) | |
2557 | |
2558 col = colorRamp(c("red", "white", "blue"))(simulationOutput$scaledResiduals) | |
2559 plot(x,y, col = rgb(col, maxColorValue = 255), main = out$method, cex.main = 0.8 ) | |
2560 | |
2561 # TODO implement correlogram | |
2562 } | |
2563 | |
2564 if(plot == T) { | |
2565 | |
2566 | |
2567 } | |
2568 return(out) | |
2569 } | |
2570 | |
2571 | |
2572 getP <- function(simulated, observed, alternative){ | |
2573 | |
2574 if(alternative == "greater") p = mean(simulated >= observed) | |
2575 if(alternative == "less") p = mean(simulated <= observed) | |
2576 if(alternative == "two.sided") p = min(min(mean(simulated <= observed), mean(simulated >= observed) ) * 2,1) | |
2577 | |
2578 return(p) | |
2579 } | |
2580 | |
2581 | |
2582 | |
2583 ####################### tests.R | |
2584 | |
2585 ####################### compatibility.R | |
2586 | |
2587 | |
2588 # New S3 methods | |
2589 | |
2590 #' Get model response | |
2591 #' | |
2592 #' Extract the response of a fitted model | |
2593 #' | |
2594 #' The purpose of this function is to savely extract the response (dependent variable) of the fitted model classes | |
2595 #' | |
2596 #' @param object a fitted model | |
2597 #' @param ... additional parameters | |
2598 #' | |
2599 #' @example inst/examples/wrappersHelp.R | |
2600 #' | |
2601 #' @seealso \code{\link{getRefit}}, \code{\link{getSimulations}}, \code{\link{getFixedEffects}}, \code{\link{getFitted}} | |
2602 #' @author Florian Hartig | |
2603 #' @export | |
2604 getObservedResponse <- function (object, ...) { | |
2605 UseMethod("getObservedResponse", object) | |
2606 } | |
2607 | |
2608 #' @rdname getObservedResponse | |
2609 #' @export | |
2610 getObservedResponse.default <- function (object, ...){ | |
2611 out = model.frame(object)[,1] | |
2612 | |
2613 # check for weights in k/n case | |
2614 if(family(object)$family %in% c("binomial", "betabinomial") & "(weights)" %in% colnames(model.frame(object))){ | |
2615 x = model.frame(object) | |
2616 out = out * x$`(weights)` | |
2617 } | |
2618 | |
2619 # check for k/n binomial | |
2620 if(is.matrix(out)){ | |
2621 if(!(ncol(out) == 2)) securityAssertion("nKcase - wrong dimensions of response") | |
2622 if(!(family(object)$family %in% c("binomial", "betabinomial"))) securityAssertion("nKcase - wrong family") | |
2623 | |
2624 out = out[,1] | |
2625 } | |
2626 | |
2627 # observation is factor - unlike lme4 and older, glmmTMB simulates nevertheless as numeric | |
2628 if(is.factor(out)) out = as.numeric(out) - 1 | |
2629 | |
2630 return(out) | |
2631 } | |
2632 | |
2633 weightsWarning = "Model was fit with prior weights. These will be ignored in the simulation. See ?getSimulations for details" | |
2634 | |
2635 #' Get model simulations | |
2636 #' | |
2637 #' Wrapper to simulate from a fitted model | |
2638 #' | |
2639 #' The purpose of this wrapper for for the simulate function is to return the simulations from a model in a standardized way | |
2640 #' | |
2641 #' @param object a fitted model | |
2642 #' @param nsim number of simulations | |
2643 #' @param type if simulations should be prepared for getQuantile or for refit | |
2644 #' @param ... additional parameters to be passed on, usually to the simulate function of the respective model class | |
2645 #' | |
2646 #' @return a matrix with simulations | |
2647 #' @example inst/examples/wrappersHelp.R | |
2648 #' | |
2649 #' @seealso \code{\link{getObservedResponse}}, \code{\link{getRefit}}, \code{\link{getFixedEffects}}, \code{\link{getFitted}} | |
2650 #' | |
2651 #' @details The function is a wrapper for for the simulate function is to return the simulations from a model in a standardized way. | |
2652 #' | |
2653 #' Note: if the model was fit with weights, the function will throw a warning if used with a model class whose simulate function does not include the weightsi in the simulations. Note that the results may or may not be appropriate in this case, depending on how you use the weights. | |
2654 #' | |
2655 #' | |
2656 #' @author Florian Hartig | |
2657 #' @export | |
2658 getSimulations <- function (object, nsim = 1 , type = c("normal", "refit"), ...) { | |
2659 UseMethod("getSimulations", object) | |
2660 } | |
2661 | |
2662 #' @rdname getSimulations | |
2663 #' @export | |
2664 getSimulations.default <- function (object, nsim = 1, type = c("normal", "refit"), ...){ | |
2665 | |
2666 type <- match.arg(type) | |
2667 | |
2668 out = simulate(object, nsim = nsim , ...) | |
2669 | |
2670 if (type == "normal"){ | |
2671 if(family(object)$family %in% c("binomial", "betabinomial")){ | |
2672 if("(weights)" %in% colnames(model.frame(object))){ | |
2673 x = model.frame(object) | |
2674 out = out * x$`(weights)` | |
2675 } else if (is.matrix(out[[1]])){ | |
2676 # this is for the k/n binomial case | |
2677 out = as.matrix(out)[,seq(1, (2*nsim), by = 2)] | |
2678 } else if(is.factor(out[[1]])){ | |
2679 if(nlevels(out[[1]]) != 2){ | |
2680 warning("The fitted model has a factorial response with number of levels not equal to 2 - there is currently no sensible application in DHARMa that would lead to this situation. Likely, you are trying something that doesn't work.") | |
2681 } | |
2682 else{ | |
2683 out = data.matrix(out) - 1 | |
2684 } | |
2685 } | |
2686 } | |
2687 | |
2688 if(!is.matrix(out)) out = data.matrix(out) | |
2689 } else{ | |
2690 if(family(object)$family %in% c("binomial", "betabinomial")){ | |
2691 if (!is.matrix(out[[1]]) & !is.numeric(out)) data.frame(data.matrix(out)-1) | |
2692 } | |
2693 } | |
2694 | |
2695 return(out) | |
2696 } | |
2697 | |
2698 | |
2699 #' Extract fixed effects of a supported model | |
2700 #' | |
2701 #' A wrapper to extract fixed effects of a supported model | |
2702 #' | |
2703 #' @param fittedModel a fitted model | |
2704 #' | |
2705 #' @example inst/examples/wrappersHelp.R | |
2706 #' | |
2707 #' @importFrom lme4 fixef | |
2708 #' | |
2709 #' @seealso \code{\link{getObservedResponse}}, \code{\link{getSimulations}}, \code{\link{getRefit}}, \code{\link{getFitted}} | |
2710 #' @export | |
2711 getFixedEffects <- function(fittedModel){ | |
2712 | |
2713 if(class(fittedModel)[1] %in% c("glm", "lm", "gam", "bam", "negbin") ){ | |
2714 out = coef(fittedModel) | |
2715 } else if(class(fittedModel)[1] %in% c("glmerMod", "lmerMod", "HLfit")){ | |
2716 out = fixef(fittedModel) | |
2717 } else if(class(fittedModel)[1] %in% c("glmmTMB")){ | |
2718 out = glmmTMB::fixef(fittedModel) | |
2719 out = out$cond | |
2720 } else { | |
2721 out = coef(fittedModel) | |
2722 if(is.null(out)) out = fixef(fittedModel) | |
2723 } | |
2724 return(out) | |
2725 } | |
2726 | |
2727 #' Get model refit | |
2728 #' | |
2729 #' Wrapper to refit a fitted model | |
2730 #' | |
2731 #' @param object a fitted model | |
2732 #' @param newresp the new response that should be used to refit the model | |
2733 #' @param ... additional parameters to be passed on to the refit or update class that is used to refit the model | |
2734 #' | |
2735 #' @details The purpose of this wrapper is to standardize the refit of a model. The behavior of this function depends on the supplied model. When available, it uses the refit method, otherwise it will use update. For glmmTMB: since version 1.0, glmmTMB has a refit function, but this didn't work, so I switched back to this implementation, which is a hack based on the update function. | |
2736 #' | |
2737 #' @example inst/examples/wrappersHelp.R | |
2738 #' | |
2739 #' @seealso \code{\link{getObservedResponse}}, \code{\link{getSimulations}}, \code{\link{getFixedEffects}} | |
2740 #' @author Florian Hartig | |
2741 #' @export | |
2742 getRefit <- function (object, newresp, ...) { | |
2743 UseMethod("getRefit", object) | |
2744 } | |
2745 | |
2746 #' @rdname getRefit | |
2747 #' | |
2748 #' @importFrom lme4 refit | |
2749 #' | |
2750 #' @export | |
2751 getRefit.default <- function (object, newresp, ...){ | |
2752 refit(object, newresp, ...) | |
2753 } | |
2754 | |
2755 #' Get model fitted | |
2756 #' | |
2757 #' Wrapper to get the fitted value a fitted model | |
2758 #' | |
2759 #' The purpose of this wrapper is to standardize extract the fitted values | |
2760 #' | |
2761 #' @param object a fitted model | |
2762 #' @param ... additional parameters to be passed on, usually to the simulate function of the respective model class | |
2763 #' | |
2764 #' @example inst/examples/wrappersHelp.R | |
2765 #' | |
2766 #' @seealso \code{\link{getObservedResponse}}, \code{\link{getSimulations}}, \code{\link{getRefit}}, \code{\link{getFixedEffects}} | |
2767 #' | |
2768 #' @author Florian Hartig | |
2769 #' @export | |
2770 getFitted <- function (object, ...) { | |
2771 UseMethod("getFitted", object) | |
2772 } | |
2773 | |
2774 #' @rdname getFitted | |
2775 #' @export | |
2776 getFitted.default <- function (object,...){ | |
2777 fitted(object, ...) | |
2778 } | |
2779 | |
2780 #' has NA | |
2781 #' | |
2782 #' checks if the fitted model excluded NA values | |
2783 #' | |
2784 #' @param object a fitted model | |
2785 #' | |
2786 #' @details Checks if the fitted model excluded NA values | |
2787 #' | |
2788 #' @export | |
2789 | |
2790 | |
2791 # hasNA <- function(object){ | |
2792 # x = rownames(model.frame(object)) | |
2793 # if(length(x) < as.numeric(x[length(x) ])) return(TRUE) | |
2794 # else return(FALSE) | |
2795 # } | |
2796 | |
2797 ######### LM ############# | |
2798 | |
2799 #' @rdname getRefit | |
2800 #' @export | |
2801 getRefit.lm <- function(object, newresp, ...){ | |
2802 | |
2803 newData <-model.frame(object) | |
2804 | |
2805 if(is.vector(newresp)){ | |
2806 newData[,1] = newresp | |
2807 } else if (is.factor(newresp)){ | |
2808 # Hack to make the factor binomial case work | |
2809 newData[,1] = as.numeric(newresp) - 1 | |
2810 } else { | |
2811 # Hack to make the binomial n/k case work | |
2812 newData[[1]] = NULL | |
2813 newData = cbind(newresp, newData) | |
2814 } | |
2815 | |
2816 refittedModel = update(object, data = newData) | |
2817 return(refittedModel) | |
2818 } | |
2819 | |
2820 | |
2821 hasWeigths.lm <- function(object, ...){ | |
2822 if(length(unique(object$prior.weights)) != 1) return(TRUE) | |
2823 else return(FALSE) | |
2824 } | |
2825 | |
2826 | |
2827 ######### GLM ############# | |
2828 | |
2829 #' @rdname getSimulations | |
2830 #' @export | |
2831 getSimulations.negbin<- function (object, nsim = 1, type = c("normal", "refit"), ...){ | |
2832 if("(weights)" %in% colnames(model.frame(object))) warning(weightsWarning) | |
2833 getSimulations.default(object = object, nsim = nsim, type = type, ...) | |
2834 } | |
2835 | |
2836 | |
2837 ######## MGCV ############ | |
2838 | |
2839 # This function overwrites the standard fitted function for GAM | |
2840 | |
2841 #' @rdname getFitted | |
2842 #' @export | |
2843 getFitted.gam <- function(object, ...){ | |
2844 class(object) = "glm" | |
2845 out = stats::fitted(object, ...) | |
2846 names(out) = as.character(1:length(out)) | |
2847 out | |
2848 } | |
2849 | |
2850 # Check that this works | |
2851 # plot(fitted(fittedModelGAM), predict(fittedModelGAM, type = "response")) | |
2852 | |
2853 | |
2854 ######## lme4 ############ | |
2855 | |
2856 | |
2857 #' @rdname getSimulations | |
2858 #' @export | |
2859 getSimulations.lmerMod <- function (object, nsim = 1, type = c("normal", "refit"), ...){ | |
2860 | |
2861 if("(weights)" %in% colnames(model.frame(object))) warning(weightsWarning) | |
2862 getSimulations.default(object = object, nsim = nsim, type = type, ...) | |
2863 } | |
2864 | |
2865 | |
2866 ######## glmmTMB ###### | |
2867 | |
2868 #' @rdname getRefit | |
2869 #' @export | |
2870 getRefit.glmmTMB <- function(object, newresp, ...){ | |
2871 newData <-model.frame(object) | |
2872 | |
2873 # hack to make update work - for some reason, glmmTMB wants the matrix embedded in the df for update to work ... should be solved ideally, see https://github.com/glmmTMB/glmmTMB/issues/549 | |
2874 if(is.matrix(newresp)){ | |
2875 tmp = colnames(newData[[1]]) | |
2876 newData[[1]] = NULL | |
2877 newData = cbind(newresp, newData) | |
2878 colnames(newData)[1:2] = tmp | |
2879 } else { | |
2880 newData[[1]] = newresp | |
2881 } | |
2882 refittedModel = update(object, data = newData) | |
2883 return(refittedModel) | |
2884 } | |
2885 | |
2886 | |
2887 # glmmTMB simulates normal counts (and not proportions in any case, so the check for the other models is not needed), see #158 | |
2888 # note that if observation is factor - unlike lme4 and older, glmmTMB simulates nevertheless as numeric | |
2889 | |
2890 #' @rdname getSimulations | |
2891 #' @export | |
2892 getSimulations.glmmTMB <- function (object, nsim = 1, type = c("normal", "refit"), ...){ | |
2893 | |
2894 if("(weights)" %in% colnames(model.frame(object)) & ! family(object)$family %in% c("binomial", "betabinomial")) warning(weightsWarning) | |
2895 | |
2896 type <- match.arg(type) | |
2897 | |
2898 out = simulate(object, nsim = nsim, ...) | |
2899 | |
2900 if (type == "normal"){ | |
2901 if (is.matrix(out[[1]])){ | |
2902 # this is for the k/n binomial case | |
2903 out = as.matrix(out)[,seq(1, (2*nsim), by = 2)] | |
2904 } | |
2905 if(!is.matrix(out)) out = data.matrix(out) | |
2906 }else{ | |
2907 | |
2908 # check for weights in k/n case | |
2909 if(family(object)$family %in% c("binomial", "betabinomial")){ | |
2910 if("(weights)" %in% colnames(model.frame(object))){ | |
2911 w = model.frame(object) | |
2912 w = w$`(weights)` | |
2913 tmp <- function(x)x/w | |
2914 out = apply(out, 2, tmp) | |
2915 out = as.data.frame(out) | |
2916 } | |
2917 else if(is.matrix(out[[1]]) & !is.matrix(model.frame(object)[[1]])){ | |
2918 out = as.data.frame(as.matrix(out)[,seq(1, (2*nsim), by = 2)]) | |
2919 } | |
2920 } | |
2921 | |
2922 | |
2923 | |
2924 | |
2925 | |
2926 | |
2927 # matrixResp = | |
2928 # | |
2929 # if(matrixResp & !is.null(ncol(newresp))){ | |
2930 # # Hack to make the factor binomial case work | |
2931 # tmp = colnames(newData[[1]]) | |
2932 # newData[[1]] = NULL | |
2933 # newData = cbind(newresp, newData) | |
2934 # colnames(newData)[1:2] = tmp | |
2935 # } else if(!is.null(ncol(newresp))){ | |
2936 # newData[[1]] = newresp[,1] | |
2937 # } else { | |
2938 # newData[[1]] = newresp | |
2939 # } | |
2940 | |
2941 | |
2942 # if (out$modelClass == "glmmTMB" & ncol(simulations) == 2*n) simObserved = simulations[,(1+(2*(i-1))):(2+(2*(i-1)))] | |
2943 } | |
2944 | |
2945 # else securityAssertion("Simulation results produced unsupported data structure", stop = TRUE) | |
2946 | |
2947 return(out) | |
2948 } | |
2949 | |
2950 ####### spaMM ######### | |
2951 | |
2952 #' @rdname getObservedResponse | |
2953 #' @export | |
2954 getObservedResponse.HLfit <- function(object, ...){ | |
2955 out = spaMM::response(object, ...) | |
2956 | |
2957 nKcase = is.matrix(out) | |
2958 if(nKcase){ | |
2959 if(! (family(object) %in% c("binomial", "betabinomial"))) securityAssertion("nKcase - wrong family") | |
2960 if(! (ncol(out)==2)) securityAssertion("nKcase - wrong dimensions of response") | |
2961 out = out[,1] | |
2962 } | |
2963 | |
2964 if(!is.numeric(out)) out = as.numeric(out) | |
2965 | |
2966 return(out) | |
2967 | |
2968 } | |
2969 | |
2970 #' @rdname getSimulations | |
2971 #' @export | |
2972 getSimulations.HLfit <- function(object, nsim = 1, type = c("normal", "refit"), ...){ | |
2973 | |
2974 type <- match.arg(type) | |
2975 | |
2976 capture.output({out = simulate(object, nsim = nsim, ...)}) | |
2977 | |
2978 if(type == "normal"){ | |
2979 if(!is.matrix(out)) out = data.matrix(out) | |
2980 }else{ | |
2981 out = as.data.frame(out) | |
2982 } | |
2983 return(out) | |
2984 } | |
2985 | |
2986 #' @rdname getRefit | |
2987 #' @export | |
2988 getRefit.HLfit <- function(object, newresp, ...) { | |
2989 spaMM::update_resp(object, newresp, evaluate = TRUE) | |
2990 } | |
2991 | |
2992 ####################### compatibility.R | |
2993 | |
2994 ####################### helper.R | |
2995 | |
2996 #' Modified ECDF function | |
2997 #' | |
2998 #' @details ensures symmetric ECDF (standard ECDF is <), and that 0 / 1 values are only produced if the data is strictly < > than the observed data | |
2999 #' | |
3000 #' @keywords internal | |
3001 DHARMa.ecdf <- function (x) | |
3002 { | |
3003 x <- sort(x) | |
3004 n <- length(x) | |
3005 if (n < 1) | |
3006 stop(paste("DHARMa.ecdf - length vector < 1", x)) | |
3007 vals <- unique(x) | |
3008 rval <- approxfun(vals, cumsum(tabulate(match(x, vals)))/ (n +1), | |
3009 method = "linear", yleft = 0, yright = 1, ties = "ordered") | |
3010 class(rval) <- c("ecdf", "stepfun", class(rval)) | |
3011 assign("nobs", n, envir = environment(rval)) | |
3012 attr(rval, "call") <- sys.call() | |
3013 rval | |
3014 } | |
3015 | |
3016 | |
3017 | |
3018 #' calculate quantiles | |
3019 #' | |
3020 #' calculates residual quantiles from a given simulation | |
3021 #' | |
3022 #' @param simulations a matrix with simulations from a fitted model. Rows = observations, columns = replicate simulations | |
3023 #' @param observed a vector with the observed data | |
3024 #' @param integerResponse is the response integer-valued. Only has an effect for method = "traditional" | |
3025 #' @param method the quantile randomization method used. See details | |
3026 #' | |
3027 #' @details The function calculates residual quantiles from the simulated data. For continous distributions, this will simply the the value of the ecdf. | |
3028 #' | |
3029 #' For discrete data, there are two options implemented. | |
3030 #' | |
3031 #' The current default (available since DHARMa 0.3.1) are probability integral transform (PIT-) residuals (Smith, 1985; Dunn & Smyth, 1996; see also see also Warton, et al., 2017). | |
3032 #' | |
3033 #' Before DHARMa 0.3.1, a different randomization procedure was used, in which the a U(-0.5, 0.5) distribution was added on observations and simualtions for discrete distributions. For a completely discrete distribution, the two procedures should deliver equivalent results, but the second method has the disadvantage that a) one has to know if the distribution is discrete (DHARMa tries to recognize this automatically), and b) that it leads to inefficiencies for some distributions such as the the Tweedie, which are partly continous, partly discrte (see e.g. https://github.com/florianhartig/DHARMa/issues/168). | |
3034 #' | |
3035 #' @references | |
3036 #' | |
3037 #' Smith, J. Q. "Diagnostic checks of non-standard time series models." Journal of Forecasting 4.3 (1985): 283-291. | |
3038 #' | |
3039 #' Dunn, P.K., & Smyth, G.K. (1996). Randomized quantile residuals. Journal of Computational and Graphical Statistics 5, 236-244. | |
3040 #' | |
3041 #' Warton, David I., Loïc Thibaut, and Yi Alice Wang. "The PIT-trap—A “model-free” bootstrap procedure for inference about regression models with discrete, multivariate responses." PloS one 12.7 (2017) | |
3042 #' | |
3043 #' @export | |
3044 getQuantile <- function(simulations, observed, integerResponse, method = c("PIT", "traditional")){ | |
3045 | |
3046 method = match.arg(method) | |
3047 | |
3048 n = length(observed) | |
3049 if (nrow(simulations) != n) stop("DHARMa::getquantile: wrong dimension of simulations") | |
3050 nSim = ncol(simulations) | |
3051 | |
3052 | |
3053 if(method == "traditional"){ | |
3054 | |
3055 if(integerResponse == F){ | |
3056 | |
3057 if(any(duplicated(observed))) message("Model family was recognized or set as continuous, but duplicate values were detected in the response. Consider if you are fitting an appropriate model.") | |
3058 | |
3059 values = as.vector(simulations)[duplicated(as.vector(simulations))] | |
3060 if(length(values) > 0){ | |
3061 if (all(values%%1==0)){ | |
3062 integerResponse = T | |
3063 message("Model family was recognized or set as continuous, but duplicate values were detected in the simulation - changing to integer residuals (see ?simulateResiduals for details)") | |
3064 } else { | |
3065 message("Duplicate non-integer values found in the simulation. If this is because you are fitting a non-inter valued discrete response model, note that DHARMa does not perform appropriate randomization for such cases.") | |
3066 } | |
3067 | |
3068 } | |
3069 } | |
3070 | |
3071 scaledResiduals = rep(NA, n) | |
3072 for (i in 1:n){ | |
3073 if(integerResponse == T){ | |
3074 scaledResiduals[i] <- DHARMa.ecdf(simulations[i,] + runif(nSim, -0.5, 0.5))(observed[i] + runif(1, -0.5, 0.5)) | |
3075 }else{ | |
3076 scaledResiduals[i] <- DHARMa.ecdf(simulations[i,])(observed[i]) | |
3077 } | |
3078 } | |
3079 | |
3080 } else { | |
3081 | |
3082 | |
3083 scaledResiduals = rep(NA, n) | |
3084 for (i in 1:n){ | |
3085 min <- sum(simulations[i,] < observed[i]) / length(simulations[i,]) | |
3086 max <- sum(simulations[i,] <= observed[i]) / length(simulations[i,]) | |
3087 if (min == max) scaledResiduals[i] = DHARMa.ecdf(simulations[i,])(observed[i]) | |
3088 else{ | |
3089 scaledResiduals[i] = runif(1, min, max) | |
3090 } | |
3091 } | |
3092 } | |
3093 | |
3094 return(scaledResiduals) | |
3095 } | |
3096 | |
3097 # | |
3098 # | |
3099 # testData = createData(sampleSize = 200, family = gaussian(), | |
3100 # randomEffectVariance = 0, numGroups = 5) | |
3101 # fittedModel <- glmmTMB(observedResponse ~ Environment1, | |
3102 # data = testData) | |
3103 # simulationOutput <- simulateResiduals(fittedModel = fittedModel) | |
3104 # | |
3105 # sims = simulationOutput$simulatedResponse | |
3106 # sims[1, c(1,6,8)] = 0 | |
3107 # any(apply(sims, 1, anyDuplicated)) | |
3108 # getQuantile(simulations = sims, observed = testData$observedResponse, n = 200, integerResponse = F, nSim = 250) | |
3109 # | |
3110 # | |
3111 # | |
3112 | |
3113 | |
3114 | |
3115 #' Check dot operator | |
3116 #' | |
3117 #' @param name variable name | |
3118 #' @param default variable default | |
3119 #' | |
3120 #' @details modified from https://github.com/lcolladotor/dots | |
3121 #' | |
3122 #' @keywords internal | |
3123 checkDots <- function(name, default, ...) { | |
3124 args <- list(...) | |
3125 if(!name %in% names(args)) { | |
3126 ## Default value | |
3127 return(default) | |
3128 } else { | |
3129 ## If the argument was defined in the ... part, return it | |
3130 return(args[[name]]) | |
3131 } | |
3132 } | |
3133 | |
3134 | |
3135 securityAssertion <- function(context = "Not provided", stop = F){ | |
3136 generalMessage = "Message from DHARMa: During the execution of a DHARMa function, some unexpected conditions occurred. Even if you didn't get an error, your results may not be reliable. Please check with the help if you use the functions as intended. If you think that the error is not on your side, I would be grateful if you could report the problem at https://github.com/florianhartig/DHARMa/issues \n\n Context:" | |
3137 if (stop == F) warning(paste(generalMessage, context)) | |
3138 else stop(paste(generalMessage, context)) | |
3139 } | |
3140 | |
3141 ####################### helper.R | |
3142 | |
3143 ####################### plot.R | |
3144 | |
3145 #' DHARMa standard residual plots | |
3146 #' | |
3147 #' This function creates standard plots for the simulated residuals | |
3148 #' @param x an object with simulated residuals created by \code{\link{simulateResiduals}} | |
3149 #' @param rank if T (default), the values of pred will be rank transformed. This will usually make patterns easier to spot visually, especially if the distribution of the predictor is skewed. | |
3150 #' @param ... further options for \code{\link{plotResiduals}}. Consider in particular parameters quantreg, rank and asFactor. xlab, ylab and main cannot be changed when using plotSimulatedResiduals, but can be changed when using plotResiduals. | |
3151 #' @details The function creates two plots. To the left, a qq-uniform plot to detect deviations from overall uniformity of the residuals (calling \code{\link{plotQQunif}}), and to the right, a plot of residuals against predicted values (calling \code{\link{plotResiduals}}). Outliers are highlighted in red (for more on outliers, see \code{\link{testOutliers}}). For a correctly specified model, we would expect | |
3152 #' | |
3153 #' a) a straight 1-1 line in the uniform qq-plot -> evidence for an overall uniform (flat) distribution of the residuals | |
3154 #' | |
3155 #' b) uniformity of residuals in the vertical direction in the res against predictor plot | |
3156 #' | |
3157 #' Deviations of this can be interpreted as for a linear regression. See the vignette for detailed examples. | |
3158 #' | |
3159 #' To provide a visual aid in detecting deviations from uniformity in y-direction, the plot of the residuals against the predicted values also performs an (optional) quantile regression, which provides 0.25, 0.5 and 0.75 quantile lines across the plots. These lines should be straight, horizontal, and at y-values of 0.25, 0.5 and 0.75. Note, however, that some deviations from this are to be expected by chance, even for a perfect model, especially if the sample size is small. See further comments on this plot, its interpretation and options, in \code{\link{plotResiduals}} | |
3160 #' | |
3161 #' The quantile regression can take some time to calculate, especially for larger datasets. For that reason, quantreg = F can be set to produce a smooth spline instead. This is default for n > 2000. | |
3162 #' | |
3163 #' @seealso \code{\link{plotResiduals}}, \code{\link{plotQQunif}} | |
3164 #' @example inst/examples/plotsHelp.R | |
3165 #' @import graphics | |
3166 #' @import utils | |
3167 #' @export | |
3168 plot.DHARMa <- function(x, rank = TRUE, ...){ | |
3169 | |
3170 oldpar <- par(mfrow = c(1,2), oma = c(0,1,2,1)) | |
3171 on.exit(par(oldpar)) | |
3172 | |
3173 plotQQunif(x) | |
3174 plotResiduals(x, rank = rank, ...) | |
3175 | |
3176 mtext("DHARMa residual diagnostics", outer = T) | |
3177 } | |
3178 | |
3179 | |
3180 #' Histogram of DHARMa residuals | |
3181 #' | |
3182 #' The function produces a histogram from a DHARMa output | |
3183 #' | |
3184 #' @param x a DHARMa simulation output (class DHARMa) | |
3185 #' @param breaks breaks for hist() function | |
3186 #' @param col col for hist bars | |
3187 #' @param main plot main | |
3188 #' @param xlab plot xlab | |
3189 #' @param cex.main plot cex.main | |
3190 #' @param ... other arguments to be passed on to hist | |
3191 #' @seealso \code{\link{plotSimulatedResiduals}}, \code{\link{plotResiduals}} | |
3192 #' @example inst/examples/plotsHelp.R | |
3193 #' @export | |
3194 hist.DHARMa <- function(x, | |
3195 breaks = seq(-0.02, 1.02, len = 53), | |
3196 col = c("red",rep("lightgrey",50), "red"), | |
3197 main = "Hist of DHARMa residuals", | |
3198 xlab = "Residuals (outliers are marked red)", | |
3199 cex.main = 1, | |
3200 ...){ | |
3201 | |
3202 x = ensureDHARMa(x, convert = T) | |
3203 | |
3204 val = x$scaledResiduals | |
3205 val[val == 0] = -0.01 | |
3206 val[val == 1] = 1.01 | |
3207 | |
3208 hist(val, breaks = breaks, col = col, main = main, xlab = xlab, cex.main = cex.main, ...) | |
3209 } | |
3210 | |
3211 | |
3212 #' DHARMa standard residual plots | |
3213 #' | |
3214 #' DEPRECATED, use plot() instead | |
3215 #' | |
3216 #' @param simulationOutput an object with simulated residuals created by \code{\link{simulateResiduals}} | |
3217 #' @param ... further options for \code{\link{plotResiduals}}. Consider in particular parameters quantreg, rank and asFactor. xlab, ylab and main cannot be changed when using plotSimulatedResiduals, but can be changed when using plotResiduals. | |
3218 #' @note This function is deprecated. Use \code{\link{plot.DHARMa}} | |
3219 #' | |
3220 #' @seealso \code{\link{plotResiduals}}, \code{\link{plotQQunif}} | |
3221 #' @export | |
3222 plotSimulatedResiduals <- function(simulationOutput, ...){ | |
3223 message("plotSimulatedResiduals is deprecated, please switch your code to simply using the plot() function") | |
3224 plot(simulationOutput, ...) | |
3225 } | |
3226 | |
3227 | |
3228 #' Quantile-quantile plot for a uniform distribution | |
3229 #' | |
3230 #' The function produces a uniform quantile-quantile plot from a DHARMa output | |
3231 #' | |
3232 #' @param simulationOutput a DHARMa simulation output (class DHARMa) | |
3233 #' @param testUniformity if T, the function \code{\link{testUniformity}} will be called and the result will be added to the plot | |
3234 #' @param testOutliers if T, the function \code{\link{testOutliers}} will be called and the result will be added to the plot | |
3235 #' @param testDispersion if T, the function \code{\link{testDispersion}} will be called and the result will be added to the plot | |
3236 #' @param ... arguments to be passed on to \code{\link[gap]{qqunif}} | |
3237 #' | |
3238 #' @details the function calls qqunif from the R package gap to create a quantile-quantile plot for a uniform distribution. | |
3239 #' @seealso \code{\link{plotSimulatedResiduals}}, \code{\link{plotResiduals}} | |
3240 #' @example inst/examples/plotsHelp.R | |
3241 #' @export | |
3242 plotQQunif <- function(simulationOutput, testUniformity = T, testOutliers = T, testDispersion = T, ...){ | |
3243 | |
3244 simulationOutput = ensureDHARMa(simulationOutput, convert = "Model") | |
3245 | |
3246 gap::qqunif(simulationOutput$scaledResiduals,pch=2,bty="n", logscale = F, col = "black", cex = 0.6, main = "QQ plot residuals", cex.main = 1, ...) | |
3247 | |
3248 if(testUniformity == TRUE){ | |
3249 temp = testUniformity(simulationOutput, plot = F) | |
3250 legend("topleft", c(paste("KS test: p=", round(temp$p.value, digits = 5)), paste("Deviation ", ifelse(temp$p.value < 0.05, "significant", "n.s."))), text.col = ifelse(temp$p.value < 0.05, "red", "black" ), bty="n") | |
3251 } | |
3252 | |
3253 if(testOutliers == TRUE){ | |
3254 temp = testOutliers(simulationOutput, plot = F) | |
3255 legend("bottomright", c(paste("Outlier test: p=", round(temp$p.value, digits = 5)), paste("Deviation ", ifelse(temp$p.value < 0.05, "significant", "n.s."))), text.col = ifelse(temp$p.value < 0.05, "red", "black" ), bty="n") | |
3256 } | |
3257 | |
3258 if(testDispersion == TRUE){ | |
3259 temp = testDispersion(simulationOutput, plot = F) | |
3260 legend("center", c(paste("Dispersion test: p=", round(temp$p.value, digits = 5)), paste("Deviation ", ifelse(temp$p.value < 0.05, "significant", "n.s."))), text.col = ifelse(temp$p.value < 0.05, "red", "black" ), bty="n") | |
3261 } | |
3262 | |
3263 } | |
3264 | |
3265 | |
3266 #' Generic res ~ pred scatter plot with spline or quantile regression on top | |
3267 #' | |
3268 #' The function creates a generic residual plot with either spline or quantile regression to highlight patterns in the residuals. Outliers are highlighted in red. | |
3269 #' | |
3270 #' @param simulationOutput on object, usually a DHARMa object, from which residual values can be extracted. Alternatively, a vector with residuals or a fitted model can be provided, which will then be transformed into a DHARMa object. | |
3271 #' @param form optional predictor against which the residuals should be plotted. Default is to used the predicted(simulationOutput) | |
3272 #' @param quantreg whether to perform a quantile regression on 0.25, 0.5, 0.75 on the residuals. If F, a spline will be created instead. Default NULL chooses T for nObs < 2000, and F otherwise. | |
3273 #' @param rank if T, the values provided in form will be rank transformed. This will usually make patterns easier to spot visually, especially if the distribution of the predictor is skewed. If form is a factor, this has no effect. | |
3274 #' @param asFactor should a numeric predictor provided in form be treated as a factor. Default is to choose this for < 10 unique values, as long as enough predictions are available to draw a boxplot. | |
3275 #' @param smoothScatter if T, a smooth scatter plot will plotted instead of a normal scatter plot. This makes sense when the number of residuals is very large. Default NULL chooses T for nObs < 10000, and F otherwise. | |
3276 #' @param quantiles for a quantile regression, which quantiles should be plotted | |
3277 #' @param ... additional arguments to plot / boxplot. | |
3278 #' @details The function plots residuals against a predictor (by default against the fitted value, extracted from the DHARMa object, or any other predictor). | |
3279 #' | |
3280 #' Outliers are highlighted in red (for information on definition and interpretation of outliers, see \code{\link{testOutliers}}). | |
3281 #' | |
3282 #' To provide a visual aid in detecting deviations from uniformity in y-direction, the plot function calculates an (optional) quantile regression, which compares the empirical 0.25, 0.5 and 0.75 quantiles (default) in y direction (red solid lines) with the theoretical 0.25, 0.5 and 0.75 quantiles (dashed black line). | |
3283 #' | |
3284 #' Asymptotically (i.e. for lots of data / residuals), if the model is correct, theoretical and the empirical quantiles should be identical (i.e. dashed and solid lines should match). A p-value for the deviation is calculated for each quantile line. Significant deviations are highlighted by red color. | |
3285 #' | |
3286 #' If form is a factor, a boxplot will be plotted instead of a scatter plot. The distribution for each factor level should be uniformly distributed, so the box should go from 0.25 to 0.75, with the median line at 0.5. Again, chance deviations from this will increases when the sample size is smaller. You can run null simulations to test if the deviations you see exceed what you would expect from random variation. If you want to create box plots for categorical predictors (e.g. because you only have a small number of unique numeric predictor values), you can convert your predictor with as.factor(pred) | |
3287 #' | |
3288 #' @return if quantile tests are performed, the function returns them invisibly. | |
3289 #' | |
3290 #' @note The quantile regression can take some time to calculate, especially for larger datasets. For that reason, quantreg = F can be set to produce a smooth spline instead. | |
3291 #' | |
3292 #' @seealso \code{\link{plotQQunif}} | |
3293 #' @example inst/examples/plotsHelp.R | |
3294 #' @export | |
3295 plotResiduals <- function(simulationOutput, form = NULL, quantreg = NULL, rank = F, asFactor = NULL, smoothScatter = NULL, quantiles = c(0.25, 0.5, 0.75), ...){ | |
3296 | |
3297 | |
3298 ##### Checks ##### | |
3299 | |
3300 | |
3301 a <- list(...) | |
3302 a$ylab = checkDots("ylab", "Standardized residual", ...) | |
3303 if(is.null(form)){ | |
3304 a$xlab = checkDots("xlab", ifelse(rank, "Model predictions (rank transformed)", "Model predictions"), ...) | |
3305 } | |
3306 | |
3307 simulationOutput = ensureDHARMa(simulationOutput, convert = T) | |
3308 res = simulationOutput$scaledResiduals | |
3309 if(inherits(form, "DHARMa"))stop("DHARMa::plotResiduals > argument form cannot be of class DHARMa. Note that the syntax of plotResiduals has changed since DHARMa 0.3.0. See ?plotResiduals.") | |
3310 | |
3311 pred = ensurePredictor(simulationOutput, form) | |
3312 | |
3313 ##### Rank transform and factor conversion##### | |
3314 | |
3315 if(!is.factor(pred)){ | |
3316 | |
3317 if (rank == T){ | |
3318 pred = rank(pred, ties.method = "average") | |
3319 pred = pred / max(pred) | |
3320 } | |
3321 | |
3322 nuniq = length(unique(pred)) | |
3323 ndata = length(pred) | |
3324 if(is.null(asFactor)) asFactor = (nuniq == 1) | (nuniq < 10 & ndata / nuniq > 10) | |
3325 if (asFactor) pred = factor(pred) | |
3326 } | |
3327 | |
3328 ##### Residual scatter plots ##### | |
3329 | |
3330 if(is.null(quantreg)) if (length(res) > 2000) quantreg = FALSE else quantreg = TRUE | |
3331 | |
3332 switchScatter = 10000 | |
3333 if(is.null(smoothScatter)) if (length(res) > switchScatter) smoothScatter = TRUE else smoothScatter = FALSE | |
3334 | |
3335 blackcol = rgb(0,0,0, alpha = max(0.1, 1 - 3 * length(res) / switchScatter)) | |
3336 | |
3337 | |
3338 # categorical plot | |
3339 if(is.factor(pred)){ | |
3340 do.call(plot, append(list(res ~ pred, ylim = c(0,1), axes = FALSE), a)) | |
3341 } | |
3342 # smooth scatter | |
3343 else if (smoothScatter == TRUE) { | |
3344 defaultCol = ifelse(res == 0 | res == 1, 2,blackcol) | |
3345 do.call(graphics::smoothScatter, append(list(x = pred, y = res , ylim = c(0,1), axes = FALSE, colramp = colorRampPalette(c("white", "darkgrey"))),a)) | |
3346 points(pred[defaultCol == 2], res[defaultCol == 2], col = "red", cex = 0.5) | |
3347 } | |
3348 # normal plot | |
3349 else{ | |
3350 defaultCol = ifelse(res == 0 | res == 1, 2,blackcol) | |
3351 defaultPch = ifelse(res == 0 | res == 1, 8,1) | |
3352 a$col = checkDots("col", defaultCol, ...) | |
3353 a$pch = checkDots("pch", defaultPch, ...) | |
3354 do.call(plot, append(list(res ~ pred, ylim = c(0,1), axes = FALSE), a)) | |
3355 } | |
3356 | |
3357 axis(1) | |
3358 axis(2, at=c(0, 0.25, 0.5, 0.75, 1)) | |
3359 | |
3360 ##### Quantile regressions ##### | |
3361 | |
3362 main = checkDots("main", "Residual vs. predicted", ...) | |
3363 out = NULL | |
3364 | |
3365 if(is.numeric(pred)){ | |
3366 if(quantreg == F){ | |
3367 title(main = main, cex.main = 1) | |
3368 abline(h = c(0.25, 0.5, 0.75), col = "black", lwd = 0.5, lty = 2) | |
3369 try({ | |
3370 lines(smooth.spline(pred, res, df = 10), lty = 2, lwd = 2, col = "red") | |
3371 abline(h = 0.5, col = "red", lwd = 2) | |
3372 }, silent = T) | |
3373 }else{ | |
3374 | |
3375 out = testQuantiles(simulationOutput, pred, quantiles = quantiles, plot = F) | |
3376 | |
3377 | |
3378 if(any(out$pvals < 0.05, na.rm = TRUE)){ | |
3379 main = paste(main, "Quantile deviations detected (red curves)", sep ="\n") | |
3380 if(out$p.value <= 0.05){ | |
3381 main = paste(main, "Combined adjusted quantile test significant", sep ="\n") | |
3382 } else { | |
3383 main = paste(main, "Combined adjusted quantile test n.s.", sep ="\n") | |
3384 } | |
3385 maincol = "red" | |
3386 } else { | |
3387 main = paste(main, "No significant problems detected", sep ="\n") | |
3388 maincol = "black" | |
3389 } | |
3390 | |
3391 | |
3392 title(main = main, cex.main = 0.8, | |
3393 col.main = maincol) | |
3394 | |
3395 for(i in 1:length(quantiles)){ | |
3396 | |
3397 lineCol = ifelse(out$pvals[i] <= 0.05 & !(is.na(out$pvals[i])), "red", "black") | |
3398 filCol = ifelse(out$pvals[i] <= 0.05 & !(is.na(out$pvals[i])), "#FF000040", "#00000020") | |
3399 | |
3400 abline(h = quantiles[i], col = lineCol, lwd = 0.5, lty = 2) | |
3401 polygon(c(out$predictions$pred, rev(out$predictions$pred)), | |
3402 c(out$predictions[,2*i] - out$predictions[,2*i+1], rev(out$predictions[,2*i] + out$predictions[,2*i+1])), | |
3403 col = "#00000020", border = F) | |
3404 lines(out$predictions$pred, out$predictions[,2*i], col = lineCol, lwd = 2) | |
3405 } | |
3406 | |
3407 # legend("bottomright", c(paste("Quantile test: p=", round(out$p.value, digits = 5)), paste("Deviation ", ifelse(out$p.value < 0.05, "significant", "n.s."))), text.col = ifelse(out$p.value < 0.05, "red", "black" ), bty="n") | |
3408 | |
3409 } | |
3410 } | |
3411 invisible(out) | |
3412 } | |
3413 | |
3414 x = 0.01 | |
3415 x <= 0.05 & !(is.na(x)) | |
3416 | |
3417 | |
3418 #' Ensures the existence of a valid predictor to plot residuals against | |
3419 #' | |
3420 #' @param simulationOutput a DHARMa simulation output or an object that can be converted into a DHARMa simulation output | |
3421 #' @param predictor an optional predictor. If no predictor is provided, will try to extract the fitted value | |
3422 #' @keywords internal | |
3423 ensurePredictor <- function(simulationOutput, | |
3424 predictor = NULL){ | |
3425 if(!is.null(predictor)){ | |
3426 | |
3427 if(length(predictor) != length(simulationOutput$scaledResiduals)) stop("DHARMa: residuals and predictor do not have the same length. The issue is possibly that you have NAs in your predictor that were removed during the model fit. Remove the NA values from your predictor.") | |
3428 } else { | |
3429 | |
3430 predictor = simulationOutput$fittedPredictedResponse | |
3431 if(is.null(predictor)) stop("DHARMa: can't extract predictor from simulationOutput, and no predictor provided") | |
3432 } | |
3433 return(predictor) | |
3434 } | |
3435 | |
3436 | |
3437 | |
3438 | |
3439 #plot(simulationOutput) | |
3440 | |
3441 #plot(simulationOutput$observedResponse, simulationOutput$scaledResiduals, xlab = "predicted", ylab = "Residual", main = "Residual vs. predicted") | |
3442 | |
3443 #plot(simulationOutput$observedResponse, simulationOutput$fittedPredictedResponse - simulationOutput$observedResponse) | |
3444 | |
3445 #plot(cumsum(sort(simulationOutput$scaledResiduals))) | |
3446 | |
3447 | |
3448 #plotConventionalResiduals(fittedModel) | |
3449 | |
3450 | |
3451 #' Conventional residual plot | |
3452 #' | |
3453 #' Convenience function to draw conventional residual plots | |
3454 #' | |
3455 #' @param fittedModel a fitted model object | |
3456 #' @export | |
3457 plotConventionalResiduals <- function(fittedModel){ | |
3458 opar <- par(mfrow = c(1,3), oma = c(0,1,2,1)) | |
3459 on.exit(par(opar)) | |
3460 plot(predict(fittedModel), resid(fittedModel, type = "deviance"), main = "Deviance" , ylab = "Residual", xlab = "Predicted") | |
3461 plot(predict(fittedModel), resid(fittedModel, type = "pearson") , main = "Pearson", ylab = "Residual", xlab = "Predicted") | |
3462 plot(predict(fittedModel), resid(fittedModel, type = "response") , main = "Raw residuals" , ylab = "Residual", xlab = "Predicted") | |
3463 mtext("Conventional residual plots", outer = T) | |
3464 } | |
3465 | |
3466 | |
3467 | |
3468 | |
3469 # | |
3470 # | |
3471 # if(quantreg == F){ | |
3472 # | |
3473 # lines(smooth.spline(simulationOutput$fittedPredictedResponse, simulationOutput$scaledResiduals, df = 10), lty = 2, lwd = 2, col = "red") | |
3474 # | |
3475 # abline(h = 0.5, col = "red", lwd = 2) | |
3476 # | |
3477 # }else{ | |
3478 # | |
3479 # #library(gamlss) | |
3480 # | |
3481 # # qrnn | |
3482 # | |
3483 # # http://r.789695.n4.nabble.com/Quantile-GAM-td894280.html | |
3484 # | |
3485 # #require(quantreg) | |
3486 # #dat <- plyr::arrange(dat,pred) | |
3487 # #fit<-quantreg::rqss(resid~qss(pred,constraint="N"),tau=0.5,data = dat) | |
3488 # | |
3489 # probs = c(0.25, 0.50, 0.75) | |
3490 # | |
3491 # w <- p <- list() | |
3492 # for(i in seq_along(probs)){ | |
3493 # capture.output(w[[i]] <- qrnn::qrnn.fit(x = as.matrix(simulationOutput$fittedPredictedResponse), y = as.matrix(simulationOutput$scaledResiduals), n.hidden = 4, tau = probs[i], iter.max = 1000, n.trials = 1, penalty = 1)) | |
3494 # p[[i]] <- qrnn::qrnn.predict(as.matrix(sort(simulationOutput$fittedPredictedResponse)), w[[i]]) | |
3495 # } | |
3496 # | |
3497 # | |
3498 # | |
3499 # #plot(simulationOutput$fittedPredictedResponse, simulationOutput$scaledResiduals, xlab = "Predicted", ylab = "Residual", main = "Residual vs. predicted\n lines should match", cex.main = 1) | |
3500 # | |
3501 # #lines(sort(simulationOutput$fittedPredictedResponse), as.vector(p[[1]]), col = "red") | |
3502 # | |
3503 # matlines(sort(simulationOutput$fittedPredictedResponse), matrix(unlist(p), nrow = length(simulationOutput$fittedPredictedResponse), ncol = length(p)), col = "red", lty = 1) | |
3504 # | |
3505 # # as.vector(p[[1]]) | |
3506 # # | |
3507 # # | |
3508 # # lines(simulationOutput$fittedPredictedResponse,p[[1]], col = "red", lwd = 2) | |
3509 # # abline(h = 0.5, col = "red", lwd = 2) | |
3510 # # | |
3511 # # fit<-quantreg::rqss(resid~qss(pred,constraint="N"),tau=0.25,data = dat) | |
3512 # # lines(unique(dat$pred)[-1],fit$coef[1] + fit$coef[-1], col = "green", lwd = 2, lty =2) | |
3513 # # abline(h = 0.25, col = "green", lwd = 2, lty =2) | |
3514 # # | |
3515 # # fit<-quantreg::rqss(resid~qss(pred,constraint="N"),tau=0.75,data = dat) | |
3516 # # lines(unique(dat$pred)[-1],fit$coef[1] + fit$coef[-1], col = "blue", lwd = 2, lty = 2) | |
3517 # # abline(h = 0.75, col = "blue", lwd = 2, lty =2) | |
3518 # } | |
3519 | |
3520 ####################### plot.R | |
3521 | |
3522 ####################### random.R | |
3523 | |
3524 #' Record and restore a random state | |
3525 #' | |
3526 #' The aim of this function is to record, manipualate and restor a random state | |
3527 #' | |
3528 #' @details This function is intended for two (not mutually exclusive tasks) | |
3529 #' | |
3530 #' a) record the current random state | |
3531 #' | |
3532 #' b) change the current random state in a way that the previous state can be restored | |
3533 #' | |
3534 #' @return a list with various infos about the random state that after function execution, as well as a function to restore the previous state before the function execution | |
3535 #' | |
3536 #' @param seed seed argument to set.seed(). NULL = no seed, but random state will be restored. F = random state will not be restored | |
3537 #' @export | |
3538 #' @example inst/examples/getRandomStateHelp.R | |
3539 #' @author Florian Hartig | |
3540 #' | |
3541 getRandomState <- function(seed = NULL){ | |
3542 | |
3543 # better to explicitly access the global RS? | |
3544 # current = get(".Random.seed", .GlobalEnv, ifnotfound = NULL) | |
3545 | |
3546 current = mget(".Random.seed", envir = .GlobalEnv, ifnotfound = list(NULL))[[1]] | |
3547 | |
3548 if(is.logical(seed) & seed == F){ | |
3549 restoreCurrent <- function(){} | |
3550 }else{ | |
3551 restoreCurrent <- function(){ | |
3552 if(is.null(current)) rm(".Random.seed", envir = .GlobalEnv) else assign(".Random.seed", current , envir = .GlobalEnv) | |
3553 } | |
3554 } | |
3555 | |
3556 # setting seed | |
3557 if(is.numeric(seed)) set.seed(seed) | |
3558 | |
3559 # ensuring that RNG has been initialized | |
3560 if (is.null(current))runif(1) | |
3561 | |
3562 randomState = list(seed, state = get(".Random.seed", globalenv()), kind = RNGkind(), restoreCurrent = restoreCurrent) | |
3563 return(randomState) | |
3564 } | |
3565 | |
3566 ####################### random.R | |
3567 | |
3568 ######################################### Package DHARMa |