comparison FunctPAMPAGalaxy.r @ 0:f0dc3958e65d draft

"planemo upload for repository https://github.com/ColineRoyaux/PAMPA-Galaxy commit 07f1028cc764f920b1e6419c151f04ab4e3600fa"
author ecology
date Tue, 21 Jul 2020 06:00:31 -0400
parents
children 44b9069775ca
comparison
equal deleted inserted replaced
-1:000000000000 0:f0dc3958e65d
1 #Rscript
2
3
4 ##################################################################################################################################
5 ####################### PAMPA Galaxy tools functions : Calculate metrics, compute GLM and plot #################################
6 ##################################################################################################################################
7
8 #### Based on Yves Reecht R script
9 #### Modified by Coline ROYAUX for integrating within Galaxy-E
10
######################################### start of the function fact.det.f called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r
12 ####### Define the finest aggregation with the observation table
13
fact.det.f <- function (Obs,
                        size.class="size.class",
                        code.especes="species.code",
                        unitobs="observation.unit")
{
    ## Purpose: Define the finest aggregation available in the observation
    ##          table: unitobs x species, plus size class when that column
    ##          exists and is fully filled.
    ## ----------------------------------------------------------------------
    ## Arguments: Obs : observation data.frame
    ##            size.class : name of the size-class column
    ##            code.especes : name of the species-code column
    ##            unitobs : name of the observation-unit column
    ## Output: character vector of aggregation factor (column) names.
    ## ----------------------------------------------------------------------
    ## Bug fix: the original body read a global `obs` instead of the `Obs`
    ## argument, so the function ignored the table it was given.
    if (any(is.element(c(size.class), colnames(Obs))) && all(! is.na(Obs[, size.class])))
    {
        factors <- c(unitobs, code.especes, size.class)
    }else{
        factors <- c(unitobs, code.especes)
    }
    return(factors)
}
27
######################################### end of the function fact.det.f
29
30 ######################################### start of the function def.typeobs.f called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r
31 ####### Define observation type from colnames
32
def.typeobs.f <- function(Obs)
{
    ## Purpose: Detect the observation type from the column names:
    ##          "SVR" (stationary video rotation) when a rotation-like
    ##          column is present, "other" otherwise.
    ## ----------------------------------------------------------------------
    ## Arguments: Obs : observation data.frame
    ## Output: character scalar, "SVR" or "other".
    ## ----------------------------------------------------------------------
    ## Bug fix: the original body read a global `obs` instead of the `Obs`
    ## argument, so the function ignored the table it was given.
    if (any(is.element(c("rotation","rot","rotate"), colnames(Obs))))
    {
        ObsType <- "SVR"
    }else{
        ObsType <- "other"
    }
    return(ObsType)
}
######################################### end of the function def.typeobs.f
44
45 ######################################### start of the function create.unitobs called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r
46 ####### Create unitobs column when inexistant
create.unitobs <- function(data,year="year",point="point", unitobs="observation.unit")
{
    ## Purpose: Build the observation-unit column when it does not already
    ##          exist in a usable form, by uniting the year and point columns.
    ## ----------------------------------------------------------------------
    ## Arguments: data : input data.frame
    ##            year, point : names of the columns to unite
    ##            unitobs : name of the observation-unit column
    ## Output: data.frame with an "observation.unit" column.

    ## A usable column already exists when it is present and none of its
    ## values follow the "YYYY_..." year-first pattern :
    already.built <- is.element(paste(unitobs), colnames(data)) &&
        all(grepl("[1-2][0|8|9][0-9]{2}_.*", data[, unitobs]) == FALSE)

    if (already.built)
    {
        return(data)
    }

    ## Otherwise paste year and point together (tidyr::unite).
    unite(data, col="observation.unit", c(year, point))
}
######################################### end of the function create.unitobs
60
61 ######################################### start of the function create.year.point called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r
62 ####### separate unitobs column when existant
create.year.point <- function(data,year="year",point="point", unitobs="observation.unit")
{
    ## Purpose: Split the observation-unit column back into separate year and
    ##          point columns, keeping the original unit column as well.
    ## ----------------------------------------------------------------------
    ## Arguments: data : input data.frame
    ##            year, point : names of the output columns
    ##            unitobs : name of the observation-unit column to split
    ## Output: data.frame with year, point and observation.unit columns.

    year.first.pattern <- "[1-2][0|8|9][0-9]{2}_.*"

    if (all(grepl(year.first.pattern, data[, unitobs])) == TRUE)
    {
        ## "YYYY_point" layout : one split on the underscore (tidyr::separate).
        tab <- separate(data,col=unitobs,into=c(year,point),sep="_")
    }else{
        ## Fixed-width layout : characters 3-4 hold the year; the leading two
        ## characters and the trailing part are re-united into the point.
        tab <- separate(data,col=unitobs,into=c("site1", year,"obs"),sep=c(2,4))
        tab <- unite(tab, col=point, c("site1","obs"))
    }

    ## Keep the untouched unit identifier alongside the split columns :
    cbind(tab,observation.unit = data[,unitobs])
}
######################################### end of the function create.year.point
79
80 ######################################### start of the function check_file called by every Galaxy Rscripts
81
check_file<-function(dataset,err_msg,vars,nb_vars){

    ## Purpose: General function to check integrity of an input file:
    ##          verifies the minimum number of columns and the presence of
    ##          every expected column name, stopping with `err_msg` on any
    ##          mismatch.
    ## ----------------------------------------------------------------------
    ## Arguments: dataset : dataset (data.frame) to check
    ##            err_msg : error message raised on failure
    ##            vars    : expected column names
    ##            nb_vars : expected (minimum) number of columns
    ## ----------------------------------------------------------------------
    ## Author: Alan Amosse, Benjamin Yguel

    ## Not enough columns : print a short diagnostic, then abort.
    if(ncol(dataset) < nb_vars){
        cat("\nerr nb var\n")
        stop(err_msg, call.=FALSE)
    }

    ## Abort when any expected column name is missing.
    absent <- vars[!(vars %in% names(dataset))]
    if(length(absent) > 0){
        stop(err_msg,call.=FALSE)
    }
}
106
107 ######################################### end of the function check_file
108
109
110 ######################################### start of the function statRotationsNumber.f called by calc.numbers.f
111
statRotationsNumber.f <- function(factors, obs)
{
    ## Purpose: Computing abundance statistics by rotation (mean, max, sd)
    ##          on SVR (stationary video rotation) data.
    ## ----------------------------------------------------------------------
    ## Arguments: factors : Names of aggregation factors
    ##            obs : observation data (needs "rotation" and "number"
    ##                  columns)
    ## Output: list of arrays — mean/max/sd of abundance per factor cross,
    ##         valid rotation count, and the raw per-rotation abundances.
    ## ----------------------------------------------------------------------
    ## Author: Yves Reecht, Date: 29 oct. 2012, 16:01 modified by Coline ROYAUX 04 june 2020

    ## Identification of valid rotations :
    if (is.element("observation.unit", factors))
    {
        ## valid rotations (empty must be there as well) :
        rotations <- tapply(obs$rotation,
                            as.list(obs[ , c("observation.unit", "rotation"), drop=FALSE]),
                            function(x)length(x) > 0)

        ## Changing NA rotations in FALSE :
        rotations[is.na(rotations)] <- FALSE
    }else{
        ## NOTE(review): when "observation.unit" is not among the factors,
        ## `rotations` is never defined and the sweep() below would fail —
        ## presumably this branch is unreachable in practice; confirm.
        #stop(mltext("statRotations.err.1"))
    }

    ## ###########################################################
    ## Abundance per rotation at chosen aggregation factors :
    ## (all-NA cells stay NA instead of summing to 0)
    nombresR <- tapply(obs$number,
                       as.list(obs[ , c(factors, "rotation"), drop=FALSE]),
                       function(x,...){ifelse(all(is.na(x)), NA, sum(x,...))},
                       na.rm = TRUE)

    ## If valid rotation NA are considered 0 :
    ## NOTE(review): match() documents an integer `nomatch`; `nomatch=NULL`
    ## is unusual — confirm it behaves as intended here.
    nombresR <- sweep(nombresR,
                      match(names(dimnames(rotations)), names(dimnames(nombresR)), nomatch=NULL),
                      rotations, # Array of valid rotations (booleans).
                      function(x, y)
                  {
                      x[is.na(x) & y] <- 0 # When NA and valid rotation => 0.
                      return(x)
                  })

    ## ##################################################
    ## Statistics :

    ## Means :
    nombresMean <- apply(nombresR, which(is.element(names(dimnames(nombresR)), factors)),
                         function(x,...){ifelse(all(is.na(x)), NA, mean(x,...))}, na.rm=TRUE)

    ## Maxima :
    nombresMax <- apply(nombresR, which(is.element(names(dimnames(nombresR)), factors)),
                        function(x,...){ifelse(all(is.na(x)), NA, max(x,...))}, na.rm=TRUE)

    ## SD :
    nombresSD <- apply(nombresR, which(is.element(names(dimnames(nombresR)), factors)),
                       function(x,...){ifelse(all(is.na(x)), NA, sd(x,...))}, na.rm=TRUE)

    ## Valid rotations count (per observation unit) :
    nombresRotations <- apply(rotations, 1, sum, na.rm=TRUE)

    ## Results returned as list :
    return(list(nombresMean=nombresMean, nombresMax=nombresMax, nombresSD=nombresSD,
                nombresRotations=nombresRotations, nombresTot=nombresR))
}
175
176 ######################################### end of the function statRotationsNumber.f
177
178 ######################################### start of the function calcNumber.default.f called by calc.numbers.f
179
calcNumber.default.f <- function(obs,
                                 factors=c("observation.unit", "species.code", "size.class"),
                                 nbName="number")
{
    ## Purpose : Compute abundances at the finest aggregation level.
    ## ---------------------------------------------------------------------
    ## Arguments: obs : observation table
    ##            factors : aggregation factors
    ##            nbName : name of the abundance column.
    ##
    ## Output: array with as many dimensions as aggregation factors.
    ## ----------------------------------------------------------------------
    ## Author: Yves Reecht, Date: 19 déc. 2011, 13:38 modified by Coline ROYAUX 04 june 2020

    ## Total individual count for every factor combination :
    counts <- tapply(obs[[nbName]],
                     as.list(obs[ , factors]),
                     sum, na.rm = TRUE)

    ## Combinations without observations are true absences, not missing data :
    counts[is.na(counts)] <- 0

    counts
}
204
205 ######################################### end of the function calcNumber.default.f
206
207 ######################################### start of the function calc.numbers.f
208
calc.numbers.f <- function(obs, ObsType="", factors=c("observation.unit", "species.code", "size.class"), nbName="number")
{
    ## Purpose: Produce the abundance data.frame used as a table, from the
    ##          output of calcNumber.default.f() (or, for SVR data, from the
    ##          per-rotation statistics).
    ## ----------------------------------------------------------------------
    ## Arguments: obs : observation table
    ##            ObsType : Type of observation (SVR, LIT, ...)
    ##            factors : aggregation factors
    ##            nbName : name of abundance column
    ##
    ## Output: data.frame with (N aggregation factors + 1) columns
    ## ----------------------------------------------------------------------
    ## Author: Yves Reecht, Date: 19 déc. 2011, 13:46 modified by Coline ROYAUX 04 june 2020

    svr <- ObsType == "SVR"

    if (svr)
    {
        ## Compute SVR abundances statistics :
        statRotations <- statRotationsNumber.f(factors=factors,
                                               obs=obs)

        ## Mean over rotating videos (3 rotations most of the time) :
        nbr <- statRotations[["nombresMean"]]
    }else{
        nbr <- calcNumber.default.f(obs, factors, nbName)
    }

    ## Long format : one row per factor combination.
    res <- as.data.frame(as.table(nbr), responseName=nbName)

    ## Empty size-class labels are missing values :
    if (is.element("size.class", colnames(res)))
    {
        res$size.class[res$size.class == ""] <- NA
    }

    ## Use integer storage when all abundances are whole numbers :
    if (isTRUE(all.equal(res[ , nbName], as.integer(res[ , nbName]))))
    {
        res[ , nbName] <- as.integer(res[ , nbName])
    }

    ## SVR only : append max and standard deviation across rotations :
    if (svr)
    {
        res$number.max <- as.vector(statRotations[["nombresMax"]])
        res$number.sd <- as.vector(statRotations[["nombresSD"]])
    }

    return(res)
}
259
260 ######################################### end of the function calc.numbers.f
261
262 ######################################### start of the function presAbs.f called by calcBiodiv.f
263
presAbs.f <- function(nombres, logical=FALSE)
{
    ## Purpose: Compute presence/absence from abundances.
    ## ----------------------------------------------------------------------
    ## Arguments: nombres : vector of individual counts.
    ##            logical : (boolean) results as TRUE/FALSE instead of 1/0 ?
    ## Output: vector of the same length; NAs are propagated.
    ## ----------------------------------------------------------------------
    ## Author: Yves Reecht, Date: 29 oct. 2010, 10:20 modified by Coline ROYAUX 04 june 2020

    ## Counts can never be negative :
    if (any(nombres < 0, na.rm=TRUE))
    {
        stop("Negative abundances!")
    }

    present <- nombres > 0

    if (logical)
    {
        return(present)
    }

    ## Numeric coding : any positive count becomes 1, zeros stay 0.
    nombres[present] <- 1
    return(nombres)
}
286
287 ######################################### end of the function presAbs.f
288
289 ######################################### start of the function betterCbind called by agregations.generic.f
290
betterCbind <- function(..., dfList=NULL, deparse.level = 1)
{
    ## Purpose: cbind() data.frames that share key columns, keeping only one
    ##          copy of the shared columns (no redundancy).
    ## ----------------------------------------------------------------------
    ## Arguments: same as cbind...
    ##            dfList : optional list of data.frames (overrides `...`)
    ## Output: a single data.frame; the first column of the first table is
    ##         moved to the end, and duplicated columns are dropped from the
    ##         remaining tables.
    ## ----------------------------------------------------------------------
    ## Author: Yves Reecht, Date: 17 janv. 2012, 21:10 modified by Coline ROYAUX 04 june 2020

    if (is.null(dfList))
    {
        dfList <- list(...)
    }

    first <- dfList[[1]]

    ## First table with its leading column moved to the last position :
    reordered <- first[ , c(tail(colnames(first), -1),
                            head(colnames(first), 1))]

    ## Other tables stripped of the columns already present in the first :
    trimmed <- lapply(dfList[-1],
                      function(df)
                      {
                          df[ , !is.element(colnames(df), colnames(first)),
                             drop=FALSE]
                      })

    do.call(cbind, c(list(reordered), trimmed, deparse.level=deparse.level))
}
319
320 ######################################### end of the function betterCbind
321
322 ######################################### start of the function agregation.f called by agregations.generic.f
323
agregation.f <- function(metric, Data, factors, casMetrique,
                         nbName="number")
{
    ## Purpose: metric aggregation
    ## ----------------------------------------------------------------------
    ## Arguments: metric: colname of chosen metric
    ##            Data: Unaggregated data table
    ##            factors: aggregation factors vector
    ##            casMetrique: named vector mapping each metric name to its
    ##                         aggregation method (see agregations.generic.f)
    ##            nbName : abundance column name
    ## Output: long-format data.frame, metric column first, then one column
    ##         per aggregation factor.
    ## ----------------------------------------------------------------------
    ## Author: Yves Reecht, Date: 20 déc. 2011, 14:29 modified by Coline ROYAUX 04 june 2020

    ## Dispatch on the aggregation method attached to this metric.
    ## In every case, cells whose values are all NA stay NA (instead of 0).
    switch(casMetrique[metric],
           "sum"={
               ## Plain sum per factor combination :
               res <- tapply(Data[ , metric],
                             as.list(Data[ , factors, drop=FALSE]),
                             function(x)
                             {
                                 ifelse(all(is.na(x)),
                                        NA,
                                        sum(x, na.rm=TRUE))
                             })
           },
           "w.mean"={
               ## Mean weighted by abundance (nbName column) :
               res <- tapply(1:nrow(Data),
                             as.list(Data[ , factors, drop=FALSE]),
                             function(ii)
                             {
                                 ifelse(all(is.na(Data[ii, metric])),
                                        NA,
                                        weighted.mean(Data[ii, metric],
                                                      Data[ii, nbName],
                                                      na.rm=TRUE))
                             })
           },
           "w.mean.colonies"={
               ## Mean weighted by the colony count (benthos data) :
               res <- tapply(1:nrow(Data),
                             as.list(Data[ , factors, drop=FALSE]),
                             function(ii)
                             {
                                 ifelse(all(is.na(Data[ii, metric])),
                                        NA,
                                        weighted.mean(Data[ii, metric],
                                                      Data[ii, "colonies"],
                                                      na.rm=TRUE))
                             })
           },
           "w.mean.prop"={
               ## Abundance proportion (%) against the total abundance
               ## ("nombre.tot" column, added by agregations.generic.f) :
               res <- tapply(1:nrow(Data),
                             as.list(Data[ , factors, drop=FALSE]),
                             function(ii)
                             {
                                 ifelse(all(is.na(Data[ii, metric])) || sum(Data[ii, "nombre.tot"], na.rm=TRUE) == 0,
                                        NA,
                                        ifelse(all(na.omit(Data[ii, metric]) == 0), # To avoid NaN.
                                               0,
                                               (sum(Data[ii, nbName][ !is.na(Data[ii, metric])], na.rm=TRUE) /
                                                sum(Data[ii, "nombre.tot"], na.rm=TRUE)) *
                                               ## Correction if size class isn't an aggregation factor
                                               ## (otherwise value divided by number of present classes) :
                                               ifelse(is.element("size.class", factors),
                                                      100,
                                                      100 * length(unique(Data$size.class)))))
                             })

           },
           "w.mean.prop.bio"={
               ## Same as "w.mean.prop" but on biomass, against "tot.biomass" :
               res <- tapply(1:nrow(Data),
                             as.list(Data[ , factors, drop=FALSE]),
                             function(ii)
                             {
                                 ifelse(all(is.na(Data[ii, metric])) || sum(Data[ii, "tot.biomass"], na.rm=TRUE) == 0,
                                        NA,
                                        ifelse(all(na.omit(Data[ii, metric]) == 0), # To avoid NaN.
                                               0,
                                               (sum(Data[ii, "biomass"][ !is.na(Data[ii, metric])], na.rm=TRUE) /
                                                sum(Data[ii, "tot.biomass"], na.rm=TRUE)) *
                                               ## Correction if size class isn't an aggregation factor
                                               ## (otherwise value divided by number of present classes) :
                                               ifelse(is.element("size.class", factors),
                                                      100,
                                                      100 * length(unique(Data$size.class)))))
                             })

           },
           "pres"={
               ## Presence/absence : 1 if at least one observation in the group.
               res <- tapply(Data[ , metric],
                             as.list(Data[ , factors, drop=FALSE]),
                             function(x)
                             {
                                 ifelse(all(is.na(x)), # When only NAs.
                                        NA,
                                        ifelse(any(x > 0, na.rm=TRUE), # Otherwise...
                                               1, # ... presence if at least one observation in the group.
                                               0))
                             })
           },
           "nbMax"={
               ## Max abundance across rotations (SVR data).
               ## Recuperation of raw abundances with selections :
               ## NOTE(review): getReducedSVRdata.f is defined elsewhere;
               ## it presumably returns a per-rotation abundance array.
               nbTmp <- getReducedSVRdata.f(dataName=".NombresSVR", data=Data)

               ## Sum by factor cross / rotation :
               nbTmp2 <- apply(nbTmp,
                               which(is.element(names(dimnames(nbTmp)), c(factors, "rotation"))),
                               function(x)
                               {
                                   ifelse(all(is.na(x)), NA, sum(x, na.rm=TRUE))
                               })

               ## Maximum by factor cross :
               res <- as.array(apply(nbTmp2,
                                     which(is.element(names(dimnames(nbTmp)), factors)),
                                     function(x)
                                     {
                                         ifelse(all(is.na(x)), NA, max(x, na.rm=TRUE))
                                     }))
           },
           "nbSD"={
               ## Abundance standard deviation across rotations (SVR data).
               ## Recuperation of raw abundances with selections :
               nbTmp <- getReducedSVRdata.f(dataName=".NombresSVR", data=Data)

               ## Sum by factor cross / rotation :
               nbTmp2 <- apply(nbTmp,
                               which(is.element(names(dimnames(nbTmp)), c(factors, "rotation"))),
                               function(x)
                               {
                                   ifelse(all(is.na(x)), NA, sum(x, na.rm=TRUE))
                               })

               ## SD by factor cross :
               res <- as.array(apply(nbTmp2,
                                     which(is.element(names(dimnames(nbTmp)), factors)),
                                     function(x)
                                     {
                                         ifelse(all(is.na(x)), NA, sd(x, na.rm=TRUE))
                                     }))
           },
           "densMax"={
               ## Max density across rotations (SVR data).
               ## Recuperation of raw densities with selections :
               densTmp <- getReducedSVRdata.f(dataName=".DensitesSVR", data=Data)

               ## Sum by factor cross / rotation :
               densTmp2 <- apply(densTmp,
                                 which(is.element(names(dimnames(densTmp)), c(factors, "rotation"))),
                                 function(x)
                                 {
                                     ifelse(all(is.na(x)), NA, sum(x, na.rm=TRUE))
                                 })

               ## Maximum by factor cross :
               res <- as.array(apply(densTmp2,
                                     which(is.element(names(dimnames(densTmp)), factors)),
                                     function(x)
                                     {
                                         ifelse(all(is.na(x)), NA, max(x, na.rm=TRUE))
                                     }))
           },
           "densSD"={
               ## Density standard deviation across rotations (SVR data).
               ## Recuperation of raw densities with selections :
               densTmp <- getReducedSVRdata.f(dataName=".DensitesSVR", data=Data)

               ## Sum by factor cross / rotation :
               densTmp2 <- apply(densTmp,
                                 which(is.element(names(dimnames(densTmp)), c(factors, "rotation"))),
                                 function(x)
                                 {
                                     ifelse(all(is.na(x)), NA, sum(x, na.rm=TRUE))
                                 })

               ## SD by factor cross :
               res <- as.array(apply(densTmp2,
                                     which(is.element(names(dimnames(densTmp)), factors)),
                                     function(x)
                                     {
                                         ifelse(all(is.na(x)), NA, sd(x, na.rm=TRUE))
                                     }))
           },
           "%.nesting"={
               ## Mean weighted by the number of readable tracks :
               res <- tapply(1:nrow(Data),
                             as.list(Data[ , factors, drop=FALSE]),
                             function(ii)
                             {
                                 ifelse(all(is.na(Data[ii, metric])),
                                        NA,
                                        weighted.mean(Data[ii, metric],
                                                      Data[ii, "readable.tracks"],
                                                      na.rm=TRUE))
                             })
           },
           stop("Not implemented!")
           )

    ## dimension names
    names(dimnames(res)) <- c(factors)

    ## Transformation to long format :
    reslong <- as.data.frame(as.table(res), responseName=metric)
    reslong <- reslong[ , c(tail(colnames(reslong), 1), head(colnames(reslong), -1))] # metric first

    return(reslong)
}
527
528 ######################################### end of the function agregation.f
529
######################################### start of the function agregations.generic.f called by calcBiodiv.f in FunctExeCalcCommIndexesGalaxy.r
531
agregations.generic.f <- function(Data, metrics, factors, listFact=NULL, unitSpSz=NULL, unitSp=NULL,
                                  nbName="number")
{
    ## Purpose: Aggregate data
    ## ----------------------------------------------------------------------
    ## Arguments: Data : data set
    ##            metrics : aggregated metric(s)
    ##            factors : aggregation factors
    ##            listFact : other factors to aggregate and add to output
    ##            unitSpSz : Metrics table by unitobs/species/Size Class
    ##            unitSp : Metrics table by unitobs/species
    ##            nbName : abundance colname
    ##
    ## Output : aggregated data frame, or NULL when one of the extra factors
    ##          is incompatible with the grouping (see final check).
    ## ----------------------------------------------------------------------
    ## Author: Yves Reecht, Date: 18 oct. 2010, 15:47 modified by Coline ROYAUX 04 june 2020

    ## Aggregation method for each supported metric
    ## (dispatched inside agregation.f) :
    casMetrique <- c("number"="sum",
                     "mean.length"="w.mean",
                     "taille_moy"="w.mean",
                     "biomass"="sum",
                     "Biomass"="sum",
                     "weight"="sum",
                     "mean.weight"="w.mean",
                     "density"="sum",
                     "Density"="sum",
                     "CPUE"="sum",
                     "CPUE.biomass"="sum",
                     "pres.abs"="pres",
                     "abundance.prop.SC"="w.mean.prop", # Not OK [!!!] ?
                     "biomass.prop.SC"="w.mean.prop.bio",  # Not OK [!!!] ?
                     ## Benthos :
                     "colonies"="sum",
                     "coverage"="sum",
                     "mean.size.colonies"="w.mean.colonies",
                     ## SVR (experimental) :
                     "number.max"="nbMax",
                     "number.sd"="nbSD",
                     "density.max"="densMax",
                     "density.sd"="densSD",
                     "biomass.max"="sum",
                     "spawning.success"="%.nesting",
                     "spawnings"="sum",
                     "readable.tracks"="sum",
                     "tracks.number"="sum")

    ## add "readable.tracks" for egg laying percentage :
    ## (weights needed by the "%.nesting" aggregation method)
    if (any(casMetrique[metrics] == "%.nesting"))
    {
        if (is.element("size.class", colnames(Data)))
        {
            if (is.null(unitSpSz)) stop("unitSpSz doit être défini")

            Data <- merge(Data,
                          unitSpSz[ , c("species.code", "observation.unit", "size.class", "readable.tracks")],
                          by=c("species.code", "observation.unit", "size.class"),
                          suffixes=c("", ".y"))
        }else{
            if (is.null(unitSp)) stop("unitSp must be defined")

            Data <- merge(Data,
                          unitSp[ , c("species.code", "observation.unit", "readable.tracks")],
                          by=c("species.code", "observation.unit"),
                          suffixes=c("", ".y"))
        }
    }else{}

    ## Add "number" field for computing weighted means if absent :
    if (any(casMetrique[metrics] == "w.mean" | casMetrique[metrics] == "w.mean.prop"))
    {
        if (is.element("size.class", colnames(Data)))
        {
            if (is.null(unitSpSz)) stop("unitSpSz must be defined")

            Data <- merge(Data,
                          unitSpSz[ , c("species.code", "observation.unit", "size.class", nbName)],
                          by=c("species.code", "observation.unit", "size.class"),
                          suffixes=c("", ".y"))

            ## add tot abundance / species / observation unit :
            ## (denominator of the "w.mean.prop" proportion)
            nbTot <- tapply(unitSpSz[ , nbName],
                            as.list(unitSpSz[ , c("species.code", "observation.unit")]),
                            sum, na.rm=TRUE)

            Data <- merge(Data,
                          as.data.frame(as.table(nbTot), responseName="nombre.tot"))
        }else{
            if (is.null(unitSp)) stop("unitSp must be defined")

            Data <- merge(Data,
                          unitSp[ , c("species.code", "observation.unit", nbName)], # [!!!] unitSpSz ?
                          by=c("species.code", "observation.unit"),
                          suffixes=c("", ".y"))
        }
    }else{}

    ## Add biomass field for biomass proportion by size class :
    if (any(casMetrique[metrics] == "w.mean.prop.bio"))
    {
        if (is.null(unitSpSz)) stop("unitSpSz doit être défini")

        Data <- merge(Data,
                      unitSpSz[ , c("species.code", "observation.unit", "size.class", "biomass")],
                      by=c("species.code", "observation.unit", "size.class"),
                      suffixes=c("", ".y"))

        ## add tot biomass / species / observation unit :
        ## (denominator of the "w.mean.prop.bio" proportion)
        biomTot <- tapply(unitSpSz$biomass,
                          as.list(unitSpSz[ , c("species.code", "observation.unit")]),
                          function(x)
                          {
                              ifelse(all(is.na(x)),
                                     NA,
                                     sum(x, na.rm=TRUE))
                          })

        Data <- merge(Data,
                      as.data.frame(as.table(biomTot), responseName="tot.biomass"))
    }

    ## add colony field for weighted means if absent :
    ## (values looked up in unitSp by species x observation unit key)
    if (any(casMetrique[metrics] == "w.mean.colonies" & ! is.element("colonies", colnames(Data))))
    {
        Data$colonies <- unitSp[match(apply(Data[ , c("species.code", "observation.unit")],
                                            1, paste, collapse="*"),
                                      apply(unitSp[ , c("species.code", "observation.unit")],
                                            1, paste, collapse="*")), "colonies"]
    }else{}


    ## Aggregation of each metric depending on factors, then column-bind
    ## without duplicating the factor columns :
    reslong <- betterCbind(dfList=lapply(metrics, # sapply used to have names
                                         agregation.f,
                                         Data=Data, factors=factors, casMetrique=casMetrique,
                                         nbName=nbName))

    ## Aggregation and add other factors :
    ## (each extra factor must be constant within a group, otherwise NULL)
    if ( ! (is.null(listFact) || length(listFact) == 0))
    {
        reslong <- cbind(reslong,
                         sapply(Data[ , listFact, drop=FALSE],
                                function(fact)
                                {
                                    tapply(fact,
                                           as.list(Data[ , factors, drop=FALSE]),
                                           function(x)
                                           {
                                               if (length(x) > 1 && length(unique(x)) > 1) # must be one modality
                                               {
                                                   return(NULL) # otherwise it is NULL
                                               }else{
                                                   unique(as.character(x))
                                               }
                                           })
                                }))
    }else{}

    ## If some factors aren't at the right class :
    ## (as.table/as.data.frame coerce everything to factor; restore the
    ## original column classes from Data)
    if (any(tmp <- sapply(reslong[ , listFact, drop=FALSE], class) != sapply(Data[ , listFact, drop=FALSE], class)))
    {
        for (i in which(tmp))
        {
            switch(sapply(Data[ , listFact, drop=FALSE], class)[i],
                   "integer"={
                       reslong[ , listFact[i]] <- as.integer(as.character(reslong[ , listFact[i]]))
                   },
                   "numeric"={
                       reslong[ , listFact[i]] <- as.numeric(as.character(reslong[ , listFact[i]]))
                   },
                   ## Default : generic "as.<class>" coercion built by name.
                   reslong[ , listFact[i]] <- eval(call(paste("as", sapply(Data[ , listFact, drop=FALSE], class)[i], sep="."),
                                                        reslong[ , listFact[i]]))
                   )
        }
    }else{}

    ## Initial order of factors levels :
    reslong <- as.data.frame(sapply(colnames(reslong),
                                    function(x)
                                    {
                                        if (is.factor(reslong[ , x]))
                                        {
                                            return(factor(reslong[ , x], levels=levels(Data[ , x])))
                                        }else{
                                            return(reslong[ , x])
                                        }
                                    }, simplify=FALSE))


    ## Check of the other aggregated factors. There must be no NULL elements :
    if (any(sapply(reslong[ , listFact], function(x){any(is.null(unlist(x)))})))
    {
        warning(paste("One of the suppl. factors is probably a subset",
                      " of the observations grouping factor(s).", sep=""))
        return(NULL)
    }else{
        return(reslong)
    }
}
731
732 ######################################### end of the function agregations.generic.f
733
######################################### start of the function dropLevels.f called by calcBiodiv.f in FunctExeCalcCommIndexesGalaxy.r and modeleLineaireWP2.unitobs.f in FunctExeCalcGLMGalaxy.r
dropLevels.f <- function(df, which=NULL)
{
    ## Purpose: Suppress unused levels of the factor columns of a data.frame.
    ## ----------------------------------------------------------------------
    ## Arguments: df : a data.frame
    ##            which : included columns index (all by default)
    ## Output: df with unused factor levels dropped; non-factor columns are
    ##         left untouched.
    ## ----------------------------------------------------------------------
    ## Author: Yves Reecht, Date: 10 août 2010, 13:29 modified by Coline ROYAUX 04 june 2020

    ## inherits() is robust where `class(df) != "data.frame"` fails on
    ## objects carrying several classes (e.g. tibbles, grouped data.frames) :
    if (!inherits(df, "data.frame"))
    {
        stop("'df' must be a data.frame")
    }

    if (is.null(which))
    {
        ## Base droplevels() already does exactly this for every factor
        ## column, without the hand-rolled sapply of the original :
        return(droplevels(df))
    }

    ## Only some columns used :
    df[ , which] <- droplevels(df[ , which, drop=FALSE])
    return(df)
}
770
771 ######################################### start of the function subsetToutesTables.f called by modeleLineaireWP2.unitobs.f in FunctExeCalcGLMGalaxy.r
772
subsetToutesTables.f <- function(metrique, tabMetrics, facteurs, selections,
                                 tabUnitobs, refesp, tableMetrique="", nbName="number", ObsType = "",
                                 exclude=NULL, add=c("species.code", "observation.unit"))
{
    ## Purpose: Extract useful data only from chosen metrics and factors
    ## ----------------------------------------------------------------------
    ## Arguments: metrique : chosen metric
    ##            tabMetrics : metrics table (data.frame)
    ##            facteurs : all chosen factors
    ##            selections : corresponding modality selected
    ##            tabUnitobs : observation-unit reference table
    ##            refesp : species reference table
    ##            tableMetrique : metrics table name
    ##            nbName : abundance column name
    ##            ObsType : observation type (e.g. "fishing")
    ##            exclude : factors levels to exclude
    ##            add : fields to add to data table
    ## Output: data.frame restricted to the requested metric columns plus the
    ##         selected factor columns from unitobs and species references.
    ## ----------------------------------------------------------------------
    ## Author: Yves Reecht, Date: 6 août 2010, 16:46 modified by Coline ROYAUX 04 june 2020

    ## If no metrics table available :
    if (is.element(tableMetrique, c("", "TableOccurrences", "TablePresAbs")))
    {
        tableMetrique <- "unitSp"
    }else{}

    ## Map table name to the relevant subset case below.
    ## NOTE(review): "TablePresAbs" is already remapped to "unitSp" above,
    ## so that entry is presumably redundant — confirm.
    casTables <- c("unitSp"="unitSp",
                   "TablePresAbs"="unitSp",
                   "unitSpSz"="unitSpSz")

    ## Recuperation of metrics table :
    dataMetrique <- tabMetrics
    unitobs <- tabUnitobs
    refesp <- refesp

    ## If no metrics available or already computed :
    ## (build a temporary 0/1 presence column instead)
    if (is.element(metrique, c("", "occurrence.frequency")))
    {
        metrique <- "tmp"
        dataMetrique$tmp <- 0
        dataMetrique$tmp[dataMetrique[ , nbName] > 0] <- 1
    }else{}

    ## Columns kept from the metrics table (metric + key columns) :
    if (!is.null(add))
    {
        metriques <- c(metrique, add[is.element(add, colnames(dataMetrique))])
    }else{
        metriques <- metrique
    }

    ## Subset depending on metrics table
    switch(casTables[tableMetrique],
           ## Observation table by unitobs and species :
           unitSp={
               restmp <- cbind(dataMetrique[!is.na(dataMetrique[ , metrique]) , metriques, drop=FALSE],
                               unitobs[match(dataMetrique$observation.unit[!is.na(dataMetrique[ , metrique])],
                                             unitobs$observation.unit), # add selected unitobs columns
                                       facteurs[is.element(facteurs, colnames(unitobs))], drop=FALSE],
                               refesp[match(dataMetrique$species.code[!is.na(dataMetrique[ , metrique])],
                                            refesp$species.code), # add selected species columns
                                      facteurs[is.element(facteurs, colnames(refesp))], drop=FALSE])
           },
           ## Observation table by unitobs, species and size class :
           unitSpSz={
               restmp <- cbind(dataMetrique[!is.na(dataMetrique[ , metrique]) ,
                                            c(metriques, "size.class"), drop=FALSE],
                               unitobs[match(dataMetrique$observation.unit[!is.na(dataMetrique[ , metrique])],
                                             unitobs$observation.unit), # add selected unitobs columns
                                       facteurs[is.element(facteurs, colnames(unitobs))], drop=FALSE],
                               refesp[match(dataMetrique$species.code[!is.na(dataMetrique[ , metrique])],
                                            refesp$species.code), # add selected species columns
                                      facteurs[is.element(facteurs, colnames(refesp))], drop=FALSE])
           },
           ## Other cases :
           restmp <- cbind(dataMetrique[!is.na(dataMetrique[ , metrique]) , metriques, drop=FALSE],
                           unitobs[match(dataMetrique$observation.unit[!is.na(dataMetrique[ , metrique])],
                                         unitobs$observation.unit), # add selected unitobs columns
                                   facteurs[is.element(facteurs, colnames(unitobs))], drop=FALSE])
           )

    ## Index of factors that have a selected modality :
    ## NOTE(review): `selCol` is not used below in this view — confirm
    ## whether it is needed by the (possibly trimmed) remainder of the code.
    selCol <- which(!is.na(selections))
    if (!is.null(exclude))
    {
        selCol <- selCol[selCol != exclude]
    }

    ## Particular case of size classes :
    ## (orders "min-max"-style labels numerically by their lower bound)
    if (is.element("size.class", colnames(restmp)))
    {
        if (length(grep("^[[:digit:]]*[-_][[:digit:]]*$", unique(as.character(restmp$size.class)), perl=TRUE)) ==
            length(unique(as.character(restmp$size.class))))
        {
            restmp$size.class <-
                factor(as.character(restmp$size.class),
                       levels=unique(as.character(restmp$size.class))[
                               order(as.numeric(sub("^([[:digit:]]*)[-_][[:digit:]]*$",
                                                    "\\1",
                                                    unique(as.character(restmp$size.class)),
                                                    perl=TRUE)),
                                     na.last=FALSE)])
        }else{
            restmp$size.class <- factor(restmp$size.class)
        }
    }else{}

    ## Biomass and density conversion -> /100m² :
    ## (not applied to fishing data)
    if (any(is.element(colnames(restmp), c("biomass", "density",
                                           "biomass.max", "density.max",
                                           "biomass.sd", "density.sd"))) && ObsType != "fishing")
    {
        restmp[ , is.element(colnames(restmp),
                             c("biomass", "density",
                               "biomass.max", "density.max",
                               "biomass.sd", "density.sd"))] <- 100 *
                                 restmp[, is.element(colnames(restmp),
                                                     c("biomass", "density",
                                                       "biomass.max", "density.max",
                                                       "biomass.sd", "density.sd"))]
    }else{}

    return(restmp)
}
890
891 ######################################### end of the function subsetToutesTables.f
892
893
894 ######################################### start of the function sortiesLM.f called by modeleLineaireWP2.unitobs.f in FunctExeCalcGLMGalaxy.r
sortiesLM.f <- function(objLM, TabSum, #formule,
                        metrique, factAna, cut, colAna, listFact, lev = NULL, Data,
                        Log=FALSE, sufixe=NULL, type="espece")
{
    ## Purpose: Extract GLM / GLMM results from a fitted model and write them
    ##          into the summary table rows selected by TabSum[, colAna] == cut.
    ## ----------------------------------------------------------------------
    ## Arguments: objLM : fitted lm / glm / glmmTMB object
    ##            TabSum : output summary table (one row per separation level)
    ##            formule : LM formula (kept in the signature comment, unused)
    ##            metrique : chosen metric
    ##            factAna : separation factor
    ##            cut : level of separation factor
    ##            colAna : colname for separation factor in output summary table
    ##            listFact : analysis factors list
    ##            lev : levels of analysis factors (NULL when no fixed effects)
    ##            Data : data used for analysis
    ##            Log : put log on metric ? (boolean)
    ##            sufixe : suffix for file name
    ##            type : analysis type
    ## Returns: TabSum with the row(s) matching `cut` filled in.
    ##
    ## NOTE(review): `listRand` is not an argument of this function; it is read
    ## from the calling environment (global). Confirm it is always defined
    ## before this function is called with a glmmTMB model.
    ## ----------------------------------------------------------------------
    ## Author: Yves Reecht, Date: 25 August 2010, 16:19 modified by Coline ROYAUX 04 june 2020

    sumLM <- summary(objLM)
    if (length(grep("^glmmTMB", objLM$call)) > 0) #if random effects
    {
        ## glmmTMB summary: information criteria are stored in AICtab, in the
        ## order AIC, BIC, logLik, deviance, df.resid.
        TabSum[TabSum[,colAna]==cut,"AIC"] <- sumLM$AICtab[1]
        TabSum[TabSum[,colAna]==cut,"BIC"] <- sumLM$AICtab[2]
        TabSum[TabSum[,colAna]==cut,"logLik"] <- sumLM$AICtab[3]
        TabSum[TabSum[,colAna]==cut,"deviance"] <- sumLM$AICtab[4]
        TabSum[TabSum[,colAna]==cut,"df.resid"] <- sumLM$AICtab[5]

        if (! is.null(lev)) ## if fixed effects + random effects
        {
            ## Conditional-model coefficients; flag p < 0.05 as "yes".
            TabCoef <- as.data.frame(sumLM$coefficients$cond)
            TabCoef$signif <- lapply(TabCoef[,"Pr(>|z|)"],FUN=function(x){if(!is.na(x) && x < 0.05){"yes"}else{"no"}})

            ## Intercept statistics, matched by column-name pattern:
            TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Zvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"z value"]
            TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Pvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"Pr(>|z|)"]

            ## One value per factor level in `lev`; NA when the level does not
            ## appear among the coefficient row names.
            TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Zvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"z value"]}else{NA}}))
            TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Pvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"Pr(>|z|)"]}else{NA}}))
        }else{}

        ## Random-effect standard deviations (up to two random effects are
        ## handled; otherwise StdD is NULL).
        switch(as.character(length(sumLM$varcor$cond)),
               "1"={StdD <- c(sumLM$varcor$cond[[1]])},
               "2"={StdD <- c(sumLM$varcor$cond[[1]],sumLM$varcor$cond[[2]])},
               StdD <- NULL)
        ## NOTE(review): `listRand` comes from the calling environment (see
        ## header note).
        TabSum[TabSum[,colAna]==cut,grepl(paste(listRand,"Std.Dev",collapse="|"),colnames(TabSum))] <- StdD
        TabSum[TabSum[,colAna]==cut,grepl(paste(listRand,"NbObservation",collapse="|"),colnames(TabSum))] <- sumLM$nobs
        TabSum[TabSum[,colAna]==cut,grepl(paste(listRand,"NbLevels",collapse="|"),colnames(TabSum))] <- unlist(lapply(listRand,FUN=function(x){nlevels(Data[,x])}))

    }else{ ## if fixed effects only

        TabSum[TabSum[,colAna]==cut,"AIC"] <- sumLM$aic
        TabSum[TabSum[,colAna]==cut,"Resid.deviance"] <- sumLM$deviance
        TabSum[TabSum[,colAna]==cut,"df.resid"] <- sumLM$df.residual
        TabSum[TabSum[,colAna]==cut,"Null.deviance"] <- sumLM$null.deviance
        TabSum[TabSum[,colAna]==cut,"df.null"] <- sumLM$df.null
        TabCoef <- as.data.frame(sumLM$coefficients)

        ## Gaussian / quasipoisson families report t statistics; the other
        ## families report z statistics.
        if (sumLM$family[1] == "gaussian" || sumLM$family[1] == "quasipoisson")
        {

            TabCoef$signif <- lapply(TabCoef[,"Pr(>|t|)"],FUN=function(x){if(!is.na(x) && x < 0.05){"yes"}else{"no"}})
            TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Tvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"t value"]
            TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Pvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"Pr(>|t|)"]

            TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Tvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"t value"]}else{NA}}))

            TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Pvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"Pr(>|t|)"]}else{NA}}))
        }else{
            TabCoef$signif <- lapply(TabCoef[,"Pr(>|z|)"],FUN=function(x){if(!is.na(x) && x < 0.05){"yes"}else{"no"}})

            TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Zvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"z value"]
            TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Pvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"Pr(>|z|)"]

            TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Zvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"z value"]}else{NA}}))
            TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Pvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"Pr(>|z|)"]}else{NA}}))
        }
    }

    if (! is.null(lev)) ## if fixed effects
    {
        ## Estimates / standard errors / significance flags, written for both
        ## branches above (TabCoef is set in either case).
        TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Estimate",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"Estimate"]
        TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Std.Err",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"Std. Error"]
        TabSum[TabSum[,colAna]==cut,grepl("Intercept.*signif",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"signif"]

        TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Estimate",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"Estimate"]}else{NA}}))

        TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Std.Err",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"Std. Error"]}else{NA}}))
        TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"signif",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"signif"]}else{NA}}))
    }else{}

    return(TabSum)

}
991
992
993 ######################################### end of the function sortiesLM.f
994
995 ######################################### start of the function graphTitle.f called by sortiesLM.f
996
graphTitle.f <- function(metrique, modGraphSel, factGraph, listFact, model=NULL, type="espece",
                         lang = getOption("P.lang"))
{
    ## Purpose: Automatically build a title for a graph.
    ## ----------------------------------------------------------------------
    ## Arguments: metrique : metric name used in the title
    ##            modGraphSel : selected level(s) of the separation factor
    ##            factGraph : separation factor name
    ##            listFact : analysis/grouping factors (character vector)
    ##            model : optional model description prefix (NULL -> "Values of")
    ##            type : aggregation type ("espece", "unitobs", ...)
    ##            lang : language option (read but unused in this body)
    ## ----------------------------------------------------------------------
    ## Author: Yves Reecht, Date: 14 oct. 2010, 15:44 modified by Coline ROYAUX 04 june 2020

    ## The whole title is one big paste(); switch() arms falling through (the
    ## "arm"= form) share the next non-empty value, and non-matching switch()
    ## calls return NULL, which paste() drops.
    return(paste(ifelse(is.null(model),
                        "Values of ",
                        paste(model,
                              " for",
                              sep="")),
                 metrique,
                 ifelse(is.element(type, c("espece", "unitobs", "CL_espece", "unitobs(CL)")),
                        paste("aggregated"),
                        ""),
                 switch(type,
                        "espece"=" per species and station",
                        "CL_espece"=" per size class, species and station",
                        "unitobs"=" per station",
                        "unitobs(CL)"=" per station",
                        "CL_unitobs"=" per size class and station",
                        "biodiv"=" per station",
                        ""),
                 switch(type,
                        "espece"={
                            ifelse(modGraphSel == "", # Only separation factor if defined
                                   "",
                                   paste("\nfor the field",
                                         " '", factGraph, "' = ", modGraphSel, sep=""))
                        },
                        "CL_espece"={
                            ifelse(modGraphSel == "", # Only separation factor if defined
                                   "",
                                   paste("\nfor the field",
                                         " '", factGraph, "' = ", modGraphSel, sep=""))
                        },
                        "unitobs"={
                            ifelse(modGraphSel[1] == "", # Only separation factor if defined
                                   "\nfor all species",
                                   paste("\nfor all species matching",
                                         " '", factGraph, "' = (",
                                         paste(modGraphSel, collapse=", "), ")", sep=""))
                        },
                        "unitobs(CL)"={
                            ifelse(modGraphSel[1] == "", # Only separation factor if defined
                                   "\nfor all size classes",
                                   paste("\nfor size classes matching",
                                         " '", factGraph, "' = (",
                                         paste(modGraphSel, collapse=", "), ")", sep=""))
                        },
                        "CL_unitobs"={
                            ifelse(modGraphSel[1] == "", # Only separation factor if defined
                                   "\nfor all species",
                                   paste("\nfor all species matching",
                                         " '", factGraph, "' = (",
                                         paste(modGraphSel, collapse=", "), ")", sep=""))
                        },
                        "biodiv"={
                            ifelse(modGraphSel[1] == "", # Only separation factor if defined
                                   "",
                                   paste("\nfor stations matching",
                                         " '", factGraph, "' = (",
                                         paste(modGraphSel, collapse=", "), ")", sep=""))
                        },
                        ""),
                 "\n by ",
                 ## NOTE(review): `collapse=" and"` below is syntactically an
                 ## argument to sapply() (not to the enclosing paste) and is
                 ## forwarded through sapply's '...' to the one-argument
                 ## anonymous function; similarly `collapse=""` sits inside
                 ## c() rather than paste(). This looks like a latent defect —
                 ## confirm against upstream PAMPA sources before changing.
                 paste(sapply(listFact[length(listFact):1],
                              function(x)paste(c(## varNames.f(x, "article"),
                                  "",
                                  x, collapse="")),
                              collapse=" and"),
                       "\n", sep="")))
}
1072
1073 ######################################### end of the function graphTitle.f
1074
1075 ######################################### start of the function noteGLM.f called by modeleLineaireWP2.species.f and modeleLineaireWP2.unitobs.f
1076
noteGLM.f <- function(data, objLM, metric, listFact, details = FALSE)
{
    ## Purpose: Rate a GLM / GLMM analysis against data and diagnostic criteria.
    ## ----------------------------------------------------------------------
    ## Arguments: data : dataframe used for analysis
    ##            objLM : fitted GLM / glmmTMB object to assess
    ##            metric : selected metric (kept for interface compatibility;
    ##                     not used in the computation)
    ##            listFact : analysis factors list (column names in `data`)
    ##            details : if TRUE, return the per-criterion list; otherwise
    ##                      return only the numeric rate
    ## Returns: numeric rate (or detail list when details = TRUE); NA for
    ##          quasi- families, which DHARMa cannot simulate.
    ## ----------------------------------------------------------------------
    ## Author: Coline ROYAUX, 26 june 2020

    rate <- 0
    detres <- list(complete_plan=NA, balanced_plan=NA, NA_proportion_OK=NA, no_residual_dispersion=NA, uniform_residuals=NA, outliers_proportion_OK=NA, no_zero_inflation=NA, observation_factor_ratio_OK=NA, enough_levels_random_effect=NA, rate=NA)

    #### Data criteria ####

    ## Sampling plan

    plan <- as.data.frame(table(data[,listFact]))

    if (nrow(plan[plan$Freq==0,]) < nrow(plan)*0.1) # +0.5 if less than 10% of possible factor's level combinations aren't represented in the sampling scheme
    {
        rate <- rate + 0.5
        detres$complete_plan <- TRUE

        if (summary(as.factor(plan$Freq))[1] > nrow(plan)*0.9) # +0.5 if the frequency of the most represented frequency of possible factor's levels combinations is superior to 90% of the total number of possible factor's levels combinations
        {
            rate <- rate + 0.5
            detres$balanced_plan <- TRUE
        }else{}

    }else{
        detres$complete_plan <- FALSE
        detres$balanced_plan <- FALSE
    }

    if (nrow(data) - nrow(na.omit(data)) < nrow(data)*0.1) # +1 if less than 10% of the lines in the dataframe bares a NA
    {
        rate <- rate + 1
        detres$NA_proportion_OK <- TRUE
    }else{
        detres$NA_proportion_OK <- FALSE
    }

    #### Model criteria ####

    if (length(grep("quasi",objLM$family)) == 0) #DHARMa doesn't work with quasi distributions
    {

        Residuals <- simulateResiduals(objLM)

        capture.output(testRes <- testResiduals(Residuals))
        testZero <- testZeroInflation(Residuals)

        ## dispersion of residuals

        if (testRes$dispersion$p.value > 0.05) # +1.5 if dispersion test not significant
        {
            rate <- rate + 1.5
            detres$no_residual_dispersion <- TRUE
        }else{
            detres$no_residual_dispersion <- FALSE
        }

        ## uniformity of residuals

        if (testRes$uniformity$p.value > 0.05) # +1.5 if uniformity test not significant
        {
            rate <- rate + 1.5
            detres$uniform_residuals <- TRUE
        }else{
            detres$uniform_residuals <- FALSE
        }

        ## residuals outliers

        if (testRes$outliers$p.value > 0.05) # +0.5 if outliers test not significant
        {
            rate <- rate + 0.5
            detres$outliers_proportion_OK <- TRUE
        }else{
            detres$outliers_proportion_OK <- FALSE
        }

        ## Zero inflation test

        if (testZero$p.value > 0.05) # +1.5 if zero inflation test not significant
        {
            rate <- rate + 1.5
            detres$no_zero_inflation <- TRUE
        }else{
            detres$no_zero_inflation <- FALSE
        }

        ## Factors/observations ratio

        if (length(listFact)/nrow(na.omit(data)) < 0.1) # +1 if quantity of factors is less than 10% of the quantity of observations
        {
            rate <- rate + 1
            detres$observation_factor_ratio_OK <- TRUE
        }else{
            detres$observation_factor_ratio_OK <- FALSE
        }

        ## more than 10 factor levels on each random effect

        if (length(grep("^glmmTMB", objLM$call)) > 0)
        {
            ## Number of distinct levels per random-effect grouping variable
            ## (vapply replaces the previous grow-in-loop c() accumulation).
            nlevRand <- vapply(names(summary(objLM)$varcor$cond),
                               FUN=function(fact){length(unlist(unique(data[,fact])))},
                               FUN.VALUE=integer(1))

            if (all(nlevRand > 10)) # +1 if more than 10 levels in every random effect
            {
                rate <- rate + 1
                detres$enough_levels_random_effect <- TRUE
            }else{
                detres$enough_levels_random_effect <- FALSE
            }
        }else{}

        detres$rate <- rate

        if (details)
        {
            return(detres)
        }else{
            return(rate)
        }

    }else{
        ## Bug fix: the message used to sit AFTER return(NA) and was
        ## unreachable dead code; emit it before returning.
        cat("Models with quasi distributions can't be rated for now")
        return(NA)
    }
}
1214
1215 ######################################### end of the function noteGLM.f
1216
1217 ######################################### start of the function noteGLMs.f called by modeleLineaireWP2.species.f and modeleLineaireWP2.unitobs.f
1218
noteGLMs.f <- function(tabRate, exprML, objLM, file_out=FALSE)
{
    ## Purpose: Aggregate per-analysis GLM rates into a global rating and,
    ##          optionally, write a text report ("RatingGLM.txt").
    ## ----------------------------------------------------------------------
    ## Arguments: tabRate : rates table produced by noteGLM.f (one row per
    ##                      analysis, with the detail columns and "rate")
    ##            exprML : model formula (reproduced in the report)
    ##            objLM : one fitted GLM / glmmTMB object (used for its family
    ##                    and to detect random effects via its call)
    ##            file_out : if TRUE, write the report file and return nothing;
    ##                       if FALSE, return the global rate (numeric)
    ## ----------------------------------------------------------------------
    ## Author: Coline ROYAUX, 26 june 2020

    RateM <- mean(na.omit(tabRate[,"rate"]))
    ## NOTE(review): `sum` shadows base::sum inside this function.
    sum <- summary(objLM)

    ## Bonus points when the bulk of the analyses rate well; thresholds differ
    ## because mixed models (glmmTMB) can score one extra point in noteGLM.f.
    if (length(grep("^glmmTMB", objLM$call)) > 0)
    {
        if (median(na.omit(tabRate[,"rate"])) >= 6) # if 50% has a rate superior or equal to 6 +1
        {
            RateM <- RateM + 1
        }

        if (quantile(na.omit(tabRate[,"rate"]), probs=0.9) >= 6) # if 90% has a rate superior or equal to 6 +1
        {
            RateM <- RateM + 1
        }
    }else{
        if (median(na.omit(tabRate[,"rate"])) >= 5) # if 50% has a rate superior or equal to 5 +1
        {
            RateM <- RateM + 1
        }

        if (quantile(na.omit(tabRate[,"rate"]), probs=0.9) >= 5) # if 90% has a rate superior or equal to 5 +1
        {
            RateM <- RateM + 1
        }
    }

    if (file_out)
    {
        namefile <- "RatingGLM.txt"

        cat("###########################################################################",
            "\n########################### Analysis evaluation ###########################",
            "\n###########################################################################", file=namefile, fill=1,append=TRUE)

        ## Informations on model :
        cat("\n\n######################################### \nFitted model:", file=namefile, fill=1,append=TRUE)
        cat("\t", deparse(exprML), "\n\n", file=namefile, sep="",append=TRUE)
        cat("Family: ", sum$family[[1]],
            file=namefile,append=TRUE)
        cat("\n\nNumber of analysis: ", nrow(tabRate), file=namefile, append=TRUE)

        ## Global rate :
        cat("\n\n######################################### \nGlobal rate for all analysis:",
            "\n\n", RateM, "out of 10", file=namefile, append=TRUE)

        ## details on every GLM :
        ## Columns: NA_proportion_OK, no_residual_dispersion, uniform_residuals,
        ## outliers_proportion_OK, no_zero_inflation, observation_factor_ratio_OK,
        ## enough_levels_random_effect, rate
        cat("\n\n######################################### \nDetails on every analysis:\n\n", file=namefile, append=TRUE)
        cat("Analysis\tC1\tC2\tC3\tC4\tC5\tC6\tC7\tC8\tC9\tFinal rate", file=namefile, append=TRUE)
        apply(tabRate, 1, FUN=function(x)
             {

                 if (!is.na(x["complete_plan"]) && x["complete_plan"]==TRUE)
                 {
                     cat("\n",x[1],"\tyes", file=namefile, append=TRUE)
                 }else{
                     cat("\n",x[1],"\tno", file=namefile, append=TRUE)
                 }

                 for (i in c("balanced_plan","NA_proportion_OK", "no_residual_dispersion", "uniform_residuals", "outliers_proportion_OK", "no_zero_inflation", "observation_factor_ratio_OK", "enough_levels_random_effect"))
                 {
                     if (!is.na(x[i]) && x[i]==TRUE)
                     {
                         cat("\tyes", file=namefile, append=TRUE)
                     }else{
                         cat("\tno", file=namefile, append=TRUE)
                     }
                 }

                 ## NOTE(review): individual rates from noteGLM.f can reach 9
                 ## when a random effect is scored — confirm the "/ 8" label.
                 cat("\t",x["rate"], "/ 8", file=namefile, append=TRUE)


             })
        cat("\n\nC1: Complete plan?\nC2: Balanced plan?\nC3: Few NA?\nC4: Regular dispersion?\nC5: Uniform residuals?\nC6: Regular outliers proportion?\nC7: No zero-inflation?\nC8: Enough observations for the amount of factors?\nC9: Enough levels on random effect?", file=namefile, append=TRUE)

        ## Red flags - advice :
        cat("\n\n######################################### \nRed flags - advice:\n\n", file=namefile, append=TRUE)
        if (all(na.omit(tabRate["NA_proportion_OK"]) == FALSE))
        {
            cat("\n","\t- More than 10% of your dataset bares NAs", file=namefile, append=TRUE)
        }else{}

        if (length(grep("FALSE",tabRate["no_residual_dispersion"])) / length(na.omit(tabRate["no_residual_dispersion"])) > 0.5)
        {
            cat("\n","\t- More than 50% of your analyses are over- or under- dispersed : Try with another distribution family", file=namefile, append=TRUE)
        }else{}

        if (length(grep("FALSE",tabRate["uniform_residuals"])) / length(na.omit(tabRate["uniform_residuals"])) > 0.5)
        {
            cat("\n","\t- More than 50% of your analyses haven't an uniform distribution of residuals : Try with another distribution family", file=namefile, append=TRUE)
        }else{}

        if (length(grep("FALSE",tabRate["outliers_proportion_OK"])) / length(na.omit(tabRate["outliers_proportion_OK"])) > 0.5)
        {
            cat("\n","\t- More than 50% of your analyses have too much outliers : Try with another distribution family or try to select your data", file=namefile, append=TRUE)
        }else{}

        if (length(grep("FALSE",tabRate["no_zero_inflation"])) / length(na.omit(tabRate["no_zero_inflation"])) > 0.5)
        {
            cat("\n","\t- More than 50% of your analyses have zero inflation : Try to select your data", file=namefile, append=TRUE)
        }else{}

        if (length(grep("FALSE",tabRate["observation_factor_ratio_OK"])) / length(na.omit(tabRate["observation_factor_ratio_OK"])) > 0.5)
        {
            cat("\n","\t- More than 50% of your analyses have not enough observations for the amount of factors : Try to use less factors in your analysis or try to use another separation factor", file=namefile, append=TRUE)
        }else{}

        if (any(tabRate["enough_levels_random_effect"] == FALSE, na.rm=TRUE) && length(grep("^glmmTMB", objLM$call)) > 0)
        {
            cat("\n","\t- Random effect hasn't enough levels to be robust : If it has less than ten levels remove the random effect", file=namefile, append=TRUE)
        }else{}
    }else{

        return(RateM)

    }
}
1347
######################################### end of the function noteGLMs.f
1349
1350 ######################################### start of the function infoStats.f called by modeleLineaireWP2.species.f and modeleLineaireWP2.unitobs.f
1351
infoStats.f <- function(filename, Data, agregLevel=c("species", "unitobs"), type=c("graph", "stat"),
                        metrique, factGraph, factGraphSel, listFact, listFactSel)
{
    ## Purpose: Write information and summary statistics for the data behind
    ##          a graphic or an analysis.
    ## ----------------------------------------------------------------------
    ## Arguments: filename : path of the results file.
    ##            Data : data of the graphic/analysis (data.frame, or a list
    ##                   of data.frames when a splitting factor is used).
    ##            agregLevel : aggregation level of the calling function.
    ##            type : type of calling function (graphic or analysis).
    ##            metrique : the chosen metric.
    ##            factGraph : the species selection factor.
    ##            factGraphSel : the selected levels of this factor.
    ##            listFact : list of grouping factor(s).
    ##            listFactSel : list of selected levels for these factor(s).
    ## ----------------------------------------------------------------------
    ## Author: Yves Reecht, Date: 10 sept. 2012, 15:26 modified by Coline ROYAUX 04 june 2020

    ## Open file :
    File <- file(description=filename,
                 open="w", encoding="latin1")

    ## On error, close the file if it is still open :
    on.exit(if (exists("filename") &&
                tryCatch(isOpen(File),
                         error=function(e)return(FALSE))) close(File))

    ## Metrics and factors infos :
    printSelectionInfo.f(metrique=metrique, #factGraph=factGraph, factGraphSel=factGraphSel,
                         listFact=listFact, #listFactSel=listFactSel,
                         File=File,
                         agregLevel=agregLevel, type=type)

    ## statistics :
    ## (inherits() replaces class(Data) == "list": robust when the class
    ## attribute has several elements; still FALSE for data.frames.)
    if (inherits(Data, "list"))
    {
        cat("\n###################################################",
            "\nStatistics per level of splitting factor:\n",
            sep="", file=File,append=TRUE)

        ## One statistics section per splitting-factor level :
        invisible(sapply(seq_along(Data),
                         function(i)
                     {
                         printStats.f(Data=Data[[i]], metrique=metrique, listFact=listFact, File=File,
                                      headline=factGraphSel[i])
                     }))
    }else{
        printStats.f(Data=Data, metrique=metrique, listFact=listFact, File=File,
                     headline=NULL)
    }

    ## Close file :
    close(File)

}
1408
1409 ######################################### end of the function infoStats.f
1410
1411
1412 ######################################### start of the function printSelectionInfo.f called by infoStats.f
1413
printSelectionInfo.f <- function(metrique, listFact,
                                 File,
                                 agregLevel=c("species", "unitobs"), type=c("graph", "stat"))
{
    ## Purpose: Write data information (metric, aggregation level, factors)
    ##          to the results file.
    ## ----------------------------------------------------------------------
    ## Arguments: metrique : chosen metric
    ##            listFact : factor's list (character vector of column names)
    ##            File : results file (connection or file name)
    ##            agregLevel : aggregation level
    ##            type : calling function type ("graph" or "stat")
    ## ----------------------------------------------------------------------
    ## Author: Yves Reecht, Date: 11 sept. 2012, 10:41 modified by Coline ROYAUX 04 june 2020

    cat("\n##################################################\n",
        "Metrics and factors (and possible units/selections):\n",
        sep="", file=File,append=TRUE)

    ## metric info :
    cat("\n Metrics:", metrique,
        "\n", file=File,append=TRUE)

    ## aggregation level (non-matching switch() arms return NULL, which cat()
    ## simply drops) :
    cat(" aggregated per ",
        switch(agregLevel,
               "CL_espece"=,"CL_unitobs"=,"spCL_unitobs"=,"spCL_espece"={
                   "size class / "
               }),
        switch(agregLevel,
               "CL_espece"=,"spCL_espece"=,"species"=,"spSpecies"=,"spEspece"={
                   "species / "
               }),
        switch(agregLevel,
               "spUnitobs"=,"spCL_unitobs"=,"spCL_espece"=,"spUnitobs(CL)"=,"spSpecies"=,"spEspece"={
                   paste(listFact, " (mean over ", sep="")
               }),
        "observation units",
        switch(agregLevel,
               "spUnitobs"=,"spCL_unitobs"=,"spCL_espece"=,"spUnitobs(CL)"=,"spSpecies"=,"spEspece"={
                   ")"
               }),
        ".\n",
        sep="", file=File,append=TRUE)

    ## Separation factors :
    # switch(agregLevel,
    #        "species"=,"CL_espece"=,"espece"={ # Also suited for LMs.
    #            cat("\n",
    #                switch(type,
    #                       "graph"="Graphics separation factor",
    #                       "stat"="Analyses separation factor"),
    #                " : ",
    #                ifelse(factGraph == "", "printSelectionInfo.f.11",
    #                       ifelse(is.na(factGraphSel[1]),
    #                              paste(varNames.f(factGraph, "nom"), "none!"),
    #                              paste(varNames.f(factGraph, "nom"), " (",
    #                                    paste(factGraphSel, collapse=", "), ")", sep=""))), "\n",
    #                sep="", file=File,append=TRUE)
    #        },
    #        "unitobs"=,"CL_unitobs"=,"unitobs(CL)"=,"spUnitobs"={
    #            cat("(warning: no selection!!!)",
    #                ifelse(factGraph == "", "\nSelection factor for aggregation of observations: ",
    #                       ifelse(is.na(factGraphSel[1]),
    #                              paste(varNames.f(factGraph, "nom"), "none (all species/size classes)!"),
    #                              paste(varNames.f(factGraph, "nom"), " (",
    #                                    paste(factGraphSel, collapse=", "), ")", sep=""))), "\n",
    #                sep="", file=File,append=TRUE)
    #        })

    ## Clustering factors :
    if (is.element(agregLevel, c("spCL_unitobs", "spCL_espece", "spSpecies", "spEspece",
                                 "spUnitobs", "spUnitobs(CL)"))) {type <- "spatialGraph"}

    ## Bug fix: "collapse" was misspelled "collaspe", so the separator string
    ## was pasted after every factor name instead of joining them.
    cat(switch(type,
               "graph"="\nGrouping factor(s): \n * ",
               "stat"="\nAnalyses factor(s): \n * ",
               "spatialGraph"="\nSpatial aggregation factor(s): \n * "),
        paste(listFact, collapse="\n * "),"\n",file=File,append=TRUE)

    # invisible(sapply(1:length(listFact),
    #                  function(i)
    #              {
    #                  cat("\n * ",
    #                      ifelse(is.na(listFactSel[[i]][1]),
    #                             paste(varNames.f(listFact[i], "nom"), "(no selection)"),
    #                             paste(varNames.f(listFact[i], "nom"), " (",
    #                                   paste(listFactSel[[i]], collapse=", "), ")", sep="")), "\n",
    #                      sep="", file=File,append=TRUE)
    #              }))
}
1504
1505 ######################################### end of the function printSelectionInfo.f
1506
1507
1508 ######################################### start of the function printStats.f called by infoStats.f
1509
printStats.f <- function(Data, metrique, listFact, File, headline=NULL)
{
    ## Purpose: Write a general statistics table to the results file.
    ## ----------------------------------------------------------------------
    ## Arguments: Data : analysis data (data.frame)
    ##            metrique : metric's column name in Data
    ##            listFact : factor column names (NULL for no breakdown)
    ##            File : simple statistics file (connection or file name)
    ##            headline : optional header written above the statistics
    ## ----------------------------------------------------------------------
    ## Author: Yves Reecht, Date: 11 sept. 2012, 10:09 modified by Coline ROYAUX 04 june 2020

    ## Header :
    if ( ! is.null(headline))
    {
        cat("\n", rep("#", nchar(headline) + 3), "\n",
            "## ", headline, "\n",
            sep="", file=File,append=TRUE)
    }else{}

    cat("\n########################\nBase statistics:\n\n", file=File,append=TRUE)

    capture.output(print(summary.fr(Data[ , metrique])), file=File, append=TRUE)

    if ( ! is.null(listFact))
    {
        cat("\n#########################################",
            "\nStatistics per combination of factor levels:\n\n", file=File, sep="",append=TRUE)

        ## Compute summary for each existing combination of factor levels.
        ## Direct column access replaces the former eval(parse(text=...)):
        ## metrique and listFact are column names, exactly as the
        ## Data[ , metrique] call above already assumes.
        res <- tapply(Data[[metrique]],
                      INDEX=do.call(paste,
                                    c(lapply(listFact,
                                             function(y) Data[[y]]),
                                      sep=".")),
                      FUN=summary.fr)

        ## results in table
        capture.output(print(do.call(rbind, res)),
                       file=File, append=TRUE)
    }else{}

    ## empty line :
    cat("\n", file=File,append=TRUE)
}
1555
1556 ######################################### end of the function printStats.f
1557
1558
1559 ######################################### start of the function summary.fr called by printStats.f
summary.fr <- function(object, digits = max(3, getOption("digits") - 3),...)
{
    ## Purpose: summary() of a numeric vector, extended with SD and N.
    ## ----------------------------------------------------------------------
    ## Arguments: object : numeric vector to summarise
    ##            digits : significant digits (applied by summary() and to sd)
    ##            ... : further arguments passed on to summary()
    ## Returns: named numeric vector with the summary() components plus
    ##          "sd" and "N".
    ## ----------------------------------------------------------------------
    ## Author: Yves Reecht, Date: 13 sept. 2012, 15:47 modified by Coline ROYAUX 04 june 2020

    if ( ! is.numeric(object)) stop("Programming error")

    ## Compute summary. Bug fix: `digits` must be passed by name; passed
    ## positionally it lands in summary()'s '...' and is silently ignored.
    res <- c(summary(object=object, digits=digits, ...), "sd"=signif(sd(x=object), digits=digits), "N"=length(object))

    return(res)
}
1575
######################################### end of the function summary.fr
1577
1578 ######################################### Package DHARMa
1579
1580 ################ simulateResiduals.R
1581
1582 #' Create simulated residuals
1583 #'
1584 #' The function creates scaled residuals by simulating from the fitted model. Residuals can be extracted with \code{\link{residuals.DHARMa}}. See \code{\link{testResiduals}} for an overview of residual tests, \code{\link{plot.DHARMa}} for an overview of available plots.
1585 #'
1586 #' @param fittedModel a fitted model of a class supported by DHARMa
#' @param n number of simulations. Default is 100. A safer value would be 250 or even 1000. The smaller the number, the higher the stochastic error on the residuals. Also, for very small n, discretization artefacts can influence the tests.
1588 #' @param refit if FALSE, new data will be simulated and scaled residuals will be created by comparing observed data with new data. If TRUE, the model will be refit on the simulated data (parametric bootstrap), and scaled residuals will be created by comparing observed with refitted residuals.
1589 #' @param integerResponse if TRUE, noise will be added at to the residuals to maintain a uniform expectations for integer responses (such as Poisson or Binomial). Usually, the model will automatically detect the appropriate setting, so there is no need to adjust this setting.
1590 #' @param plot if TRUE, \code{\link{plotResiduals}} will be directly run after the residuals have been calculated
1591 #' @param ... parameters to pass to the simulate function of the model object. An important use of this is to specify whether simulations should be conditional on the current random effect estimates, e.g. via re.form. Note that not all models support syntax to specify conditionao or unconditional simulations. See also details
1592 #' @param seed the random seed to be used within DHARMa. The default setting, recommended for most users, is keep the random seed on a fixed value 123. This means that you will always get the same randomization and thus teh same result when running the same code. NULL = no new seed is set, but previous random state will be restored after simulation. FALSE = no seed is set, and random state will not be restored. The latter two options are only recommended for simulation experiments. See vignette for details.
1593 #' @param method the quantile randomization method used. The two options implemented at the moment are probability integral transform (PIT-) residuals (current default), and the "traditional" randomization procedure, that was used in DHARMa until version 0.3.0. For details, see \code{\link{getQuantile}}
1594 #' @return An S3 class of type "DHARMa", essentially a list with various elements. Implemented S3 functions include plot, print and \code{\link{residuals.DHARMa}}. Residuals returns the calculated scaled residuals.
1595 #'
1596 #' @details There are a number of important considerations when simulating from a more complex (hierarchical) model:
1597 #'
1598 #' \strong{Re-simulating random effects / hierarchical structure}: in a hierarchical model, we have several stochastic processes aligned on top of each other. Specifically, in a GLMM, we have a lower level stochastic process (random effect), whose result enters into a higher level (e.g. Poisson distribution). For other hierarchical models such as state-space models, similar considerations apply.
1599 #'
1600 #' In such a situation, we have to decide if we want to re-simulate all stochastic levels, or only a subset of those. For example, in a GLMM, it is common to only simulate the last stochastic level (e.g. Poisson) conditional on the fitted random effects. This is often referred to as a conditional simuation. For controlling how many levels should be re-simulated, the simulateResidual function allows to pass on parameters to the simulate function of the fitted model object. Please refer to the help of the different simulate functions (e.g. ?simulate.merMod) for details. For merMod (lme4) model objects, the relevant parameters are parameters are use.u and re.form
1601 #'
1602 #' If the model is correctly specified, the simulated residuals should be flat regardless how many hierarchical levels we re-simulate. The most thorough procedure would therefore be to test all possible options. If testing only one option, I would recommend to re-simulate all levels, because this essentially tests the model structure as a whole. This is the default setting in the DHARMa package. A potential drawback is that re-simulating the lower-level random effects creates more variability, which may reduce power for detecting problems in the upper-level stochastic processes. In particular dispersion tests may produce different results when switching from conditional to unconditional simulations, and often the conditional simulation is more sensitive.
1603 #'
#' \strong{Integer responses}: a second complication is the treatment of integer responses. Imagine we have observed a 0, and we predict 30\% zeros - what is the quantile that we should display for the residual? To deal with this problem and maintain a uniform response, the option integerResponse adds a uniform noise from -0.5 to 0.5 on the simulated and observed response, which creates a uniform distribution - you can see this via hist(ecdf(runif(10000))(runif(10000))).
1605 #'
#' DHARMa will try to determine automatically if the fitted model has an integer or discrete distribution via the family argument. However, in some cases the family does not allow to uniquely identify the distribution type. For example, a tweedie distribution can be integer or continuous. Therefore, DHARMa will additionally check the simulation results for repeated values, and will change the distribution type if repeated values are found (a message is displayed in this case).
1607 #'
1608 #' \strong{Refitting or not}: a third issue is how residuals are calculated. simulateResiduals has two options that are controlled by the refit parameter:
1609 #'
1610 #' 1. if refit = FALSE (default), new data is simulated from the fitted model, and residuals are calculated by comparing the observed data to the new data
1611 #'
1612 #' 2. if refit = TRUE, a parametric bootstrap is performed, meaning that the model is refit on the new data, and residuals are created by comparing observed residuals against refitted residuals. I advise against using this method per default (see more comments in the vignette), unless you are really sure that you need it.
1613 #'
1614 #' \strong{Residuals per group}: In many situations, it can be useful to look at residuals per group, e.g. to see how much the model over / underpredicts per plot, year or subject. To do this, use \code{\link{recalculateResiduals}}, together with a grouping variable (see also help)
1615 #'
#' \strong{Transformation to other distributions}: DHARMa calculates residuals for which the theoretical expectation (assuming a correctly specified model) is uniform. To transform these residuals to another distribution (e.g. so that a correctly specified model will have normal residuals) see \code{\link{residuals.DHARMa}}.
1617 #'
1618 #' @seealso \code{\link{testResiduals}}, \code{\link{plot.DHARMa}}, \code{\link{plotResiduals}}, \code{\link{print.DHARMa}}, \code{\link{residuals.DHARMa}}, \code{\link{recalculateResiduals}}
1619 #'
1620 #'
1621 #' @example inst/examples/simulateResidualsHelp.R
1622 #' @import stats
1623 #' @export
simulateResiduals <- function(fittedModel, n = 250, refit = FALSE, integerResponse = NULL, plot = FALSE, seed = 123, method = c("PIT", "traditional"), ...){

  ######## general assertions and startup calculations ##########

  if (n < 2) stop("error in DHARMa::simulateResiduals: n > 1 is required to calculate scaled residuals")
  checkModel(fittedModel)
  # Fix: the result of match.arg() was previously discarded, so when the
  # default was used, the full choices vector c("PIT", "traditional") was
  # forwarded to getQuantile(). Assign the validated scalar instead.
  method <- match.arg(method)
  randomState <- getRandomState(seed)
  on.exit({randomState$restoreCurrent()})
  ptm <- proc.time()

  ####### extract model info ############

  out <- list()

  family <- family(fittedModel)
  out$fittedModel <- fittedModel
  out$modelClass <- class(fittedModel)[1]

  out$nObs <- nobs(fittedModel)
  out$nSim <- n
  out$refit <- refit
  out$observedResponse <- getObservedResponse(fittedModel)

  # Auto-detect integer-valued response distributions from the family name,
  # unless the user has set integerResponse explicitly.
  if (is.null(integerResponse)) {
    integerResponse <- family$family %in% c("binomial", "poisson", "quasibinomial", "quasipoisson", "Negative Binom", "nbinom2", "nbinom1", "genpois", "compois", "truncated_poisson", "truncated_nbinom2", "truncated_nbinom1", "betabinomial", "Poisson", "Tpoisson", "COMPoisson", "negbin", "Tnegbin") ||
      grepl("Negative Binomial", family$family)
  }
  out$integerResponse <- integerResponse

  out$problems <- list()

  # re.form should be set to ~0 to avoid spurious residual patterns, see https://github.com/florianhartig/DHARMa/issues/43

  if (out$modelClass %in% c("HLfit")) {
    # spaMM HLfit predictions come back as a one-column matrix; extract the column
    out$fittedPredictedResponse <- predict(fittedModel, type = "response", re.form = ~0)[, 1L]
  } else {
    out$fittedPredictedResponse <- predict(fittedModel, type = "response", re.form = ~0)
  }

  out$fittedFixedEffects <- getFixedEffects(fittedModel)
  out$fittedResiduals <- residuals(fittedModel, type = "response")

  ######## refit = FALSE ##################

  if (refit == FALSE) {

    out$simulatedResponse <- getSimulations(fittedModel, nsim = n, type = "normal", ...)

    checkSimulations(out$simulatedResponse, out$nObs, out$nSim)

    out$scaledResiduals <- getQuantile(simulations = out$simulatedResponse, observed = out$observedResponse, integerResponse = integerResponse, method = method)

  ######## refit = TRUE ##################
  } else {

    # Preallocate outputs of the parametric bootstrap
    out$refittedPredictedResponse <- matrix(nrow = out$nObs, ncol = n)
    out$refittedFixedEffects <- matrix(nrow = length(out$fittedFixedEffects), ncol = n)
    #out$refittedRandomEffects <- matrix(nrow = length(out$fittedRandomEffects), ncol = n )
    out$refittedResiduals <- matrix(nrow = out$nObs, ncol = n)
    out$refittedPearsonResiduals <- matrix(nrow = out$nObs, ncol = n)

    out$simulatedResponse <- getSimulations(fittedModel, nsim = n, type = "refit", ...)

    for (i in seq_len(n)) {

      simObserved <- out$simulatedResponse[[i]]

      # A single failed refit should not abort the whole bootstrap; failed
      # columns remain NA and are flagged below.
      try({

        refittedModel <- getRefit(fittedModel, simObserved)

        out$refittedPredictedResponse[, i] <- predict(refittedModel, type = "response")
        out$refittedFixedEffects[, i] <- getFixedEffects(refittedModel)
        out$refittedResiduals[, i] <- residuals(refittedModel, type = "response")
        out$refittedPearsonResiduals[, i] <- residuals(refittedModel, type = "pearson")
        #out$refittedRandomEffects[,i] = ranef(refittedModel)
      }, silent = TRUE)
    }

    ######### residual checks ###########

    if (anyNA(out$refittedResiduals)) warning("DHARMa::simulateResiduals warning: on refit = TRUE, at least one of the refitted models produced an error. Inspect the refitted model values. Results may not be reliable.")

    ## check for convergence problems

    dup <- sum(duplicated(out$refittedFixedEffects, MARGIN = 2))
    if (dup > 0) {
      if (dup < n / 3) {
        warning(paste("There were", dup, "of", n, "duplicate parameter estimates in the refitted models. This may hint towards a problem with optimizer convergence in the fitted models. Results may not be reliable. The suggested action is to not use the refitting procedure, and diagnose with tools available for the normal (not refitted) simulated residuals. If you absolutely require the refitting procedure, try changing tolerance / iterations in the optimizer settings."))
      } else {
        warning(paste("There were", dup, "of", n, "duplicate parameter estimates in the refitted models. This may hint towards a problem with optimizer convergence in the fitted models. Results are likely not reliable. The suggested action is to not use the refitting procedure, and diagnose with tools available for the normal (not refitted) simulated residuals. If you absolutely require the refitting procedure, try changing tolerance / iterations in the optimizer settings."))
        out$problems[[length(out$problems) + 1]] <- "error in refit"
      }
    }

    ######### residual calculations ###########

    out$scaledResiduals <- getQuantile(simulations = out$refittedResiduals, observed = out$fittedResiduals, integerResponse = integerResponse, method = method)
  }

  ########### Wrapup ############

  out$time <- proc.time() - ptm
  out$randomState <- randomState

  class(out) <- "DHARMa"

  if (plot == TRUE) plot(out)

  return(out)
}
1742
# Model classes that DHARMa officially supports (used by checkModel / ensureDHARMa).
getPossibleModels <- function() {
  c(
    "lm", "glm", "negbin",
    "lmerMod", "glmerMod",
    "gam", "bam",
    "glmmTMB", "HLfit"
  )
}
1744
1745
1746
1747 #' Check if the fitted model is supported by DHARMa
1748 #'
1749 #' The function checks if the fitted model is supported by DHARMa, and if there are other issues that could create problems
1750 #'
1751 #' @param fittedModel a fitted model
1752 #' @param stop whether to throw an error if the model is not supported by DHARMa
1753 #'
#' @details The main purpose of this function is to check if the fitted model class is supported by DHARMa. The function additionally checks for properties of the fitted model that could create problems for calculating residuals or working with the results in DHARMa.
1755 #'
1756 #'
1757 #' @keywords internal
checkModel <- function(fittedModel, stop = FALSE){

  # Warn (default) or error if the model class is not in the supported list.
  if (!(class(fittedModel)[1] %in% getPossibleModels())) {
    if (stop == FALSE) warning("DHARMa: fittedModel not in class of supported models. Absolutely no guarantee that this will work!")
    else stop("DHARMa: fittedModel not in class of supported models")
  }

  # if(hasNA(fittedModel)) message("It seems there were NA values in the data used for fitting the model. This can create problems if you supply additional data to DHARMa functions. See ?checkModel for details")

  # TODO: check as implemented does not work reliably, check if there is any other option to check for NA
  # #' @example inst/examples/checkModelHelp.R

  # NA values in the data: checkModel will detect if there were NA values in the data frame. For NA values, most regression models will remove the entire observation from the data. This is not a problem for DHARMa - residuals are then only calculated for non-NA rows in the data. However, if you provide additional predictors to DHARMa, for example to plot residuals against a predictor, you will have to remove all NA rows that were also removed in the model. For most models, you can get the rows of the data that were actually used in the fit via rownames(model.frame(fittedModel))

  # mgcv extended.family objects (e.g. nb, tw) have no simulate() support in DHARMa yet.
  # Note: class(...)[1] is deliberate (exact first class), so bam models are not caught here.
  if (class(fittedModel)[1] == "gam") {
    if (class(fittedModel$family)[1] == "extended.family") stop("It seems you are trying to fit a model from mgcv that was fit with an extended.family. Simulation functions for these families are not yet implemented in DHARMa. See issue https://github.com/florianhartig/DHARMa/issues/11 for updates about this")
  }

}
1778
1779
1780
1781 #' Check simulated data
1782 #'
1783 #' The function checks if the simulated data seems fine
1784 #'
1785 #' @param simulatedResponse the simulated response
1786 #' @param nObs number of observations
1787 #' @param nSim number of simulations
1788 #'
1789 #' @keywords internal
checkSimulations <- function(simulatedResponse, nObs, nSim){

  # Hard failures: wrong container type or wrong dimensions indicate a broken
  # simulate() method in the regression package.
  if (!inherits(simulatedResponse, "matrix")) securityAssertion("Simulation from the model produced wrong class", stop = TRUE)

  if (any(dim(simulatedResponse) != c(nObs, nSim))) securityAssertion("Simulation from the model produced wrong dimension", stop = TRUE)

  # Infinite values are suspicious but tolerated (message only).
  if (any(!is.finite(simulatedResponse))) message("Simulations from your fitted model produce infinite values. Consider if this is sensible")

  # NaN / NA values make residual calculation impossible.
  # Note: error messages fixed ("cannot calculated" -> "cannot calculate").
  if (any(is.nan(simulatedResponse))) securityAssertion("Simulations from your fitted model produce NaN values. DHARMa cannot calculate residuals for this. This is nearly certainly an error of the regression package you are using", stop = TRUE)
  if (any(is.na(simulatedResponse))) securityAssertion("Simulations from your fitted model produce NA values. DHARMa cannot calculate residuals for this. This is nearly certainly an error of the regression package you are using", stop = TRUE)

}
1802
1803
1804
1805
1806 #' Recalculate residuals with grouping
1807 #'
1808 #' The purpose of this function is to recalculate scaled residuals per group, based on the simulations done by \code{\link{simulateResiduals}}
1809 #'
1810 #' @param simulationOutput an object with simulated residuals created by \code{\link{simulateResiduals}}
1811 #' @param group group of each data point
1812 #' @param aggregateBy function for the aggregation. Default is sum. This should only be changed if you know what you are doing. Note in particular that the expected residual distribution might not be flat any more if you choose general functions, such as sd etc.
#' @param seed the random seed to be used within DHARMa. The default setting, recommended for most users, is keep the random seed on a fixed value 123. This means that you will always get the same randomization and thus the same result when running the same code. NULL = no new seed is set, but previous random state will be restored after simulation. FALSE = no seed is set, and random state will not be restored. The latter two options are only recommended for simulation experiments. See vignette for details.
1814 #' @param method the quantile randomization method used. The two options implemented at the moment are probability integral transform (PIT-) residuals (current default), and the "traditional" randomization procedure, that was used in DHARMa until version 0.3.0. For details, see \code{\link{getQuantile}}
1815 #' @return an object of class DHARMa, similar to what is returned by \code{\link{simulateResiduals}}, but with additional outputs for the new grouped calculations. Note that the relevant outputs are 2x in the object, the first is the grouped calculations (which is returned by $name access), and later another time, under identical name, the original output. Moreover, there is a function 'aggregateByGroup', which can be used to aggregate predictor variables in the same way as the variables calculated here
1816 #'
1817 #' @example inst/examples/simulateResidualsHelp.R
1818 #' @export
recalculateResiduals <- function(simulationOutput, group = NULL, aggregateBy = sum, seed = 123, method = c("PIT", "traditional")){

  randomState <- getRandomState(seed)
  on.exit({randomState$restoreCurrent()})
  # Fix: match.arg() result was previously discarded; assign the validated
  # scalar so getQuantile() does not receive the full choices vector.
  method <- match.arg(method)

  # If residuals were already regrouped, always start again from the original
  # (ungrouped) simulation output.
  if (!is.null(simulationOutput$original)) simulationOutput <- simulationOutput$original

  out <- list()
  out$original <- simulationOutput

  # No grouping requested -> return the original object unchanged.
  if (is.null(group)) return(simulationOutput)
  group <- as.factor(group)
  out$nGroups <- nlevels(group)

  # Aggregates a vector to one value per group level (column 2 of aggregate()'s result).
  aggregateByGroup <- function(x) aggregate(x, by = list(group), FUN = aggregateBy)[, 2]

  out$observedResponse <- aggregateByGroup(simulationOutput$observedResponse)
  out$fittedPredictedResponse <- aggregateByGroup(simulationOutput$fittedPredictedResponse)

  if (simulationOutput$refit == FALSE) {

    out$simulatedResponse <- apply(simulationOutput$simulatedResponse, 2, aggregateByGroup)
    out$scaledResiduals <- getQuantile(simulations = out$simulatedResponse, observed = out$observedResponse, integerResponse = simulationOutput$integerResponse, method = method)

  ######## refit = TRUE ##################
  } else {

    out$refittedPredictedResponse <- apply(simulationOutput$refittedPredictedResponse, 2, aggregateByGroup)
    out$fittedResiduals <- aggregateByGroup(simulationOutput$fittedResiduals)
    out$refittedResiduals <- apply(simulationOutput$refittedResiduals, 2, aggregateByGroup)
    out$refittedPearsonResiduals <- apply(simulationOutput$refittedPearsonResiduals, 2, aggregateByGroup)

    out$scaledResiduals <- getQuantile(simulations = out$refittedResiduals, observed = out$fittedResiduals, integerResponse = simulationOutput$integerResponse, method = method)

  }

  # hack - the c here will result in both old and new outputs to be present resulting output, but a named access should refer to the new, grouped calculations
  # question to myself - what's the use of that, why not erase the old outputs? they are anyway saved in the old object

  out$aggregateByGroup <- aggregateByGroup
  out <- c(out, simulationOutput)
  out$randomState <- randomState
  class(out) <- "DHARMa"
  return(out)
}
1865
1866 ################ simulateResiduals.R
1867
1868 ################ DHARMa.R
1869
1870 #' @title DHARMa - Residual Diagnostics for HierArchical (Multi-level / Mixed) Regression Models
1871 #' @name DHARMa
1872 #' @docType package
1873 #' @description The 'DHARMa' package uses a simulation-based approach to create readily interpretable scaled (quantile) residuals for fitted (generalized) linear mixed models. Currently supported are linear and generalized linear (mixed) models from 'lme4' (classes 'lmerMod', 'glmerMod'), 'glmmTMB' and 'spaMM', generalized additive models ('gam' from 'mgcv'), 'glm' (including 'negbin' from 'MASS', but excluding quasi-distributions) and 'lm' model classes. Moreover, externally created simulations, e.g. posterior predictive simulations from Bayesian software such as 'JAGS', 'STAN', or 'BUGS' can be processed as well. The resulting residuals are standardized to values between 0 and 1 and can be interpreted as intuitively as residuals from a linear regression. The package also provides a number of plot and test functions for typical model misspecification problems, such as over/underdispersion, zero-inflation, and residual spatial and temporal autocorrelation.
1874 #' @details See index / vignette for details
1875 #' @seealso \code{\link{simulateResiduals}}
1876 #' @examples
1877 #' vignette("DHARMa", package="DHARMa")
1878 NULL
1879
1880
1881 #' Print simulated residuals
1882 #'
1883 #' @param x an object with simulated residuals created by \code{\link{simulateResiduals}}
1884 #' @param ... optional arguments for compatibility with the generic function, no function implemented
1885 #' @export
#' Print simulated residuals
#'
#' S3 print method: shows a one-line summary of the DHARMa object, followed by
#' the scaled residual values (truncated to the first 20 with "..." when long).
#'
#' @param x an object with simulated residuals created by \code{\link{simulateResiduals}}
#' @param ... optional arguments for compatibility with the generic function, no function implemented
#' @export
print.DHARMa <- function(x, ...){
  header <- paste("Object of Class DHARMa with simulated residuals based on", x$nSim, "simulations with refit =", x$refit, ". See ?DHARMa::simulateResiduals for help.")
  cat(header, "\n", "\n")
  nRes <- length(x$scaledResiduals)
  if (nRes < 20) {
    cat("Scaled residual values:", x$scaledResiduals)
  } else {
    cat("Scaled residual values:", x$scaledResiduals[1:20], "...")
  }
}
1893
1894 #' Return residuals of a DHARMa simulation
1895 #'
1896 #' @param object an object with simulated residuals created by \code{\link{simulateResiduals}}
1897 #' @param quantileFunction optional - a quantile function to transform the uniform 0/1 scaling of DHARMa to another distribution
1898 #' @param outlierValues if a quantile function with infinite support (such as dnorm) is used, residuals that are 0/1 are mapped to -Inf / Inf. outlierValues allows to convert -Inf / Inf values to an optional min / max value.
1899 #' @param ... optional arguments for compatibility with the generic function, no function implemented
1900 #' @details the function accesses the slot $scaledResiduals in a fitted DHARMa object, and optionally transforms the standard DHARMa quantile residuals (which have a uniform distribution) to a particular pdf.
1901 #'
1902 #' @note some of the papers on simulated quantile residuals transforming the residuals (which are natively uniform) back to a normal distribution. I presume this is because of the larger familiarity of most users with normal residuals. Personally, I never considered this desirable, for the reasons explained in https://github.com/florianhartig/DHARMa/issues/39, but with this function, I wanted to give users the option to plot normal residuals if they so wish.
1903 #'
1904 #' @export
1905 #' @example inst/examples/simulateResidualsHelp.R
1906 #'
residuals.DHARMa <- function(object, quantileFunction = NULL, outlierValues = NULL, ...){

  # Default: return the uniform quantile residuals untransformed.
  if (is.null(quantileFunction)) return(object$scaledResiduals)

  # Transform the uniform residuals to the requested distribution.
  transformed <- quantileFunction(object$scaledResiduals)

  # Optionally clamp infinite values (residuals at exactly 0 / 1) to finite bounds.
  if (!is.null(outlierValues)) {
    transformed[which(transformed == -Inf)] <- outlierValues[1]
    transformed[which(transformed == Inf)] <- outlierValues[2]
  }
  transformed
}
1920
1921
1922
1923 #' Return outliers
1924 #'
1925 #' Returns the outliers of a DHARMa object
1926 #'
1927 #' @param object an object with simulated residuals created by \code{\link{simulateResiduals}}
1928 #' @param lowerQuantile lower threshold for outliers. Default is zero = outside simulation envelope
1929 #' @param upperQuantile upper threshold for outliers. Default is 1 = outside simulation envelope
#' @param return whether to return the indices of outliers or a logical vector
1931 #'
1932 #' @details First of all, note that the standard definition of outlier in the DHARMa plots and outlier tests is an observation that is outside the simulation envelope. How far outside that is depends a lot on how many simulations you do. If you have 100 data points and to 100 simulations, you would expect to have one "outlier" on average, even with a perfectly fitting model. This is in fact what the outlier test tests.
1933 #'
1934 #' Thus, keep in mind that for a small number of simulations, outliers are mostly a technical term: these are points that are outside our simulations, but we don't know how far away they are.
1935 #'
1936 #' If you are seriously interested in HOW FAR outside the expected distribution a data point is, you should increase the number of simulations in \code{\link{simulateResiduals}} to be sure to get the tail of the data distribution correctly. In this case, it may make sense to adjust lowerQuantile and upperQuantile, e.g. to 0.025, 0.975, which would define outliers as values outside the central 95% of the distribution.
1937 #'
#' Also, note that outliers are particularly concerning if they have a strong influence on the model fit. One could test the influence, for example, by removing them from the data, or by some measures of leverage, e.g. generalisations for Cook's distance as in Pinho, L. G. B., Nobre, J. S., & Singer, J. M. (2015). Cook’s distance for generalized linear mixed models. Computational Statistics & Data Analysis, 82, 126–136. doi:10.1016/j.csda.2014.08.008. At the moment, however, no such function is provided in DHARMa.
1939 #'
1940 #' @export
1941 #'
outliers <- function(object, lowerQuantile = 0, upperQuantile = 1, return = c("index", "logical")){

  return <- match.arg(return)

  # Flag residuals outside [lowerQuantile, upperQuantile] (defaults: outside
  # the simulation envelope, i.e. exactly 0 or 1).
  res <- residuals(object)
  isOutlier <- res >= upperQuantile | res <= lowerQuantile

  if (return == "logical") {
    isOutlier
  } else {
    which(isOutlier)
  }
}
1951
1952
1953
1954 #' Create a DHARMa object from hand-coded simulations or Bayesian posterior predictive simulations
1955 #'
#' @param simulatedResponse matrix of observations simulated from the fitted model - row index for observations and column index for simulations
1957 #' @param observedResponse true observations
1958 #' @param fittedPredictedResponse optional fitted predicted response. For Bayesian posterior predictive simulations, using the median posterior prediction as fittedPredictedResponse is recommended. If not provided, the mean simulatedResponse will be used.
1959 #' @param integerResponse if T, noise will be added at to the residuals to maintain a uniform expectations for integer responses (such as Poisson or Binomial). Unlike in \code{\link{simulateResiduals}}, the nature of the data is not automatically detected, so this MUST be set by the user appropriately
#' @param seed the random seed to be used within DHARMa. The default setting, recommended for most users, is keep the random seed on a fixed value 123. This means that you will always get the same randomization and thus the same result when running the same code. NULL = no new seed is set, but previous random state will be restored after simulation. FALSE = no seed is set, and random state will not be restored. The latter two options are only recommended for simulation experiments. See vignette for details.
1961 #' @param method the quantile randomization method used. The two options implemented at the moment are probability integral transform (PIT-) residuals (current default), and the "traditional" randomization procedure, that was used in DHARMa until version 0.3.0. For details, see \code{\link{getQuantile}}
1962 #' @details The use of this function is to convert simulated residuals (e.g. from a point estimate, or Bayesian p-values) to a DHARMa object, to make use of the plotting / test functions in DHARMa
1963 #' @note Either scaled residuals or (simulatedResponse AND observed response) have to be provided
1964 #' @example inst/examples/createDharmaHelp.R
1965 #' @export
createDHARMa <- function(simulatedResponse, observedResponse, fittedPredictedResponse = NULL, integerResponse = FALSE, seed = 123, method = c("PIT", "traditional")){

  randomState <- getRandomState(seed)
  on.exit({randomState$restoreCurrent()})
  # Fix: match.arg() result was previously discarded; assign the validated scalar.
  method <- match.arg(method)

  out <- list()
  out$simulatedResponse <- simulatedResponse
  out$refit <- FALSE
  out$integerResponse <- integerResponse
  out$observedResponse <- observedResponse

  # Input validation. Scalar && (not vectorized &) is the correct operator in if().
  if (!is.matrix(simulatedResponse) && !is.null(observedResponse)) stop("either scaled residuals or simulations and observations have to be provided")
  if (ncol(simulatedResponse) < 2) stop("simulatedResponse with less than 2 simulations provided - cannot calculate residuals on that.")

  if (ncol(simulatedResponse) < 10) warning("simulatedResponse with less than 10 simulations provided. This rarely makes sense")

  out$nObs <- length(observedResponse)

  if (out$nObs < 3) stop("warning - number of observations < 3 ... this rarely makes sense")

  if (out$nObs != nrow(simulatedResponse)) stop("dimensions of observedResponse and simulatedResponse do not match")

  out$nSim <- ncol(simulatedResponse)

  out$scaledResiduals <- getQuantile(simulations = simulatedResponse, observed = observedResponse, integerResponse = integerResponse, method = method)

  # makes sure that DHARMa plots that rely on this vector won't crash
  if (is.null(fittedPredictedResponse)) {
    message("No fitted predicted response provided, using the mean of the simulations")
    # rowMeans is the idiomatic (and faster) equivalent of apply(x, 1, mean)
    fittedPredictedResponse <- rowMeans(simulatedResponse)
  }
  out$fittedPredictedResponse <- fittedPredictedResponse
  out$randomState <- randomState
  class(out) <- "DHARMa"
  return(out)
}
2004
2005
2006 #' Ensures that an object is of class DHARMa
2007 #'
2008 #' @param simulationOutput a DHARMa simulation output or an object that can be converted into a DHARMa simulation output
2009 #' @param convert if TRUE, attempts to convert model + numeric to DHARMa, if "Model", converts only supported models to DHARMa
#' @details The function checks whether the supplied object is already of class DHARMa and, depending on \code{convert}, attempts to convert supported model objects or numeric vectors of quantile residuals into a DHARMa object.
2011 #' @return an object of class DHARMa
2012 #' @keywords internal
ensureDHARMa <- function(simulationOutput,
                         convert = FALSE){

  # Already a DHARMa object -> pass through unchanged.
  if (inherits(simulationOutput, "DHARMa")) {
    return(simulationOutput)
  }

  # No conversion allowed -> reject anything that is not a DHARMa object.
  if (isFALSE(convert)) stop("wrong argument to function, simulationOutput must be a DHARMa object!")

  # Supported fitted model -> simulate residuals for it
  # (convert may be TRUE or the string "Model"; scalar || replaces vectorized |).
  if (class(simulationOutput)[1] %in% getPossibleModels()) {
    if (convert == "Model" || isTRUE(convert)) return(simulateResiduals(simulationOutput))
  } else if (is.vector(simulationOutput, mode = "numeric") && isTRUE(convert)) {
    # Bare numeric vector of quantile residuals -> wrap in a minimal DHARMa object.
    out <- list()
    out$scaledResiduals <- simulationOutput
    out$nObs <- length(out$scaledResiduals)
    class(out) <- "DHARMa"
    return(out)
  }

  stop("wrong argument to function, simulationOutput must be a DHARMa object or a numeric vector of quantile residuals!")
}
2036
2037 ####################### DHARMa.R
2038
2039 ####################### tests.R
2040
2041 #' DHARMa general residual test
2042 #'
2043 #' Calls both uniformity and dispersion test
2044 #'
2045 #' This function is a wrapper for the various test functions implemented in DHARMa. Currently, this function calls the \code{\link{testUniformity}} and the \code{\link{testDispersion}} functions. All other tests (see list below) have to be called by hand.
2046 #'
2047 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa
2048 #' @param plot if T, plots functions of the tests are called
2049 #' @author Florian Hartig
2050 #' @seealso \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}}
2051 #' @example inst/examples/testsHelp.R
2052 #' @export
testResiduals <- function(simulationOutput, plot = TRUE){

  # Arrange the three diagnostic plots side by side; restore graphics state on exit.
  opar <- par(mfrow = c(1, 3))
  on.exit(par(opar))

  # Run the three standard tests and collect their results.
  out <- list()
  out$uniformity <- testUniformity(simulationOutput, plot = plot)
  out$dispersion <- testDispersion(simulationOutput, plot = plot)
  out$outliers <- testOutliers(simulationOutput, plot = plot)

  print(out)
  return(out)
}
2065
2066 #' Residual tests
2067 #'
2068 #' @details Deprecated, switch your code to using the \code{\link{testResiduals}} function
2069 #'
2070 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa
2071 #' @author Florian Hartig
2072 #' @export
# Deprecated alias kept for backward compatibility; emits a message and
# forwards the call unchanged to testResiduals().
testSimulatedResiduals <- function(simulationOutput){
  message("testSimulatedResiduals is deprecated, switch your code to using the testResiduals function")
  testResiduals(simulationOutput)
}
2077
2078
2079 #' Test for overall uniformity
2080 #'
2081 #' This function tests the overall uniformity of the simulated residuals in a DHARMa object
2082 #'
2083 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa
2084 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" compared to the simulated null hypothesis. See \code{\link[stats]{ks.test}} for details
2085 #' @param plot if T, plots calls \code{\link{plotQQunif}} as well
2086 #' @details The function applies a \code{\link[stats]{ks.test}} for uniformity on the simulated residuals.
2087 #' @author Florian Hartig
2088 #' @seealso \code{\link{testResiduals}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}}
2089 #' @example inst/examples/testsHelp.R
2090 #' @export
testUniformity <- function(simulationOutput, alternative = c("two.sided", "less", "greater"), plot = TRUE){

  simulationOutput <- ensureDHARMa(simulationOutput, convert = TRUE)

  # Fix: validate the alternative argument here (previously the full choices
  # vector was passed straight through to ks.test when the default was used).
  alternative <- match.arg(alternative)

  # KS test against the uniform distribution; warnings about ties are expected
  # for integer responses and deliberately suppressed.
  out <- suppressWarnings(ks.test(simulationOutput$scaledResiduals, 'punif', alternative = alternative))
  if (plot == TRUE) plotQQunif(simulationOutput = simulationOutput)
  return(out)
}
2099
2100
2101 # Experimental
# Experimental stub: currently only coerces the input to a DHARMa object and
# returns NULL. The actual bivariate uniformity test is not yet implemented
# (the intended implementation is sketched in the commented-out lines below).
testBivariateUniformity<- function(simulationOutput, alternative = c("two.sided", "less", "greater"), plot = T){

  simulationOutput = ensureDHARMa(simulationOutput, convert = T)

  #out <- suppressWarnings(ks.test(simulationOutput$scaledResiduals, 'punif', alternative = alternative))
  #if(plot == T) plotQQunif(simulationOutput = simulationOutput)
  out = NULL
  return(out)
}
2111
2112
2113
2114 #' Test for quantiles
2115 #'
2116 #' This function tests
2117 #'
2118 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa
2119 #' @param predictor an optional predictor variable to be used, instead of the predicted response (default)
2120 #' @param quantiles the quantiles to be tested
2121 #' @param plot if T, the function will create an additional plot
#' @details The function fits quantile regressions (via package qgam) on the residuals, and compares their location to the expected location (because of the uniform distribution, the expected location is 0.5 for the 0.5 quantile).
2123 #'
2124 #' A significant p-value for the splines means the fitted spline deviates from a flat line at the expected location (p-values of intercept and spline are combined via Benjamini & Hochberg adjustment to control the FDR)
2125 #'
2126 #' The p-values of the splines are combined into a total p-value via Benjamini & Hochberg adjustment to control the FDR.
2127 #'
2128 #' @author Florian Hartig
2129 #' @example inst/examples/testQuantilesHelp.R
2130 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testOutliers}}
2131 #' @export
#' Tests the location of residual quantiles via qgam quantile regressions.
#'
#' @param simulationOutput a DHARMa object (or convertible input)
#' @param predictor optional predictor; defaults to the model predictions via ensurePredictor
#' @param quantiles the residual quantiles to test
#' @param plot if TRUE, delegates entirely to plotResiduals and returns its result;
#'   if FALSE, returns an htest-classed list with per-quantile and combined p-values
testQuantiles <- function(simulationOutput, predictor = NULL, quantiles = c(0.25, 0.5, 0.75), plot = TRUE) {

  if (plot == FALSE) {

    out <- list()
    # capture the caller's expression before simulationOutput is reassigned
    out$data.name <- deparse(substitute(simulationOutput))

    simulationOutput <- ensureDHARMa(simulationOutput, convert = TRUE)
    res <- simulationOutput$scaledResiduals
    pred <- ensurePredictor(simulationOutput, predictor)

    dat <- data.frame(res = simulationOutput$scaledResiduals, pred = pred)

    quantileFits <- list()
    pval <- rep(NA, length(quantiles))
    # columns 2*i and 2*i+1 will hold fit and se for quantile i
    predictions <- data.frame(pred = sort(dat$pred))
    predictions <- cbind(predictions, matrix(ncol = 2 * length(quantiles), nrow = nrow(dat)))
    for (i in seq_along(quantiles)) {
      datTemp <- dat
      # center residuals so the expected location of each quantile fit is 0
      datTemp$res <- datTemp$res - quantiles[i]

      # k = dimension of the basis used to represent the smooth term,
      # capped by the number of unique predictor values.
      # see https://github.com/mfasiolo/qgam/issues/37
      dimSmooth <- min(length(unique(datTemp$pred)), 10)
      quantResult <- try(capture.output(quantileFits[[i]] <- qgam::qgam(res ~ s(pred, k = dimSmooth), data = datTemp, qu = quantiles[i])), silent = TRUE)
      if (inherits(quantResult, "try-error")) {
        message("Unable to calculate quantile regression for quantile ", quantiles[i], ". Possibly too few (unique) data points / predictions. Will be omitted in plots and significance calculations.")
      } else {
        x <- summary(quantileFits[[i]])
        # combine intercept and smooth-term p-values (BH controls the FDR)
        pval[i] <- min(p.adjust(c(x$p.table[1, 4], x$s.table[1, 4]), method = "BH"))
        quantPre <- predict(quantileFits[[i]], newdata = predictions, se = TRUE)
        predictions[, 2 * i] <- quantPre$fit + quantiles[i]
        predictions[, 2 * i + 1] <- quantPre$se.fit
      }
    }

    out$method <- "Test for location of quantiles via qgam"
    out$alternative <- "both"
    out$pvals <- pval
    # combine per-quantile p-values into one overall p-value (BH / FDR)
    out$p.value <- min(p.adjust(pval, method = "BH"))
    out$predictions <- predictions
    out$qgamFits <- quantileFits

    class(out) <- "htest"

  } else if (plot == TRUE) {
    out <- plotResiduals(simulationOutput = simulationOutput, predictor = predictor, quantiles = quantiles)
  }
  return(out)
}
2182
2183
2184 #unif.2017YMi(X, type = c("Q1", "Q2", "Q3"), lower = rep(0, ncol(X)),upper = rep(1, ncol(X)))
2185
2186 #' Test for outliers
2187 #'
#' This function tests if the number of observations outside the simulation envelope is larger or smaller than expected
2189 #'
2190 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa
2191 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" (default) compared to the simulated null hypothesis
2192 #' @param margin whether to test for outliers only at the lower, only at the upper, or both sides (default) of the simulated data distribution
2193 #' @param plot if T, the function will create an additional plot
2194 #' @details DHARMa residuals are created by simulating from the fitted model, and comparing the simulated values to the observed data. It can occur that all simulated values are higher or smaller than the observed data, in which case they get the residual value of 0 and 1, respectively. I refer to these values as simulation outliers, or simply outliers.
2195 #'
2196 #' Because no data was simulated in the range of the observed value, we don't know "how strongly" these values deviate from the model expectation, so the term "outlier" should be used with a grain of salt - it's not a judgment about the magnitude of a deviation from an expectation, but simply that we are outside the simulated range, and thus cannot say anything more about the location of the residual.
2197 #'
#' Note also that the number of outliers will decrease as we increase the number of simulations. Under the null hypothesis that the model is correct, we expect nData /(nSim +1) outliers at each margin of the distribution. To see why, consider that if the data and the model distribution are identical, the probability that a given observation is higher than all simulations is 1/(nSim +1).
2199 #'
#' Based on this null expectation, we can test for an excess or lack of outliers. Per default, testOutliers() looks for both, so if you get a significant p-value, you have to check if you have too many or too few outliers. An excess of outliers is to be interpreted as too many values outside the simulation envelope. This could be caused by overdispersion, or by what we classically call outliers. A lack of outliers would be caused, for example, by underdispersion.
2201 #'
2202 #'
2203 #' @author Florian Hartig
2204 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}}
2205 #' @example inst/examples/testOutliersHelp.R
2206 #' @export
#' Exact binomial test for an excess or lack of simulation outliers.
#'
#' @param simulationOutput a DHARMa object (or convertible model)
#' @param alternative direction of the binomial test
#' @param margin count outliers at the lower, upper, or both margins
#' @param plot if TRUE, draws a histogram of the residuals with a significance title
#' @return an htest object (binom.test result with DHARMa-specific labels)
testOutliers <- function(simulationOutput, alternative = c("two.sided", "greater", "less"), margin = c("both", "upper", "lower"), plot = TRUE) {

  # check inputs
  alternative <- match.arg(alternative)
  margin <- match.arg(margin)
  data.name <- deparse(substitute(simulationOutput)) # must run before ensureDHARMa
  simulationOutput <- ensureDHARMa(simulationOutput, convert = "Model")

  # outliers = residuals on the simulation envelope boundary (exactly 0 or 1)
  if (margin == "both") outliers <- sum(simulationOutput$scaledResiduals %in% c(0, 1))
  if (margin == "upper") outliers <- sum(simulationOutput$scaledResiduals == 1)
  if (margin == "lower") outliers <- sum(simulationOutput$scaledResiduals == 0)

  # H0 outlier frequency is 1/(nSim + 1) per margin, doubled when both margins count
  outFreqH0 <- 1 / (simulationOutput$nSim + 1) * if (margin == "both") 2 else 1
  trials <- simulationOutput$nObs

  out <- binom.test(outliers, trials, p = outFreqH0, alternative = alternative)

  # overwrite information in the binom.test htest object
  out$data.name <- data.name
  out$margin <- margin
  out$method <- "DHARMa outlier test based on exact binomial test"

  names(out$statistic) <- paste("outliers at", margin, "margin(s)")
  names(out$parameter) <- "simulations"
  names(out$estimate) <- paste("frequency of outliers (expected:", out$null.value, ")")

  # NOTE(review): removed leftover debug assignments (out$interval = "tst" and a
  # dangling `out$interval =` that silently captured the plot block's NULL value).

  if (plot == TRUE) {

    hist(simulationOutput, main = "")

    main <- ifelse(out$p.value <= 0.05,
                   "Outlier test significant",
                   "Outlier test n.s.")

    title(main = main, cex.main = 1,
          col.main = ifelse(out$p.value <= 0.05, "red", "black"))
  }
  return(out)
}
2256
2257
2258 #' DHARMa dispersion tests
2259 #'
2260 #' This function performs a simulation-based test for over/underdispersion
2261 #'
2262 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa
2263 #' @param plot whether to plot output
2264 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" compared to the simulated null hypothesis. Greater corresponds to overdispersion.
2265 #' @param ... arguments to pass on to \code{\link{testGeneric}}
2266 #' @details The function implements two tests, depending on whether it is applied on a simulation with refit = F, or refit = T.
2267 #'
2268 #' If refit = F, the function tests the sd of the data against the sd of the simulated data.
2269 #'
2270 #' If refit = T, the function compares the approximate deviance (via squared pearson residuals) with the same quantity from the models refitted with simulated data. Applying this is much slower than the previous alternative. Given the computational cost, I would suggest that most users will be satisfied with the standard dispersion test.
2271 #'
2272 #' @note The results of the dispersion test can can differ depending on whether it is evaluated on conditional (= conditional on fitted random effects) or unconditional (= REs are re-simulated) simulations. You can change between conditional or unconditional simulations in \code{\link{simulateResiduals}} if this is supported by the regression package that you use. The default in DHARMa is to use unconditional simulations, but I have often found that conditional simulations are more sensitive to dispersion problems. I recommend trying both, as neither test should be positive if the dispersion is correct.
2273 #'
2274 #' @author Florian Hartig
2275 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}}
2276 #' @example inst/examples/testsHelp.R
2277 #' @export
#' Simulation-based test for over/underdispersion.
#'
#' @param simulationOutput a DHARMa object (or convertible model)
#' @param alternative direction of the test; "greater" corresponds to overdispersion
#' @param plot if TRUE, plots the simulated null distribution against the observed value
#' @param ... forwarded to testGeneric (non-refit branch only)
#' @return an htest object
testDispersion <- function(simulationOutput, alternative = c("two.sided", "greater", "less"), plot = TRUE, ...) {

  out <- list()
  # capture the caller's expression before simulationOutput is reassigned
  out$data.name <- deparse(substitute(simulationOutput))

  alternative <- match.arg(alternative)

  simulationOutput <- ensureDHARMa(simulationOutput, convert = "Model")

  if (simulationOutput$refit == FALSE) {

    # non-refit test: compare the sd of observed residuals with the sd of
    # the simulated residuals around the fitted predictions
    spread <- function(x) sd(x - simulationOutput$fittedPredictedResponse)
    dataName <- out$data.name
    out <- testGeneric(simulationOutput, summary = spread, alternative = alternative, methodName = "DHARMa nonparametric dispersion test via sd of residuals fitted vs. simulated", plot = plot, ...)
    # FIX: testGeneric deparses its own argument name ("simulationOutput");
    # restore the data name captured from the original caller
    out$data.name <- dataName
  } else {

    # refit test: approximate deviance (sum of squared Pearson residuals)
    # of the fitted model vs. the same quantity from refitted simulations
    observed <- tryCatch(sum(residuals(simulationOutput$fittedModel, type = "pearson")^2), error = function(e) {
      message(paste("DHARMa: the requested tests requires pearson residuals, but your model does not implement these calculations. Test will return NA. Error message:", e))
      return(NA)
    })
    if (is.na(observed)) return(NA)
    expected <- apply(simulationOutput$refittedPearsonResiduals^2, 2, sum)
    out$statistic <- c(dispersion = observed / mean(expected))
    out$method <- "DHARMa nonparametric dispersion test via mean deviance residual fitted vs. simulated-refitted"

    p <- getP(simulated = expected, observed = observed, alternative = alternative)

    out$alternative <- alternative
    out$p.value <- p
    class(out) <- "htest"

    if (plot == TRUE) {
      #plotTitle = gsub('(.{1,50})(\\s|$)', '\\1\n', out$method)
      xLabel <- paste("Simulated values, red line = fitted model. p-value (", out$alternative, ") = ", out$p.value, sep = "")

      hist(expected, xlim = range(expected, observed, na.rm = TRUE), col = "lightgrey", main = "", xlab = xLabel, breaks = 20, cex.main = 1)
      abline(v = observed, lwd = 2, col = "red")

      main <- ifelse(out$p.value <= 0.05,
                     "Dispersion test significant",
                     "Dispersion test n.s.")

      title(main = main, cex.main = 1,
            col.main = ifelse(out$p.value <= 0.05, "red", "black"))
    }
  }

  return(out)
}
2326
#' Simulated overdispersion tests
2328 #'
2329 #' @details Deprecated, switch your code to using the \code{\link{testDispersion}} function
2330 #'
2331 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa
2332 #' @param ... additional arguments to \code{\link{testDispersion}}
2333 #' @export
# Deprecated alias: warns the user, then forwards every argument to testDispersion().
testOverdispersion <- function(simulationOutput, ...) {
  message("testOverdispersion is deprecated, switch your code to using the testDispersion function")
  result <- testDispersion(simulationOutput, ...)
  result
}
2338
#' Parametric overdispersion tests
2340 #'
2341 #' @details Deprecated, switch your code to using the \code{\link{testDispersion}} function. The function will do nothing, arguments will be ignored, the parametric tests is no longer recommend
2342 #'
2343 #' @param ... arguments will be ignored, the parametric tests is no longer recommend
2344 #' @export
# Deprecated stub: the parametric overdispersion test was removed in DHARMa 0.2.0.
# All arguments are ignored; a message is emitted and 0 is returned.
testOverdispersionParametric <- function(...) {
  message("testOverdispersionParametric is deprecated and no longer recommended, see release notes in DHARMA 0.2.0 - switch your code to using the testDispersion function")
  0
}
2349
2350
2351 #' Tests for zero-inflation
2352 #'
2353 #' This function compares the observed number of zeros with the zeros expected from simulations.
2354 #'
2355 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa
2356 #' @param ... further arguments to \code{\link{testGeneric}}
2357 #' @details The plot shows the expected distribution of zeros against the observed values, the ratioObsSim shows observed vs. simulated zeros. A value < 1 means that the observed data has less zeros than expected, a value > 1 means that it has more zeros than expected (aka zero-inflation). Per default, the function tests both sides.
2358 #'
2359 #' Some notes about common problems / questions:
2360 #'
2361 #' * Zero-inflation tests after fitting the model are crucial to see if you have zero-inflation. Just because there are a lot of zeros doesn't mean you have zero-inflation, see Warton, D. I. (2005). Many zeros does not mean zero inflation: comparing the goodness-of-fit of parametric models to multivariate abundance data. Environmetrics 16(3), 275-289.
2362 #'
#' * That being said, zero-inflation tests are often not a reliable guide to decide whether to add a zi term or not. In general, model structures should be decided on ideally a priori, if that is not possible via model selection techniques (AIC, BIC, WAIC, Bayes Factor). A zero-inflation test should only be run after that decision, and to validate the decision that was taken.
2364 #'
2365 #' @note This function is a wrapper for \code{\link{testGeneric}}, where the summary argument is set to function(x) sum(x == 0)
2366 #' @author Florian Hartig
2367 #' @example inst/examples/testsHelp.R
2368 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}}
2369 #' @export
# Zero-inflation test: a thin wrapper around testGeneric with the summary
# statistic fixed to the number of zeros in the response.
testZeroInflation <- function(simulationOutput, ...) {
  testGeneric(simulationOutput = simulationOutput,
              summary = function(x) sum(x == 0),
              methodName = "DHARMa zero-inflation test via comparison to expected zeros with simulation under H0 = fitted model", ...)
}
2374
2375
2376 #' Generic simulation test of a summary statistic
2377 #'
2378 #' This function tests if a user-defined summary differs when applied to simulated / observed data.
2379 #'
2380 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa
2381 #' @param summary a function that can be applied to simulated / observed data. See examples below
2382 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" compared to the simulated null hypothesis
2383 #' @param plot whether to plot the simulated summary
2384 #' @param methodName name of the test (will be used in plot)
2385 #'
2386 #' @details This function tests if a user-defined summary differs when applied to simulated / observed data. the function can easily be remodeled to apply summaries on the residuals, by simply defining f = function(x) summary (x - predictions), as done in \code{\link{testDispersion}}
2387 #'
2388 #' @note The function that you supply is applied on the data as it is represented in your fitted model, which may not always correspond to how you think. This is important in particular when you use k/n binomial data, and want to test for 1-inflation. As an example, if have k/20 observations, and you provide your data via cbind (y, y-20), you have to test for 20-inflation (because this is how the data is represented in the model). However, if you provide data via y/20, and weights = 20, you should test for 1-inflation. In doubt, check how the data is internally represented in model.frame(model), or via simulate(model)
2389 #'
2390 #' @export
2391 #' @author Florian Hartig
2392 #' @example inst/examples/testsHelp.R
2393 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}}
#' Generic simulation test: compares a user-supplied summary statistic on the
#' observed response with its distribution over the simulated responses.
#'
#' @param simulationOutput a DHARMa object (or convertible model)
#' @param summary a function applied to the observed response and to each simulation column
#' @param alternative direction of the test
#' @param plot if TRUE, plots the simulated null distribution with the observed value
#' @param methodName label used in the htest output and plot title
#' @return an htest object with statistic ratioObsSim = observed / mean(simulated)
testGeneric <- function(simulationOutput, summary, alternative = c("two.sided", "greater", "less"), plot = TRUE, methodName = "DHARMa generic simulation test") {

  out <- list()
  # capture the caller's expression before simulationOutput is reassigned
  out$data.name <- deparse(substitute(simulationOutput))

  simulationOutput <- ensureDHARMa(simulationOutput, convert = "Model")

  alternative <- match.arg(alternative)

  # summary statistic on the observed response ...
  observed <- summary(simulationOutput$observedResponse)

  # ... and on each simulated response (columns = individual simulations)
  simulated <- apply(simulationOutput$simulatedResponse, 2, summary)

  p <- getP(simulated = simulated, observed = observed, alternative = alternative)

  out$statistic <- c(ratioObsSim = observed / mean(simulated))
  out$method <- methodName
  out$alternative <- alternative
  out$p.value <- p


  class(out) <- "htest"

  if (plot == TRUE) {
    # wrap long method names onto multiple lines for the plot title
    plotTitle <- gsub('(.{1,50})(\\s|$)', '\\1\n', methodName)
    xLabel <- paste("Simulated values, red line = fitted model. p-value (", out$alternative, ") = ", out$p.value, sep = "")
    hist(simulated, xlim = range(simulated, observed, na.rm = TRUE), col = "lightgrey", main = plotTitle, xlab = xLabel, breaks = max(round(simulationOutput$nSim / 5), 20), cex.main = 0.8)
    abline(v = observed, lwd = 2, col = "red")
  }
  return(out)
}
2425
2426
2427 #' Test for temporal autocorrelation
2428 #'
2429 #' This function performs a standard test for temporal autocorrelation on the simulated residuals
2430 #'
2431 #' @param simulationOutput an object with simulated residuals created by \code{\link{simulateResiduals}}
2432 #' @param time the time, in the same order as the data points. If not provided, random values will be created
2433 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" compared to the simulated null hypothesis
2434 #' @param plot whether to plot output
2435 #' @details The function performs a Durbin-Watson test on the uniformly scaled residuals, and plots the residuals against time. The DB test was originally be designed for normal residuals. In simulations, I didn't see a problem with this setting though. The alternative is to transform the uniform residuals to normal residuals and perform the DB test on those.
2436 #'
#' If no time values are provided, random values will be created. The sense of being able to run the test with time = NULL (random values) is to test the rate of false positives under the current residual structure (random time corresponds to H0: no spatial autocorrelation), e.g. to check if the test has nominal error rates for particular residual structures (note that Durbin-Watson originally assumes normal residuals, error rates seem correct for uniform residuals, but may not be correct if there are still other residual problems).
2438 #'
#' Testing for temporal autocorrelation requires unique time values - if you have several observations per time value, either use the recalculateResiduals function to aggregate residuals per time step, or extract the residuals from the fitted object, and plot / test each of them independently for temporally repeated subgroups (typical choices would be location / subject etc.). Note that the latter must be done by hand, outside testTemporalAutocorrelation.
2440 #'
2441 #' @note Important to note for all autocorrelation tests (spatial / temporal): the autocorrelation tests are valid to check for residual autocorrelation in models that don't assume such a correlation (in this case, you can use conditional or unconditional simulations), or if there is remaining residual autocorrelation after accounting for it in a spatial/temporal model (in that case, you have to use conditional simulations), but if checking unconditional simulations from a model with an autocorrelation structure on data that corresponds to this model, they will be significant, even if the model fully accounts for this structure.
2442 #'
2443 #' This behavior is not really a bug, but rather originates from the definition of the quantile residuals: quantile residuals are calculated independently per data point, i.e. without consideratin of any correlation structure between data points that may exist in the simulations. As a result, the simulated distributions from a unconditional simulaton will typically not reflect the correlation structure that is present in each single simulation, and the same is true for the subsequently calculated quantile residuals.
2444 #'
2445 #' The bottomline here is that spatial / temporal / other autoregressive models should either be tested based on conditional simulations, or (ideally) custom tests should be used that are not based on quantile residuals, but rather compare the correlation structure in the simulated data with the correlation structure in the observed data.
2446 #'
2447 #' @author Florian Hartig
2448 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}}
2449 #' @example inst/examples/testTemporalAutocorrelationHelp.R
2450 #' @export
#' Durbin-Watson test for temporal autocorrelation of the scaled residuals.
#'
#' @param simulationOutput a DHARMa object (or convertible input)
#' @param time observation times (must be unique); if NULL, random times are
#'   generated so the test reflects H0 error rates
#' @param alternative direction of the test, passed to lmtest::dwtest
#' @param plot if TRUE, plots residuals vs. time and their autocorrelation function
#' @return the htest object returned by lmtest::dwtest
testTemporalAutocorrelation <- function(simulationOutput, time = NULL, alternative = c("two.sided", "greater", "less"), plot = TRUE) {

  simulationOutput <- ensureDHARMa(simulationOutput, convert = TRUE)

  alternative <- match.arg(alternative)

  # FIX: fill in the random-time default BEFORE checking for duplicates, so
  # the uniqueness requirement is checked on the times actually used
  if (is.null(time)) {
    time <- sample.int(simulationOutput$nObs, simulationOutput$nObs)
    message("DHARMa::testTemporalAutocorrelation - no time argument provided, using random times for each data point")
  }

  # dwtest needs a unique ordering of observations
  # FIX: the error message previously named testSpatialAutocorrelation
  if (any(duplicated(time))) stop("testing for temporal autocorrelation requires unique time values - if you have several observations per time value, either use the recalculateResiduals function to aggregate residuals per time step, or extract the residuals from the fitted object, and plot / test each of them independently for temporally repeated subgroups (typical choices would be location / subject etc.). Note that the latter must be done by hand, outside testTemporalAutocorrelation.")

  out <- lmtest::dwtest(simulationOutput$scaledResiduals ~ 1, order.by = time, alternative = alternative)

  if (plot == TRUE) {
    oldpar <- par(mfrow = c(1, 2))
    on.exit(par(oldpar))

    plot(simulationOutput$scaledResiduals[order(time)] ~ time[order(time)],
         type = "l", ylab = "Scaled residuals", xlab = "Time", main = "Residuals vs. time")
    acf(simulationOutput$scaledResiduals[order(time)], main = "Autocorrelation")
    legend("topright",
           c(paste(out$method, " p=", round(out$p.value, digits = 5)),
             paste("Deviation ", ifelse(out$p.value < 0.05, "significant", "n.s."))),
           text.col = ifelse(out$p.value < 0.05, "red", "black"), bty = "n")

  }

  return(out)
}
2483
2484
2485 #' Test for spatial autocorrelation
2486 #'
2487 #' This function performs a standard test for spatial autocorrelation on the simulated residuals
2488 #'
2489 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa
2490 #' @param x the x coordinate, in the same order as the data points. If not provided, random values will be created
2491 #' @param y the y coordinate, in the same order as the data points. If not provided, random values will be created
2492 #' @param distMat optional distance matrix. If not provided, a distance matrix will be calculated based on x and y. See details for explanation
2493 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" compared to the simulated null hypothesis
2494 #' @param plot whether to plot output
2495 #' @details The function performs Moran.I test from the package ape, based on the provided distance matrix of the data points.
2496 #'
2497 #' There are several ways to specify this distance. If a distance matrix (distMat) is provided, calculations will be based on this distance matrix, and x,y coordinates will only used for the plotting (if provided)
2498 #' If distMat is not provided, the function will calculate the euclidian distances between x,y coordinates, and test Moran.I based on these distances.
2499 #'
2500 #' If no x/y values are provided, random values will be created. The sense of being able to run the test with x/y = NULL (random values) is to test the rate of false positives under the current residual structure (random x/y corresponds to H0: no spatial autocorrelation), e.g. to check if the test has nominal error rates for particular residual structures.
2501 #'
2502 #' Testing for spatial autocorrelation requires unique x,y values - if you have several observations per location, either use the recalculateResiduals function to aggregate residuals per location, or extract the residuals from the fitted object, and plot / test each of them independently for spatially repeated subgroups (a typical scenario would repeated spatial observation, in which case one could plot / test each time step separately for temporal autocorrelation). Note that the latter must be done by hand, outside testSpatialAutocorrelation.
2503 #'
2504 #' @note Important to note for all autocorrelation tests (spatial / temporal): the autocorrelation tests are valid to check for residual autocorrelation in models that don't assume such a correlation (in this case, you can use conditional or unconditional simulations), or if there is remaining residual autocorrelation after accounting for it in a spatial/temporal model (in that case, you have to use conditional simulations), but if checking unconditional simulations from a model with an autocorrelation structure on data that corresponds to this model, they will be significant, even if the model fully accounts for this structure.
2505 #'
2506 #' This behavior is not really a bug, but rather originates from the definition of the quantile residuals: quantile residuals are calculated independently per data point, i.e. without consideratin of any correlation structure between data points that may exist in the simulations. As a result, the simulated distributions from a unconditional simulaton will typically not reflect the correlation structure that is present in each single simulation, and the same is true for the subsequently calculated quantile residuals.
2507 #'
2508 #' The bottomline here is that spatial / temporal / other autoregressive models should either be tested based on conditional simulations, or (ideally) custom tests should be used that are not based on quantile residuals, but rather compare the correlation structure in the simulated data with the correlation structure in the observed data.
2509 #'
2510 #' @author Florian Hartig
2511 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testQuantiles}}
2512 #' @import grDevices
2513 #' @example inst/examples/testSpatialAutocorrelationHelp.R
2514 #' @export
#' Moran's I test for spatial autocorrelation of the scaled residuals.
#'
#' @param simulationOutput a DHARMa object (or convertible input)
#' @param x,y coordinates per observation; if NULL, random values are generated
#'   so the test reflects H0 error rates
#' @param distMat optional distance matrix; if given, coordinates are only used for plotting
#' @param alternative direction of the test, passed to ape::Moran.I
#' @param plot if TRUE, plots residuals in space, colored red (low) to blue (high)
#' @return an htest object wrapping the ape::Moran.I result
testSpatialAutocorrelation <- function(simulationOutput, x = NULL, y = NULL, distMat = NULL, alternative = c("two.sided", "greater", "less"), plot = TRUE) {

  alternative <- match.arg(alternative)
  data.name <- deparse(substitute(simulationOutput)) # needs to be before ensureDHARMa
  simulationOutput <- ensureDHARMa(simulationOutput, convert = TRUE)

  if (any(duplicated(cbind(x, y)))) stop("testing for spatial autocorrelation requires unique x,y values - if you have several observations per location, either use the recalculateResiduals function to aggregate residuals per location, or extract the residuals from the fitted object, and plot / test each of them independently for spatially repeated subgroups (a typical scenario would repeated spatial observation, in which case one could plot / test each time step separately for temporal autocorrelation). Note that the latter must be done by hand, outside testSpatialAutocorrelation.")

  if ((!is.null(x) || !is.null(y)) && !is.null(distMat)) message("both coordinates and distMat provided, calculations will be done based on the distance matrix, coordinates will only be used for plotting")

  # if not provided, fill x and y with random numbers (null model)
  if (is.null(x)) {
    x <- runif(simulationOutput$nObs, -1, 1)
    message("DHARMa::testSpatialAutocorrelation - no x coordinates provided, using random values for each data point")
  }

  if (is.null(y)) {
    y <- runif(simulationOutput$nObs, -1, 1)
    # FIX: this message previously said "no x coordinates provided"
    message("DHARMa::testSpatialAutocorrelation - no y coordinates provided, using random values for each data point")
  }

  # if not provided, create a euclidean distance matrix from x and y
  if (is.null(distMat)) distMat <- as.matrix(dist(cbind(x, y)))

  # Moran's I uses inverse-distance weights with a zeroed diagonal
  invDistMat <- 1 / distMat
  diag(invDistMat) <- 0

  MI <- ape::Moran.I(simulationOutput$scaledResiduals, weight = invDistMat, alternative = alternative)

  out <- list()
  out$statistic <- c(observed = MI$observed, expected = MI$expected, sd = MI$sd)
  out$method <- "DHARMa Moran's I test for spatial autocorrelation"
  out$alternative <- "Spatial autocorrelation"
  out$p.value <- MI$p.value
  out$data.name <- data.name

  class(out) <- "htest"

  if (plot == TRUE) {
    opar <- par(mfrow = c(1, 1))
    on.exit(par(opar))

    # map residuals in [0,1] onto a red-white-blue ramp
    col <- colorRamp(c("red", "white", "blue"))(simulationOutput$scaledResiduals)
    plot(x, y, col = rgb(col, maxColorValue = 255), main = out$method, cex.main = 0.8)

    # TODO implement correlogram
  }
  # NOTE(review): removed a second, empty if(plot == T) block that did nothing

  return(out)
}
2570
2571
# Compute a p-value from simulated vs. observed statistics.
#
# simulated: vector of statistics from null simulations
# observed: the observed statistic
# alternative: "greater", "less" or "two.sided" (any other value is an error)
getP <- function(simulated, observed, alternative){

  # tail probabilities in both directions
  upper = mean(simulated >= observed)
  lower = mean(simulated <= observed)

  if(alternative == "greater") p = upper
  if(alternative == "less") p = lower
  if(alternative == "two.sided") p = min(2 * min(lower, upper), 1)

  return(p)
}
2580
2581
2582
2583 ####################### tests.R
2584
2585 ####################### compatibility.R
2586
2587
2588 # New S3 methods
2589
#' Get model response
#'
#' Extract the response (dependent variable) of a fitted model
#'
#' S3 generic: safely extracts the observed response for the supported fitted
#' model classes. Dispatch happens on the class of \code{object}.
#'
#' @param object a fitted model
#' @param ... additional parameters passed on to the class-specific methods
#'
#' @example inst/examples/wrappersHelp.R
#'
#' @seealso \code{\link{getRefit}}, \code{\link{getSimulations}}, \code{\link{getFixedEffects}}, \code{\link{getFitted}}
#' @author Florian Hartig
#' @export
getObservedResponse <- function (object, ...) {
  # dispatch on the first argument (default UseMethod behavior)
  UseMethod("getObservedResponse")
}
2607
#' @rdname getObservedResponse
#' @export
getObservedResponse.default <- function (object, ...){

  # hoisted: model.frame() was previously re-evaluated up to three times
  mf = model.frame(object)
  out = mf[,1]

  # check for weights in k/n case: binomial fits with a proportion response
  # store the proportion in the model frame, so multiply by the prior
  # weights to recover counts
  # (scalar condition, so && instead of the elementwise &)
  if(family(object)$family %in% c("binomial", "betabinomial") && "(weights)" %in% colnames(mf)){
    out = out * mf$`(weights)`
  }

  # check for k/n binomial: a 2-column (success, failure) matrix response
  if(is.matrix(out)){
    if(!(ncol(out) == 2)) securityAssertion("nKcase - wrong dimensions of response")
    if(!(family(object)$family %in% c("binomial", "betabinomial"))) securityAssertion("nKcase - wrong family")

    # keep the successes only
    out = out[,1]
  }

  # observation is factor - unlike lme4 and older, glmmTMB simulates nevertheless as numeric
  if(is.factor(out)) out = as.numeric(out) - 1

  return(out)
}
2632
# Shared warning text used by the getSimulations.* methods when a model was
# fitted with prior weights that its simulate() method does not account for.
weightsWarning = "Model was fit with prior weights. These will be ignored in the simulation. See ?getSimulations for details"
2634
#' Get model simulations
#'
#' Wrapper to simulate from a fitted model
#'
#' The purpose of this wrapper around the simulate function is to return
#' simulations from a model in a standardized way
#'
#' @param object a fitted model
#' @param nsim number of simulations
#' @param type if simulations should be prepared for getQuantile or for refit
#' @param ... additional parameters to be passed on, usually to the simulate function of the respective model class
#'
#' @return a matrix with simulations
#' @example inst/examples/wrappersHelp.R
#'
#' @seealso \code{\link{getObservedResponse}}, \code{\link{getRefit}}, \code{\link{getFixedEffects}}, \code{\link{getFitted}}
#'
#' @details Dispatches on the class of \code{object}. Note: if the model was
#' fit with weights, a warning is issued for model classes whose simulate
#' function does not include the weights in the simulations; whether the
#' results are appropriate then depends on how the weights are used.
#'
#' @author Florian Hartig
#' @export
getSimulations <- function (object, nsim = 1 , type = c("normal", "refit"), ...) {
  # dispatch on the first argument (default UseMethod behavior)
  UseMethod("getSimulations")
}
2661
#' @rdname getSimulations
#' @export
getSimulations.default <- function (object, nsim = 1, type = c("normal", "refit"), ...){

  type <- match.arg(type)

  # simulate() returns a data.frame with nsim columns
  # (2*nsim columns in the k/n binomial matrix-response case)
  out = simulate(object, nsim = nsim , ...)

  if (type == "normal"){
    if(family(object)$family %in% c("binomial", "betabinomial")){
      if("(weights)" %in% colnames(model.frame(object))){
        # proportion + weights parameterization: scale proportions to counts
        x = model.frame(object)
        out = out * x$`(weights)`
      } else if (is.matrix(out[[1]])){
        # this is for the k/n binomial case: keep only the success columns
        out = as.matrix(out)[,seq(1, (2*nsim), by = 2)]
      } else if(is.factor(out[[1]])){
        if(nlevels(out[[1]]) != 2){
          warning("The fitted model has a factorial response with number of levels not equal to 2 - there is currently no sensible application in DHARMa that would lead to this situation. Likely, you are trying something that doesn't work.")
        }
        else{
          # binary factor: convert the two levels to 0/1
          out = data.matrix(out) - 1
        }
      }
    }

    # standardize to a plain numeric matrix
    if(!is.matrix(out)) out = data.matrix(out)
  } else{
    # refit type: simulations must look like the original response
    if(family(object)$family %in% c("binomial", "betabinomial")){
      # BUGFIX: the converted data.frame was computed but never assigned to
      # out, so binary-factor simulations were returned unchanged
      # (also: scalar condition, so && instead of &)
      if (!is.matrix(out[[1]]) && !is.numeric(out)) out = data.frame(data.matrix(out)-1)
    }
  }

  return(out)
}
2697
2698
#' Extract fixed effects of a supported model
#'
#' A wrapper to extract fixed effects of a supported model
#'
#' @param fittedModel a fitted model
#'
#' @example inst/examples/wrappersHelp.R
#'
#' @importFrom lme4 fixef
#'
#' @seealso \code{\link{getObservedResponse}}, \code{\link{getSimulations}}, \code{\link{getRefit}}, \code{\link{getFitted}}
#' @export
getFixedEffects <- function(fittedModel){

  modelClass = class(fittedModel)[1]

  if(modelClass %in% c("glm", "lm", "gam", "bam", "negbin")){
    # classical (generalized) linear models: all coefficients are fixed
    out = coef(fittedModel)
  } else if(modelClass %in% c("glmerMod", "lmerMod", "HLfit")){
    # mixed models: fixed-effect part only
    out = fixef(fittedModel)
  } else if(modelClass %in% c("glmmTMB")){
    # glmmTMB returns a list of components; keep the conditional model part
    out = glmmTMB::fixef(fittedModel)$cond
  } else {
    # unknown class: try coef() first, fall back to fixef()
    out = coef(fittedModel)
    if(is.null(out)) out = fixef(fittedModel)
  }
  return(out)
}
2726
#' Get model refit
#'
#' Wrapper to refit a fitted model
#'
#' @param object a fitted model
#' @param newresp the new response that should be used to refit the model
#' @param ... additional parameters to be passed on to the refit or update class that is used to refit the model
#'
#' @details S3 generic standardizing the refit of a model with a new response.
#' Methods use refit() where available and update() otherwise. For glmmTMB:
#' since version 1.0, glmmTMB has a refit function, but this didn't work, so
#' the update-based hack is used instead.
#'
#' @example inst/examples/wrappersHelp.R
#'
#' @seealso \code{\link{getObservedResponse}}, \code{\link{getSimulations}}, \code{\link{getFixedEffects}}
#' @author Florian Hartig
#' @export
getRefit <- function (object, newresp, ...) {
  # dispatch on the first argument (default UseMethod behavior)
  UseMethod("getRefit")
}
2745
#' @rdname getRefit
#'
#' @importFrom lme4 refit
#'
#' @export
getRefit.default <- function (object, newresp, ...){
  # default: delegate to the refit() generic (imported from lme4), which
  # refits the same model structure to the new response
  refit(object, newresp, ...)
}
2754
#' Get model fitted
#'
#' Wrapper to get the fitted value of a fitted model
#'
#' S3 generic standardizing the extraction of fitted (predicted) values.
#'
#' @param object a fitted model
#' @param ... additional parameters to be passed on, usually to the simulate function of the respective model class
#'
#' @example inst/examples/wrappersHelp.R
#'
#' @seealso \code{\link{getObservedResponse}}, \code{\link{getSimulations}}, \code{\link{getRefit}}, \code{\link{getFixedEffects}}
#'
#' @author Florian Hartig
#' @export
getFitted <- function (object, ...) {
  # dispatch on the first argument (default UseMethod behavior)
  UseMethod("getFitted")
}
2773
#' @rdname getFitted
#' @export
getFitted.default <- function (object,...){
  # the standard fitted() accessor works for most supported model classes
  fitted(object, ...)
}
2779
2780 #' has NA
2781 #'
2782 #' checks if the fitted model excluded NA values
2783 #'
2784 #' @param object a fitted model
2785 #'
2786 #' @details Checks if the fitted model excluded NA values
2787 #'
2788 #' @export
2789
2790
2791 # hasNA <- function(object){
2792 # x = rownames(model.frame(object))
2793 # if(length(x) < as.numeric(x[length(x) ])) return(TRUE)
2794 # else return(FALSE)
2795 # }
2796
2797 ######### LM #############
2798
#' @rdname getRefit
#' @export
getRefit.lm <- function(object, newresp, ...){

  # rebuild the model frame with the new response in place of the old one
  modelData <- model.frame(object)

  if (is.factor(newresp)) {
    # Hack to make the factor binomial case work: encode levels as 0/1
    modelData[, 1] <- as.numeric(newresp) - 1
  } else if (is.vector(newresp)) {
    modelData[, 1] <- newresp
  } else {
    # Hack to make the binomial n/k case work: drop the old response column
    # and bind the (success, failure) matrix at the front
    modelData[[1]] <- NULL
    modelData <- cbind(newresp, modelData)
  }

  update(object, data = modelData)
}
2819
2820
# Internal helper: TRUE when the lm/glm object carries non-constant prior
# weights. (Name kept exactly as in the original source, including the typo.)
hasWeigths.lm <- function(object, ...){
  length(unique(object$prior.weights)) != 1
}
2825
2826
2827 ######### GLM #############
2828
#' @rdname getSimulations
#' @export
getSimulations.negbin <- function (object, nsim = 1, type = c("normal", "refit"), ...){
  # negbin's simulate() ignores prior weights - warn the user, then delegate
  hasPriorWeights <- "(weights)" %in% colnames(model.frame(object))
  if (hasPriorWeights) warning(weightsWarning)
  getSimulations.default(object = object, nsim = nsim, type = type, ...)
}
2835
2836
2837 ######## MGCV ############
2838
# This function overwrites the standard fitted function for GAM

#' @rdname getFitted
#' @export
getFitted.gam <- function(object, ...){
  # strip the class down to "glm" so the glm/lm fitted() machinery is used
  class(object) = "glm"
  fittedValues = stats::fitted(object, ...)
  # re-attach observation indices as names, as downstream code expects
  names(fittedValues) = as.character(1:length(fittedValues))
  fittedValues
}
2849
2850 # Check that this works
2851 # plot(fitted(fittedModelGAM), predict(fittedModelGAM, type = "response"))
2852
2853
2854 ######## lme4 ############
2855
2856
#' @rdname getSimulations
#' @export
getSimulations.lmerMod <- function (object, nsim = 1, type = c("normal", "refit"), ...){
  # lme4's simulate() ignores prior weights - warn the user, then delegate
  hasPriorWeights <- "(weights)" %in% colnames(model.frame(object))
  if (hasPriorWeights) warning(weightsWarning)
  getSimulations.default(object = object, nsim = nsim, type = type, ...)
}
2864
2865
2866 ######## glmmTMB ######
2867
#' @rdname getRefit
#' @export
getRefit.glmmTMB <- function(object, newresp, ...){

  frame <- model.frame(object)

  if (!is.matrix(newresp)) {
    # vector response: replace the response column directly
    frame[[1]] <- newresp
  } else {
    # hack to make update work - for some reason, glmmTMB wants the matrix
    # embedded in the df for update to work ... should be solved ideally,
    # see https://github.com/glmmTMB/glmmTMB/issues/549
    responseNames <- colnames(frame[[1]])
    frame[[1]] <- NULL
    frame <- cbind(newresp, frame)
    colnames(frame)[1:2] <- responseNames
  }

  update(object, data = frame)
}
2885
2886
# glmmTMB simulates normal counts (and not proportions in any case, so the check for the other models is not needed), see #158
# note that if observation is factor - unlike lme4 and older, glmmTMB simulates nevertheless as numeric

#' @rdname getSimulations
#' @export
getSimulations.glmmTMB <- function (object, nsim = 1, type = c("normal", "refit"), ...){

  # prior weights are ignored by simulate() except in the binomial k/n case
  # handled explicitly below - warn the user otherwise
  if("(weights)" %in% colnames(model.frame(object)) & ! family(object)$family %in% c("binomial", "betabinomial")) warning(weightsWarning)

  type <- match.arg(type)

  out = simulate(object, nsim = nsim, ...)

  if (type == "normal"){
    if (is.matrix(out[[1]])){
      # this is for the k/n binomial case: simulate() returns 2*nsim columns
      # (success, failure pairs) - keep only the success columns
      out = as.matrix(out)[,seq(1, (2*nsim), by = 2)]
    }
    # standardize to a plain numeric matrix
    if(!is.matrix(out)) out = data.matrix(out)
  }else{

    # type == "refit": return a data.frame shaped like the original response

    # check for weights in k/n case
    if(family(object)$family %in% c("binomial", "betabinomial")){
      if("(weights)" %in% colnames(model.frame(object))){
        # proportion + weights parameterization: simulate() returns counts;
        # divide column-wise by the weights to get back proportions
        w = model.frame(object)
        w = w$`(weights)`
        tmp <- function(x)x/w
        out = apply(out, 2, tmp)
        out = as.data.frame(out)
      }
      else if(is.matrix(out[[1]]) & !is.matrix(model.frame(object)[[1]])){
        # simulated k/n matrix but the original response was a vector:
        # keep only the success columns
        out = as.data.frame(as.matrix(out)[,seq(1, (2*nsim), by = 2)])
      }
    }

    # NOTE(review): the commented-out code below is dead upstream code kept
    # for reference; it is not executed.

    # matrixResp =
    #
    # if(matrixResp & !is.null(ncol(newresp))){
    #   # Hack to make the factor binomial case work
    #   tmp = colnames(newData[[1]])
    #   newData[[1]] = NULL
    #   newData = cbind(newresp, newData)
    #   colnames(newData)[1:2] = tmp
    # } else if(!is.null(ncol(newresp))){
    #   newData[[1]] = newresp[,1]
    # } else {
    #   newData[[1]] = newresp
    # }


    # if (out$modelClass == "glmmTMB" & ncol(simulations) == 2*n) simObserved = simulations[,(1+(2*(i-1))):(2+(2*(i-1)))]
  }

  # else securityAssertion("Simulation results produced unsupported data structure", stop = TRUE)

  return(out)
}
2949
2950 ####### spaMM #########
2951
#' @rdname getObservedResponse
#' @export
getObservedResponse.HLfit <- function(object, ...){

  out = spaMM::response(object, ...)

  # k/n binomial models return a 2-column (success, failure) matrix
  nKcase = is.matrix(out)
  if(nKcase){
    # BUGFIX: compare the family *name* - family(object) returns the whole
    # family object (a list), so the original comparison matched a list
    # against character strings and produced a length > 1 logical in if().
    # This also makes the check consistent with getObservedResponse.default.
    if(! (family(object)$family %in% c("binomial", "betabinomial"))) securityAssertion("nKcase - wrong family")
    if(! (ncol(out)==2)) securityAssertion("nKcase - wrong dimensions of response")

    # keep the successes only
    out = out[,1]
  }

  # standardize to numeric (e.g. factor responses)
  if(!is.numeric(out)) out = as.numeric(out)

  return(out)

}
2969
#' @rdname getSimulations
#' @export
getSimulations.HLfit <- function(object, nsim = 1, type = c("normal", "refit"), ...){

  type <- match.arg(type)

  # spaMM's simulate() prints progress output; silence it
  capture.output({out = simulate(object, nsim = nsim, ...)})

  if (type == "refit") {
    # refit expects a data.frame shaped like the response
    out = as.data.frame(out)
  } else {
    # standardize to a plain numeric matrix for getQuantile()
    if (!is.matrix(out)) out = data.matrix(out)
  }
  return(out)
}
2985
#' @rdname getRefit
#' @export
getRefit.HLfit <- function(object, newresp, ...) {
  # spaMM provides update_resp() for refitting with a new response vector
  spaMM::update_resp(object, newresp, evaluate = TRUE)
}
2991
2992 ####################### compatibility.R
2993
2994 ####################### helper.R
2995
#' Modified ECDF function
#'
#' @details ensures symmetric ECDF (standard ECDF is <), and that 0 / 1 values are only produced if the data is strictly < > than the observed data
#'
#' @keywords internal
DHARMa.ecdf <- function (x)
{
  x <- sort(x)
  n <- length(x)
  if (n < 1)
    stop(paste("DHARMa.ecdf - length vector < 1", x))

  # cumulative counts at each unique value, divided by n + 1 rather than n,
  # so the ECDF never reaches 0 or 1 within the data range
  vals <- unique(x)
  counts <- tabulate(match(x, vals))
  probs <- cumsum(counts) / (n + 1)

  # linear interpolation between knots; clamp to [0, 1] outside the range
  rval <- approxfun(vals, probs, method = "linear",
                    yleft = 0, yright = 1, ties = "ordered")

  # mimic stats::ecdf so the result prints/plots like a regular ECDF
  class(rval) <- c("ecdf", "stepfun", class(rval))
  assign("nobs", n, envir = environment(rval))
  attr(rval, "call") <- sys.call()
  rval
}
3015
3016
3017
#' calculate quantiles
#'
#' calculates residual quantiles from a given simulation
#'
#' @param simulations a matrix with simulations from a fitted model. Rows = observations, columns = replicate simulations
#' @param observed a vector with the observed data
#' @param integerResponse is the response integer-valued. Only has an effect for method = "traditional"
#' @param method the quantile randomization method used. See details
#'
#' @details The function calculates residual quantiles from the simulated data. For continuous distributions, this will simply be the value of the ecdf.
#'
#' For discrete data, there are two options implemented.
#'
#' The current default (available since DHARMa 0.3.1) are probability integral transform (PIT-) residuals (Smith, 1985; Dunn & Smyth, 1996; see also Warton, et al., 2017).
#'
#' Before DHARMa 0.3.1, a different randomization procedure was used, in which a U(-0.5, 0.5) distribution was added on observations and simulations for discrete distributions. For a completely discrete distribution, the two procedures should deliver equivalent results, but the second method has the disadvantage that a) one has to know if the distribution is discrete (DHARMa tries to recognize this automatically), and b) that it leads to inefficiencies for some distributions such as the Tweedie, which are partly continuous, partly discrete (see e.g. https://github.com/florianhartig/DHARMa/issues/168).
#'
#' @references
#'
#' Smith, J. Q. "Diagnostic checks of non-standard time series models." Journal of Forecasting 4.3 (1985): 283-291.
#'
#' Dunn, P.K., & Smyth, G.K. (1996). Randomized quantile residuals. Journal of Computational and Graphical Statistics 5, 236-244.
#'
#' Warton, David I., Loïc Thibaut, and Yi Alice Wang. "The PIT-trap—A “model-free” bootstrap procedure for inference about regression models with discrete, multivariate responses." PloS one 12.7 (2017)
#'
#' @export
getQuantile <- function(simulations, observed, integerResponse, method = c("PIT", "traditional")){

  method = match.arg(method)

  n = length(observed)
  if (nrow(simulations) != n) stop("DHARMa::getquantile: wrong dimension of simulations")
  nSim = ncol(simulations)


  if(method == "traditional"){

    # heuristic: duplicated simulated values indicate a discrete response,
    # even if the family was declared continuous
    # (idiom fix: direct logical use instead of == F / == T comparisons)
    if(!integerResponse){

      if(any(duplicated(observed))) message("Model family was recognized or set as continuous, but duplicate values were detected in the response. Consider if you are fitting an appropriate model.")

      values = as.vector(simulations)[duplicated(as.vector(simulations))]
      if(length(values) > 0){
        if (all(values%%1==0)){
          integerResponse = TRUE
          message("Model family was recognized or set as continuous, but duplicate values were detected in the simulation - changing to integer residuals (see ?simulateResiduals for details)")
        } else {
          message("Duplicate non-integer values found in the simulation. If this is because you are fitting a non-inter valued discrete response model, note that DHARMa does not perform appropriate randomization for such cases.")
        }

      }
    }

    # for integer responses, jitter both simulations and observation with
    # U(-0.5, 0.5) before evaluating the empirical CDF
    # (the per-iteration RNG draw order is preserved exactly)
    scaledResiduals = rep(NA, n)
    for (i in seq_len(n)){
      if(integerResponse){
        scaledResiduals[i] <- DHARMa.ecdf(simulations[i,] + runif(nSim, -0.5, 0.5))(observed[i] + runif(1, -0.5, 0.5))
      }else{
        scaledResiduals[i] <- DHARMa.ecdf(simulations[i,])(observed[i])
      }
    }

  } else {

    # PIT residuals: draw uniformly between P(sim < obs) and P(sim <= obs)
    # (locals renamed from min/max, which shadowed the base functions)
    scaledResiduals = rep(NA, n)
    for (i in seq_len(n)){
      minP <- sum(simulations[i,] < observed[i]) / length(simulations[i,])
      maxP <- sum(simulations[i,] <= observed[i]) / length(simulations[i,])
      # degenerate case (all simulations on one side): fall back to the ECDF
      if (minP == maxP) scaledResiduals[i] = DHARMa.ecdf(simulations[i,])(observed[i])
      else{
        scaledResiduals[i] = runif(1, minP, maxP)
      }
    }
  }

  return(scaledResiduals)
}
3096
3097 #
3098 #
3099 # testData = createData(sampleSize = 200, family = gaussian(),
3100 # randomEffectVariance = 0, numGroups = 5)
3101 # fittedModel <- glmmTMB(observedResponse ~ Environment1,
3102 # data = testData)
3103 # simulationOutput <- simulateResiduals(fittedModel = fittedModel)
3104 #
3105 # sims = simulationOutput$simulatedResponse
3106 # sims[1, c(1,6,8)] = 0
3107 # any(apply(sims, 1, anyDuplicated))
3108 # getQuantile(simulations = sims, observed = testData$observedResponse, n = 200, integerResponse = F, nSim = 250)
3109 #
3110 #
3111 #
3112
3113
3114
#' Check dot operator
#'
#' @param name variable name
#' @param default variable default
#'
#' @details modified from https://github.com/lcolladotor/dots
#'
#' @keywords internal
checkDots <- function(name, default, ...) {
  dotArgs <- list(...)
  # return the user-supplied value when present in ..., otherwise the default
  if (name %in% names(dotArgs)) {
    return(dotArgs[[name]])
  }
  default
}
3133
3134
# Internal guard: emits a standard DHARMa "unexpected condition" message,
# as a warning by default or as an error when stop = TRUE.
securityAssertion <- function(context = "Not provided", stop = F){
  generalMessage = "Message from DHARMa: During the execution of a DHARMa function, some unexpected conditions occurred. Even if you didn't get an error, your results may not be reliable. Please check with the help if you use the functions as intended. If you think that the error is not on your side, I would be grateful if you could report the problem at https://github.com/florianhartig/DHARMa/issues \n\n Context:"
  msg <- paste(generalMessage, context)
  # note: the parameter shadows base::stop only as a value; the call below
  # still resolves to the base function
  if (stop) stop(msg) else warning(msg)
}
3140
3141 ####################### helper.R
3142
3143 ####################### plot.R
3144
#' DHARMa standard residual plots
#'
#' This function creates standard plots for the simulated residuals
#' @param x an object with simulated residuals created by \code{\link{simulateResiduals}}
#' @param rank if T (default), the values of pred will be rank transformed. This will usually make patterns easier to spot visually, especially if the distribution of the predictor is skewed.
#' @param ... further options for \code{\link{plotResiduals}}. Consider in particular parameters quantreg, rank and asFactor. xlab, ylab and main cannot be changed when using plotSimulatedResiduals, but can be changed when using plotResiduals.
#' @details Creates a two-panel diagnostic figure: left, a qq-uniform plot
#' against overall deviations from uniformity (\code{\link{plotQQunif}});
#' right, residuals against predicted values (\code{\link{plotResiduals}}),
#' with outliers highlighted in red (see \code{\link{testOutliers}}).
#' For a correctly specified model one expects a) a straight 1-1 line in the
#' qq-plot and b) residuals uniform in the vertical direction of the
#' res ~ predictor plot; deviations are interpreted as for linear regression
#' (see the vignette for examples). The optional quantile regression draws
#' 0.25 / 0.5 / 0.75 quantile lines that should be straight and horizontal;
#' some chance deviation is expected, especially for small samples. The
#' quantile regression can be slow for large data, so quantreg = F (default
#' for n > 2000) produces a smooth spline instead.
#'
#' @seealso \code{\link{plotResiduals}}, \code{\link{plotQQunif}}
#' @example inst/examples/plotsHelp.R
#' @import graphics
#' @import utils
#' @export
plot.DHARMa <- function(x, rank = TRUE, ...){

  # two panels side by side; restore the previous par settings on exit
  previousPar <- par(mfrow = c(1,2), oma = c(0,1,2,1))
  on.exit(par(previousPar))

  plotQQunif(x)
  plotResiduals(x, rank = rank, ...)

  mtext("DHARMa residual diagnostics", outer = T)
}
3178
3179
#' Histogram of DHARMa residuals
#'
#' The function produces a histogram from a DHARMa output
#'
#' @param x a DHARMa simulation output (class DHARMa)
#' @param breaks breaks for hist() function
#' @param col col for hist bars
#' @param main plot main
#' @param xlab plot xlab
#' @param cex.main plot cex.main
#' @param ... other arguments to be passed on to hist
#' @seealso \code{\link{plotSimulatedResiduals}}, \code{\link{plotResiduals}}
#' @example inst/examples/plotsHelp.R
#' @export
hist.DHARMa <- function(x,
                        breaks = seq(-0.02, 1.02, len = 53),
                        col = c("red",rep("lightgrey",50), "red"),
                        main = "Hist of DHARMa residuals",
                        xlab = "Residuals (outliers are marked red)",
                        cex.main = 1,
                        ...){

  x = ensureDHARMa(x, convert = T)

  # nudge exact 0 / 1 residuals (outliers) just outside [0, 1] so they land
  # in the red edge bins of the default breaks
  residuals = x$scaledResiduals
  residuals[residuals == 0] = -0.01
  residuals[residuals == 1] = 1.01

  hist(residuals, breaks = breaks, col = col, main = main, xlab = xlab, cex.main = cex.main, ...)
}
3210
3211
#' DHARMa standard residual plots
#'
#' DEPRECATED, use plot() instead
#'
#' @param simulationOutput an object with simulated residuals created by \code{\link{simulateResiduals}}
#' @param ... further options for \code{\link{plotResiduals}}. Consider in particular parameters quantreg, rank and asFactor. xlab, ylab and main cannot be changed when using plotSimulatedResiduals, but can be changed when using plotResiduals.
#' @note This function is deprecated. Use \code{\link{plot.DHARMa}}
#'
#' @seealso \code{\link{plotResiduals}}, \code{\link{plotQQunif}}
#' @export
plotSimulatedResiduals <- function(simulationOutput, ...){
  # deprecated alias kept for backward compatibility; forwards to plot()
  message("plotSimulatedResiduals is deprecated, please switch your code to simply using the plot() function")
  plot(simulationOutput, ...)
}
3226
3227
#' Quantile-quantile plot for a uniform distribution
#'
#' The function produces a uniform quantile-quantile plot from a DHARMa output
#'
#' @param simulationOutput a DHARMa simulation output (class DHARMa)
#' @param testUniformity if TRUE, the function \code{\link{testUniformity}} will be called and the result will be added to the plot
#' @param testOutliers if TRUE, the function \code{\link{testOutliers}} will be called and the result will be added to the plot
#' @param testDispersion if TRUE, the function \code{\link{testDispersion}} will be called and the result will be added to the plot
#' @param ... arguments to be passed on to \code{\link[gap]{qqunif}}
#'
#' @details the function calls qqunif from the R package gap to create a quantile-quantile plot for a uniform distribution.
#' @seealso \code{\link{plotSimulatedResiduals}}, \code{\link{plotResiduals}}
#' @example inst/examples/plotsHelp.R
#' @export
plotQQunif <- function(simulationOutput, testUniformity = TRUE, testOutliers = TRUE, testDispersion = TRUE, ...){

  simulationOutput = ensureDHARMa(simulationOutput, convert = "Model")

  gap::qqunif(simulationOutput$scaledResiduals,pch=2,bty="n", logscale = F, col = "black", cex = 0.6, main = "QQ plot residuals", cex.main = 1, ...)

  # local helper: add a legend reporting a test result, red when significant
  # (factored out of three near-identical copies)
  addTestLegend <- function(temp, position, label){
    legend(position,
           c(paste(label, round(temp$p.value, digits = 5)),
             paste("Deviation ", ifelse(temp$p.value < 0.05, "significant", "n.s."))),
           text.col = ifelse(temp$p.value < 0.05, "red", "black" ), bty="n")
  }

  # note: the logical parameters shadow the test functions only as values;
  # the calls below still resolve to the functions
  if(testUniformity == TRUE){
    addTestLegend(testUniformity(simulationOutput, plot = F), "topleft", "KS test: p=")
  }

  if(testOutliers == TRUE){
    addTestLegend(testOutliers(simulationOutput, plot = F), "bottomright", "Outlier test: p=")
  }

  if(testDispersion == TRUE){
    addTestLegend(testDispersion(simulationOutput, plot = F), "center", "Dispersion test: p=")
  }

}
3264
3265
3266 #' Generic res ~ pred scatter plot with spline or quantile regression on top
3267 #'
3268 #' The function creates a generic residual plot with either spline or quantile regression to highlight patterns in the residuals. Outliers are highlighted in red.
3269 #'
3270 #' @param simulationOutput on object, usually a DHARMa object, from which residual values can be extracted. Alternatively, a vector with residuals or a fitted model can be provided, which will then be transformed into a DHARMa object.
3271 #' @param form optional predictor against which the residuals should be plotted. Default is to used the predicted(simulationOutput)
3272 #' @param quantreg whether to perform a quantile regression on 0.25, 0.5, 0.75 on the residuals. If F, a spline will be created instead. Default NULL chooses T for nObs < 2000, and F otherwise.
3273 #' @param rank if T, the values provided in form will be rank transformed. This will usually make patterns easier to spot visually, especially if the distribution of the predictor is skewed. If form is a factor, this has no effect.
3274 #' @param asFactor should a numeric predictor provided in form be treated as a factor. Default is to choose this for < 10 unique values, as long as enough predictions are available to draw a boxplot.
3275 #' @param smoothScatter if T, a smooth scatter plot will plotted instead of a normal scatter plot. This makes sense when the number of residuals is very large. Default NULL chooses T for nObs < 10000, and F otherwise.
3276 #' @param quantiles for a quantile regression, which quantiles should be plotted
3277 #' @param ... additional arguments to plot / boxplot.
3278 #' @details The function plots residuals against a predictor (by default against the fitted value, extracted from the DHARMa object, or any other predictor).
3279 #'
3280 #' Outliers are highlighted in red (for information on definition and interpretation of outliers, see \code{\link{testOutliers}}).
3281 #'
3282 #' To provide a visual aid in detecting deviations from uniformity in y-direction, the plot function calculates an (optional) quantile regression, which compares the empirical 0.25, 0.5 and 0.75 quantiles (default) in y direction (red solid lines) with the theoretical 0.25, 0.5 and 0.75 quantiles (dashed black line).
3283 #'
3284 #' Asymptotically (i.e. for lots of data / residuals), if the model is correct, theoretical and the empirical quantiles should be identical (i.e. dashed and solid lines should match). A p-value for the deviation is calculated for each quantile line. Significant deviations are highlighted by red color.
3285 #'
3286 #' If form is a factor, a boxplot will be plotted instead of a scatter plot. The distribution for each factor level should be uniformly distributed, so the box should go from 0.25 to 0.75, with the median line at 0.5. Again, chance deviations from this will increases when the sample size is smaller. You can run null simulations to test if the deviations you see exceed what you would expect from random variation. If you want to create box plots for categorical predictors (e.g. because you only have a small number of unique numeric predictor values), you can convert your predictor with as.factor(pred)
3287 #'
3288 #' @return if quantile tests are performed, the function returns them invisibly.
3289 #'
3290 #' @note The quantile regression can take some time to calculate, especially for larger datasets. For that reason, quantreg = F can be set to produce a smooth spline instead.
3291 #'
3292 #' @seealso \code{\link{plotQQunif}}
3293 #' @example inst/examples/plotsHelp.R
3294 #' @export
plotResiduals <- function(simulationOutput, form = NULL, quantreg = NULL, rank = F, asFactor = NULL, smoothScatter = NULL, quantiles = c(0.25, 0.5, 0.75), ...){


  ##### Checks #####

  # Collect the caller's extra graphics arguments. checkDots (helper defined
  # elsewhere in this file) returns the user-supplied value when present in
  # ..., otherwise the default given here.
  a <- list(...)
  a$ylab = checkDots("ylab", "Standardized residual", ...)
  if(is.null(form)){
    a$xlab = checkDots("xlab", ifelse(rank, "Model predictions (rank transformed)", "Model predictions"), ...)
  }

  # Coerce the input into a DHARMa object if needed and extract the scaled
  # (quantile) residuals, which live on [0, 1].
  simulationOutput = ensureDHARMa(simulationOutput, convert = T)
  res = simulationOutput$scaledResiduals
  # Guard against the pre-0.3.0 calling convention, where the second
  # positional argument was something else entirely.
  if(inherits(form, "DHARMa"))stop("DHARMa::plotResiduals > argument form cannot be of class DHARMa. Note that the syntax of plotResiduals has changed since DHARMa 0.3.0. See ?plotResiduals.")

  # form = NULL -> fall back to the model's fitted values as the x axis.
  pred = ensurePredictor(simulationOutput, form)

  ##### Rank transform and factor conversion#####

  if(!is.factor(pred)){

    # Optional rank transformation of the predictor, rescaled to (0, 1].
    if (rank == T){
      pred = rank(pred, ties.method = "average")
      pred = pred / max(pred)
    }

    # Heuristic: treat the predictor as categorical when it has a single
    # unique value, or when there are few unique values each backed by many
    # observations (then a boxplot is more informative than a scatter).
    nuniq = length(unique(pred))
    ndata = length(pred)
    if(is.null(asFactor)) asFactor = (nuniq == 1) | (nuniq < 10 & ndata / nuniq > 10)
    if (asFactor) pred = factor(pred)
  }

  ##### Residual scatter plots #####

  # Quantile regression is slow for large data; default it off above 2000
  # residuals (see the @note in the roxygen header).
  if(is.null(quantreg)) if (length(res) > 2000) quantreg = FALSE else quantreg = TRUE

  # Above this many points, default to a density (smoothScatter) display.
  switchScatter = 10000
  if(is.null(smoothScatter)) if (length(res) > switchScatter) smoothScatter = TRUE else smoothScatter = FALSE

  # Fade the point color as the number of residuals grows (alpha floor 0.1).
  blackcol = rgb(0,0,0, alpha = max(0.1, 1 - 3 * length(res) / switchScatter))


  # categorical plot: res ~ factor dispatches to a boxplot
  if(is.factor(pred)){
    do.call(plot, append(list(res ~ pred, ylim = c(0,1), axes = FALSE), a))
  }
  # smooth scatter: density shading, with residuals exactly at 0 or 1
  # (the most extreme possible values) overplotted in red
  else if (smoothScatter == TRUE) {
    defaultCol = ifelse(res == 0 | res == 1, 2,blackcol)
    do.call(graphics::smoothScatter, append(list(x = pred, y = res , ylim = c(0,1), axes = FALSE, colramp = colorRampPalette(c("white", "darkgrey"))),a))
    points(pred[defaultCol == 2], res[defaultCol == 2], col = "red", cex = 0.5)
  }
  # normal plot: extreme residuals (0 or 1) get red asterisks unless the
  # caller overrode col / pch via ...
  else{
    defaultCol = ifelse(res == 0 | res == 1, 2,blackcol)
    defaultPch = ifelse(res == 0 | res == 1, 8,1)
    a$col = checkDots("col", defaultCol, ...)
    a$pch = checkDots("pch", defaultPch, ...)
    do.call(plot, append(list(res ~ pred, ylim = c(0,1), axes = FALSE), a))
  }

  # Axes were suppressed above so the y axis can be forced to the quantile
  # reference ticks.
  axis(1)
  axis(2, at=c(0, 0.25, 0.5, 0.75, 1))

  ##### Quantile regressions #####

  main = checkDots("main", "Residual vs. predicted", ...)
  out = NULL

  # Quantile lines / tests only make sense against a numeric predictor
  # (for factors the boxplot above is the whole story).
  if(is.numeric(pred)){
    if(quantreg == F){
      # Cheap alternative to quantile regression: dashed reference lines at
      # the expected quantiles plus a smoothing spline through the residuals.
      # try() because smooth.spline can fail, e.g. for too few unique x.
      title(main = main, cex.main = 1)
      abline(h = c(0.25, 0.5, 0.75), col = "black", lwd = 0.5, lty = 2)
      try({
        lines(smooth.spline(pred, res, df = 10), lty = 2, lwd = 2, col = "red")
        abline(h = 0.5, col = "red", lwd = 2)
      }, silent = T)
    }else{

      # Formal quantile test (helper defined elsewhere in this file);
      # plot = F because the curves are drawn manually below.
      out = testQuantiles(simulationOutput, pred, quantiles = quantiles, plot = F)


      # Build a multi-line title reporting the per-quantile p-values and the
      # combined adjusted test.
      if(any(out$pvals < 0.05, na.rm = TRUE)){
        main = paste(main, "Quantile deviations detected (red curves)", sep ="\n")
        if(out$p.value <= 0.05){
          main = paste(main, "Combined adjusted quantile test significant", sep ="\n")
        } else {
          main = paste(main, "Combined adjusted quantile test n.s.", sep ="\n")
        }
        maincol = "red"
      } else {
        main = paste(main, "No significant problems detected", sep ="\n")
        maincol = "black"
      }


      title(main = main, cex.main = 0.8,
            col.main = maincol)

      # One fitted curve per requested quantile; significant deviations are
      # drawn in red. Columns 2*i / 2*i+1 of out$predictions appear to hold
      # the fit and a band half-width -- confirm against testQuantiles.
      for(i in 1:length(quantiles)){

        lineCol = ifelse(out$pvals[i] <= 0.05 & !(is.na(out$pvals[i])), "red", "black")
        filCol = ifelse(out$pvals[i] <= 0.05 & !(is.na(out$pvals[i])), "#FF000040", "#00000020")

        abline(h = quantiles[i], col = lineCol, lwd = 0.5, lty = 2)
        # NOTE(review): the band is always drawn with the grey "#00000020";
        # the `filCol` computed above is never used -- presumably it was
        # intended as the polygon color. Verify against upstream DHARMa.
        polygon(c(out$predictions$pred, rev(out$predictions$pred)),
                c(out$predictions[,2*i] - out$predictions[,2*i+1], rev(out$predictions[,2*i] + out$predictions[,2*i+1])),
                col = "#00000020", border = F)
        lines(out$predictions$pred, out$predictions[,2*i], col = lineCol, lwd = 2)
      }

      # legend("bottomright", c(paste("Quantile test: p=", round(out$p.value, digits = 5)), paste("Deviation ", ifelse(out$p.value < 0.05, "significant", "n.s."))), text.col = ifelse(out$p.value < 0.05, "red", "black" ), bty="n")

    }
  }
  # Returned invisibly so the plot, not the test object, is what the user sees.
  invisible(out)
}
3413
# NOTE(review): leftover debug/scratch lines. They assign a global `x` and
# evaluate (but discard) a logical expression mirroring the significance test
# inside plotResiduals above. Presumably safe to delete -- confirm nothing in
# the surrounding script relies on a global `x` before removing.
x = 0.01
x <= 0.05 & !(is.na(x))
3416
3417
#' Ensures the existence of a valid predictor to plot residuals against
#'
#' @param simulationOutput a DHARMa simulation output or an object that can be converted into a DHARMa simulation output
#' @param predictor an optional predictor. If no predictor is provided, will try to extract the fitted value
#' @keywords internal
ensurePredictor <- function(simulationOutput,
                            predictor = NULL){
  # No user-supplied predictor: fall back to the fitted values stored in
  # the simulation output.
  if (is.null(predictor)) {
    predictor <- simulationOutput$fittedPredictedResponse
    if (is.null(predictor)) {
      stop("DHARMa: can't extract predictor from simulationOutput, and no predictor provided")
    }
    return(predictor)
  }

  # A predictor was supplied: it must align one-to-one with the residuals.
  if (length(predictor) != length(simulationOutput$scaledResiduals)) {
    stop("DHARMa: residuals and predictor do not have the same length. The issue is possibly that you have NAs in your predictor that were removed during the model fit. Remove the NA values from your predictor.")
  }

  predictor
}
3435
3436
3437
3438
3439 #plot(simulationOutput)
3440
3441 #plot(simulationOutput$observedResponse, simulationOutput$scaledResiduals, xlab = "predicted", ylab = "Residual", main = "Residual vs. predicted")
3442
3443 #plot(simulationOutput$observedResponse, simulationOutput$fittedPredictedResponse - simulationOutput$observedResponse)
3444
3445 #plot(cumsum(sort(simulationOutput$scaledResiduals)))
3446
3447
3448 #plotConventionalResiduals(fittedModel)
3449
3450
#' Conventional residual plot
#'
#' Convenience function to draw conventional residual plots
#'
#' @param fittedModel a fitted model object
#' @export
plotConventionalResiduals <- function(fittedModel){
  # Three panels side by side; the caller's par() settings are restored on exit.
  oldPar <- par(mfrow = c(1,3), oma = c(0,1,2,1))
  on.exit(par(oldPar))

  predicted <- predict(fittedModel)

  # One panel per residual type, each plotted against the model predictions.
  panelTypes  <- c("deviance", "pearson", "response")
  panelTitles <- c("Deviance", "Pearson", "Raw residuals")
  for (i in seq_along(panelTypes)) {
    plot(predicted, resid(fittedModel, type = panelTypes[i]),
         main = panelTitles[i], ylab = "Residual", xlab = "Predicted")
  }

  mtext("Conventional residual plots", outer = TRUE)
}
3465
3466
3467
3468
3469 #
3470 #
3471 # if(quantreg == F){
3472 #
3473 # lines(smooth.spline(simulationOutput$fittedPredictedResponse, simulationOutput$scaledResiduals, df = 10), lty = 2, lwd = 2, col = "red")
3474 #
3475 # abline(h = 0.5, col = "red", lwd = 2)
3476 #
3477 # }else{
3478 #
3479 # #library(gamlss)
3480 #
3481 # # qrnn
3482 #
3483 # # http://r.789695.n4.nabble.com/Quantile-GAM-td894280.html
3484 #
3485 # #require(quantreg)
3486 # #dat <- plyr::arrange(dat,pred)
3487 # #fit<-quantreg::rqss(resid~qss(pred,constraint="N"),tau=0.5,data = dat)
3488 #
3489 # probs = c(0.25, 0.50, 0.75)
3490 #
3491 # w <- p <- list()
3492 # for(i in seq_along(probs)){
3493 # capture.output(w[[i]] <- qrnn::qrnn.fit(x = as.matrix(simulationOutput$fittedPredictedResponse), y = as.matrix(simulationOutput$scaledResiduals), n.hidden = 4, tau = probs[i], iter.max = 1000, n.trials = 1, penalty = 1))
3494 # p[[i]] <- qrnn::qrnn.predict(as.matrix(sort(simulationOutput$fittedPredictedResponse)), w[[i]])
3495 # }
3496 #
3497 #
3498 #
3499 # #plot(simulationOutput$fittedPredictedResponse, simulationOutput$scaledResiduals, xlab = "Predicted", ylab = "Residual", main = "Residual vs. predicted\n lines should match", cex.main = 1)
3500 #
3501 # #lines(sort(simulationOutput$fittedPredictedResponse), as.vector(p[[1]]), col = "red")
3502 #
3503 # matlines(sort(simulationOutput$fittedPredictedResponse), matrix(unlist(p), nrow = length(simulationOutput$fittedPredictedResponse), ncol = length(p)), col = "red", lty = 1)
3504 #
3505 # # as.vector(p[[1]])
3506 # #
3507 # #
3508 # # lines(simulationOutput$fittedPredictedResponse,p[[1]], col = "red", lwd = 2)
3509 # # abline(h = 0.5, col = "red", lwd = 2)
3510 # #
3511 # # fit<-quantreg::rqss(resid~qss(pred,constraint="N"),tau=0.25,data = dat)
3512 # # lines(unique(dat$pred)[-1],fit$coef[1] + fit$coef[-1], col = "green", lwd = 2, lty =2)
3513 # # abline(h = 0.25, col = "green", lwd = 2, lty =2)
3514 # #
3515 # # fit<-quantreg::rqss(resid~qss(pred,constraint="N"),tau=0.75,data = dat)
3516 # # lines(unique(dat$pred)[-1],fit$coef[1] + fit$coef[-1], col = "blue", lwd = 2, lty = 2)
3517 # # abline(h = 0.75, col = "blue", lwd = 2, lty =2)
3518 # }
3519
3520 ####################### plot.R
3521
3522 ####################### random.R
3523
3524 #' Record and restore a random state
3525 #'
#' The aim of this function is to record, manipulate and restore a random state
3527 #'
#' @details This function is intended for two (not mutually exclusive) tasks:
3529 #'
3530 #' a) record the current random state
3531 #'
3532 #' b) change the current random state in a way that the previous state can be restored
3533 #'
#' @return a list with various information about the random state after function execution, as well as a function to restore the previous state before the function execution
3535 #'
3536 #' @param seed seed argument to set.seed(). NULL = no seed, but random state will be restored. F = random state will not be restored
3537 #' @export
3538 #' @example inst/examples/getRandomStateHelp.R
3539 #' @author Florian Hartig
3540 #'
getRandomState <- function(seed = NULL){

  # Record the current global random state. mget() with a default is used so
  # that this also works when the RNG has not been initialized yet in this
  # session (in which case `current` is NULL).
  # better to explicitly access the global RS?
  # current = get(".Random.seed", .GlobalEnv, ifnotfound = NULL)
  current = mget(".Random.seed", envir = .GlobalEnv, ifnotfound = list(NULL))[[1]]

  # seed = FALSE means: do not restore the previous random state afterwards.
  # BUGFIX: the original condition `is.logical(seed) & seed == F` used the
  # elementwise `&`; for the default seed = NULL, `NULL == F` is logical(0),
  # so if() raised "argument is of length zero". The scalar `&&` with
  # explicit length / NA guards short-circuits safely for NULL, NA and
  # vector inputs, while numeric seed = 0 still falls through to set.seed().
  if(is.logical(seed) && length(seed) == 1 && !is.na(seed) && seed == FALSE){
    restoreCurrent <- function(){}
  }else{
    restoreCurrent <- function(){
      # If there was no random state before, remove the one created below;
      # otherwise put the recorded state back into the global environment.
      if(is.null(current)) rm(".Random.seed", envir = .GlobalEnv) else assign(".Random.seed", current , envir = .GlobalEnv)
    }
  }

  # setting seed
  if(is.numeric(seed)) set.seed(seed)

  # ensuring that RNG has been initialized (a draw forces .Random.seed to exist)
  if (is.null(current)) runif(1)

  # First element is intentionally unnamed (callers may use [[1]] for the seed).
  randomState = list(seed, state = get(".Random.seed", globalenv()), kind = RNGkind(), restoreCurrent = restoreCurrent)
  return(randomState)
}
3565
3566 ####################### random.R
3567
3568 ######################################### Package DHARMa