Mercurial > repos > ecology > pampa_glmcomm
comparison FunctPAMPAGalaxy.r @ 4:44b9069775ca draft
"planemo upload for repository https://github.com/ColineRoyaux/PAMPA-Galaxy commit 3df1978827a91be30e815dee2ed83a92862d1b1c"
author | ecology |
---|---|
date | Sun, 22 Nov 2020 18:40:23 +0000 |
parents | f0dc3958e65d |
children | 8db02073b671 |
comparison
equal
deleted
inserted
replaced
3:97967fbcf6da | 4:44b9069775ca |
---|---|
9 #### Modified by Coline ROYAUX for integrating within Galaxy-E | 9 #### Modified by Coline ROYAUX for integrating within Galaxy-E |
10 | 10 |
11 ######################################### start of the function fact.def.f called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r | 11 ######################################### start of the function fact.def.f called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r |
12 ####### Define the finest aggregation with the observation table | 12 ####### Define the finest aggregation with the observation table |
13 | 13 |
14 fact.det.f <- function (Obs, | 14 fact_det_f <- function(obs, |
15 size.class="size.class", | 15 size_class = "size.class", |
16 code.especes="species.code", | 16 code_species = "species.code", |
17 unitobs="observation.unit") | 17 unitobs = "observation.unit") { |
18 { | 18 if (any(is.element(c(size_class), colnames(obs))) && all(! is.na(obs[, size_class]))) { |
19 if (any(is.element(c(size.class), colnames(obs))) && all(! is.na(obs[, size.class]))) | 19 factors <- c(unitobs, code_species, size_class) |
20 { | |
21 factors <- c(unitobs, code.especes, size.class) | |
22 }else{ | 20 }else{ |
23 factors <- c(unitobs, code.especes) | 21 factors <- c(unitobs, code_species) |
24 } | 22 } |
25 return(factors) | 23 return(factors) |
26 } | 24 } |
27 | 25 |
28 ######################################### end of the function fact.def.f | 26 ######################################### end of the function fact.def.f |
29 | 27 |
30 ######################################### start of the function def.typeobs.f called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r | 28 ######################################### start of the function def_typeobs_f called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r |
31 ####### Define observation type from colnames | 29 ####### Define observation type from colnames |
32 | 30 |
33 def.typeobs.f <- function(Obs) | 31 def_typeobs_f <- function(obs) { |
34 { | 32 if (any(is.element(c("rotation", "rot", "rotate"), colnames(obs)))) { |
35 if (any(is.element(c("rotation","rot","rotate"),colnames(obs)))) | 33 obs_type <- "SVR" |
36 { | 34 }else{ |
37 ObsType <- "SVR" | 35 obs_type <- "other" |
38 }else{ | 36 } |
39 ObsType <- "other" | 37 return(obs_type) |
40 } | 38 } |
41 return(ObsType) | 39 ######################################### end of the function fact.def.f |
42 } | 40 |
43 ######################################### end of the function fact.def.f | 41 ######################################### start of the function create_unitobs called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r |
44 | |
45 ######################################### start of the function create.unitobs called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r | |
46 ####### Create unitobs column when inexistant | 42 ####### Create unitobs column when inexistant |
47 create.unitobs <- function(data,year="year",point="point", unitobs="observation.unit") | 43 create_unitobs <- function(data, year = "year", location = "location", unitobs = "observation.unit") { |
48 { | 44 if (is.element(paste(unitobs), colnames(data))) { |
49 if (is.element(paste(unitobs),colnames(data)) && all(grepl("[1-2][0|8|9][0-9]{2}_.*",data[,unitobs])==FALSE)) | 45 unitab <- data |
50 { | 46 }else{ |
51 unitab <- data | 47 |
52 | 48 unitab <- tidyr::unite(data, col = "observation.unit", c(year, location)) |
53 }else{ | |
54 | |
55 unitab <- unite(data,col="observation.unit",c(year,point)) | |
56 } | 49 } |
57 return(unitab) | 50 return(unitab) |
58 } | 51 } |
59 ######################################### start of the function create.unitobs | 52 ######################################### start of the function create_unitobs |
60 | 53 |
61 ######################################### start of the function create.year.point called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r | 54 ######################################### start of the function create_year_location called by FunctExeCalcCommIndexesGalaxy.r and FunctExeCalcPresAbsGalaxy.r |
62 ####### separate unitobs column when existant | 55 ####### separate unitobs column when existant |
63 create.year.point <- function(data,year="year",point="point", unitobs="observation.unit") | 56 create_year_location <- function(data, year = "year", location = "location", unitobs = "observation.unit") { |
64 { | 57 if (all(grepl("[1-2][0|8|9][0-9]{2}_.*", data[, unitobs])) == TRUE) { |
65 if (all(grepl("[1-2][0|8|9][0-9]{2}_.*",data[,unitobs]))==TRUE) | 58 tab <- tidyr::separate(data, col = unitobs, into = c(year, location), sep = "_") |
66 { | 59 }else{ |
67 tab <- separate(data,col=unitobs,into=c(year,point),sep="_") | 60 if (all(grepl("[A-Z]{2}[0-9]{2}.*", data[, unitobs]) == TRUE)) { |
68 }else{ | 61 tab <- tidyr::separate(data, col = unitobs, into = c("site1", year, "obs"), sep = c(2, 4)) |
69 tab <- separate(data,col=unitobs,into=c("site1", year,"obs"),sep=c(2,4)) | 62 tab <- tidyr::unite(tab, col = location, c("site1", "obs")) |
70 tab <- unite(tab, col=point, c("site1","obs")) | 63 }else{ |
71 | 64 tab <- data |
72 } | 65 } |
73 | 66 } |
74 tab <- cbind(tab,observation.unit = data[,unitobs]) | 67 |
68 tab <- cbind(tab, observation.unit = data[, unitobs]) | |
75 | 69 |
76 return(tab) | 70 return(tab) |
77 } | 71 } |
78 ######################################### start of the function create.unitobs | 72 ######################################### start of the function create_year_location |
79 | 73 |
80 ######################################### start of the function check_file called by every Galaxy Rscripts | 74 ######################################### start of the function check_file called by every Galaxy Rscripts |
81 | 75 |
82 check_file<-function(dataset,err_msg,vars,nb_vars){ | 76 check_file <- function(dataset, err_msg, vars, nb_vars) { |
83 | 77 |
84 ## Purpose: General function to check integrity of input file. Will | 78 ## Purpose: General function to check integrity of input file. Will |
85 ## check numbers and contents of variables(colnames). | 79 ## check numbers and contents of variables(colnames). |
86 ## return an error message and exit if mismatch detected | 80 ## return an error message and exit if mismatch detected |
87 ## ---------------------------------------------------------------------- | 81 ## ---------------------------------------------------------------------- |
88 ## Arguments: dataset : dataset name | 82 ## Arguments: dataset : dataset name |
89 ## err_msg : output error | 83 ## err_msg : output error |
90 ## vars : expected name of variables | 84 ## vars : expected name of variables |
91 ## nb_vars : expected number of variables | 85 ## nb_vars : expected number of variables |
92 ## ---------------------------------------------------------------------- | 86 ## ---------------------------------------------------------------------- |
93 ## Author: Alan Amosse, Benjamin Yguel | 87 ## Author: Alan Amosse, Benjamin Yguel |
94 | 88 |
95 if(ncol(dataset) < nb_vars){ #checking for right number of columns in the file if not = error message | 89 if (ncol(dataset) < nb_vars) { #checking for right number of columns in the file if not = error message |
96 cat("\nerr nb var\n") | 90 cat("\nerr nb var\n") |
97 stop(err_msg, call.=FALSE) | 91 stop(err_msg, call. = FALSE) |
98 } | 92 } |
99 | 93 |
100 for(i in vars){ | 94 for (i in vars) { |
101 if(!(i %in% names(dataset))){ #checking colnames | 95 if (!(i %in% names(dataset))) { #checking colnames |
102 stop(err_msg,call.=FALSE) | 96 stop(err_msg, call. = FALSE) |
103 } | 97 } |
104 } | 98 } |
105 } | 99 } |
106 | 100 |
107 ######################################### end of the function check_file | 101 ######################################### end of the function check_file |
108 | 102 |
109 | 103 |
110 ######################################### start of the function statRotationsNumber.f called by calc.numbers.f | 104 ######################################### start of the function stat_rotations_nb_f called by calc_numbers_f |
111 | 105 |
112 statRotationsNumber.f <- function(factors, obs) | 106 stat_rotations_nb_f <- function(factors, obs) { |
113 { | 107 ## Purpose: Computing abundance statistics by rotation (max, sd) |
114 ## Purpose: Computing abundance statistics by rotation (max, sd) | |
115 ## on SVR data | 108 ## on SVR data |
116 ## ---------------------------------------------------------------------- | 109 ## ---------------------------------------------------------------------- |
117 ## Arguments: factors : Names of aggregation factors | 110 ## Arguments: factors : Names of aggregation factors |
118 ## obs : observation data | 111 ## obs : observation data |
119 ## ---------------------------------------------------------------------- | 112 ## ---------------------------------------------------------------------- |
120 ## Author: Yves Reecht, Date: 29 oct. 2012, 16:01 modified by Coline ROYAUX 04 june 2020 | 113 ## Author: Yves Reecht, Date: 29 oct. 2012, 16:01 modified by Coline ROYAUX 04 june 2020 |
121 | 114 |
122 ## Identification of valid rotations : | 115 ## Identification of valid rotations : |
123 if (is.element("observation.unit", factors)) | 116 if (is.element("observation.unit", factors)) { |
124 { | |
125 ## valid rotations (empty must be there as well) : | 117 ## valid rotations (empty must be there as well) : |
126 rotations <- tapply(obs$rotation, | 118 rotations <- tapply(obs$rotation, |
127 as.list(obs[ , c("observation.unit", "rotation"), drop=FALSE]), | 119 as.list(obs[, c("observation.unit", "rotation"), drop = FALSE]), |
128 function(x)length(x) > 0) | 120 function(x)length(x) > 0) |
129 | 121 |
130 ## Changing NA rotations in FALSE : | 122 ## Changing NA rotations in FALSE : |
131 rotations[is.na(rotations)] <- FALSE | 123 rotations[is.na(rotations)] <- FALSE |
132 }else{ | |
133 #stop(mltext("statRotations.err.1")) | |
134 } | 124 } |
135 | 125 |
136 ## ########################################################### | 126 ## ########################################################### |
137 ## Abundance per rotation at chosen aggregation factors : | 127 ## Abundance per rotation at chosen aggregation factors : |
138 nombresR <- tapply(obs$number, | 128 nombres_rot <- tapply(obs$number, |
139 as.list(obs[ , c(factors, "rotation"), drop=FALSE]), | 129 as.list(obs[, c(factors, "rotation"), drop = FALSE]), |
140 function(x,...){ifelse(all(is.na(x)), NA, sum(x,...))}, | 130 function(x, ...) { |
131 ifelse(all(is.na(x)), NA, sum(x, ...)) | |
132 }, | |
141 na.rm = TRUE) | 133 na.rm = TRUE) |
142 | 134 |
143 ## If valid rotation NA are considered 0 : | 135 ## If valid rotation NA are considered 0 : |
144 nombresR <- sweep(nombresR, | 136 nombres_rot <- sweep(nombres_rot, |
145 match(names(dimnames(rotations)), names(dimnames(nombresR)), nomatch=NULL), | 137 match(names(dimnames(rotations)), names(dimnames(nombres_rot)), nomatch = NULL), |
146 rotations, # Tableau des secteurs valides (booléens). | 138 rotations, # Tableau des secteurs valides (booléens). |
147 function(x, y) | 139 function(x, y) { |
148 { | 140 x[is.na(x) & y] <- 0 # Lorsque NA et secteur valide => 0. |
149 x[is.na(x) & y] <- 0 # Lorsque NA et secteur valide => 0. | 141 return(x) |
150 return(x) | 142 }) |
151 }) | |
152 | 143 |
153 ## ################################################## | 144 ## ################################################## |
154 ## Statistics : | 145 ## Statistics : |
155 | 146 |
156 ## Means : | 147 ## Means : |
157 nombresMean <- apply(nombresR, which(is.element(names(dimnames(nombresR)), factors)), | 148 nb_mean <- apply(nombres_rot, which(is.element(names(dimnames(nombres_rot)), factors)), |
158 function(x,...){ifelse(all(is.na(x)), NA, mean(x,...))}, na.rm=TRUE) | 149 function(x, ...) { |
150 ifelse(all(is.na(x)), NA, mean(x, ...)) | |
151 }, na.rm = TRUE) | |
159 | 152 |
160 ## Maxima : | 153 ## Maxima : |
161 nombresMax <- apply(nombresR, which(is.element(names(dimnames(nombresR)), factors)), | 154 nb_max <- apply(nombres_rot, which(is.element(names(dimnames(nombres_rot)), factors)), |
162 function(x,...){ifelse(all(is.na(x)), NA, max(x,...))}, na.rm=TRUE) | 155 function(x, ...) { |
156 ifelse(all(is.na(x)), NA, max(x, ...)) | |
157 }, na.rm = TRUE) | |
163 | 158 |
164 ## SD : | 159 ## SD : |
165 nombresSD <- apply(nombresR, which(is.element(names(dimnames(nombresR)), factors)), | 160 nb_sd <- apply(nombres_rot, which(is.element(names(dimnames(nombres_rot)), factors)), |
166 function(x,...){ifelse(all(is.na(x)), NA, sd(x,...))}, na.rm=TRUE) | 161 function(x, ...) { |
162 ifelse(all(is.na(x)), NA, sd(x, ...)) | |
163 }, na.rm = TRUE) | |
167 | 164 |
168 ## Valid rotations count : | 165 ## Valid rotations count : |
169 nombresRotations <- apply(rotations, 1, sum, na.rm=TRUE) | 166 nombres_rotations <- apply(rotations, 1, sum, na.rm = TRUE) |
170 | 167 |
171 ## Results returned as list : | 168 ## Results returned as list : |
172 return(list(nombresMean=nombresMean, nombresMax=nombresMax, nombresSD=nombresSD, | 169 return(list(nb_mean = nb_mean, nb_max = nb_max, nb_sd = nb_sd, |
173 nombresRotations=nombresRotations, nombresTot=nombresR)) | 170 nombres_rotations = nombres_rotations, nombresTot = nombres_rot)) |
174 } | 171 } |
175 | 172 |
176 ######################################### end of the function statRotationsNumber.f | 173 ######################################### end of the function stat_rotations_nb_f |
177 | 174 |
178 ######################################### start of the function calcNumber.default.f called by calc.numbers.f | 175 ######################################### start of the function calc_nb_default_f called by calc_numbers_f |
179 | 176 |
180 calcNumber.default.f <- function(obs, | 177 calc_nb_default_f <- function(obs, |
181 factors=c("observation.unit", "species.code", "size.class"), | 178 factors = c("observation.unit", "species.code", "size.class"), |
182 nbName="number") | 179 nb_name = "number") { |
183 { | 180 ## Purpose : Compute abundances at finest aggregation |
184 ## Purpose : Compute abundances at finest aggregation | |
185 ## --------------------------------------------------------------------- | 181 ## --------------------------------------------------------------------- |
186 ## Arguments: obs : observation table | 182 ## Arguments: obs : observation table |
187 ## factors : aggregation factors | 183 ## factors : aggregation factors |
188 ## nbName : name of abundance column. | 184 ## nb_name : name of abundance column. |
189 ## | 185 ## |
190 ## Output: array with ndimensions = nfactors. | 186 ## Output: array with ndimensions = nfactors. |
191 ## ---------------------------------------------------------------------- | 187 ## ---------------------------------------------------------------------- |
192 ## Author: Yves Reecht, Date: 19 déc. 2011, 13:38 modified by Coline ROYAUX 04 june 2020 | 188 ## Author: Yves Reecht, Date: 19 déc. 2011, 13:38 modified by Coline ROYAUX 04 june 2020 |
193 | 189 |
194 ## Sum individuals number : | 190 ## Sum individuals number : |
195 nbr <- tapply(obs[ , nbName], | 191 nbr <- tapply(obs[, nb_name], |
196 as.list(obs[ , factors]), | 192 as.list(obs[, factors]), |
197 sum, na.rm = TRUE) | 193 sum, na.rm = TRUE) |
198 | 194 |
199 ## Absences as "true zero" : | 195 ## Absences as "true zero" : |
200 nbr[is.na(nbr)] <- 0 | 196 nbr[is.na(nbr)] <- 0 |
201 | 197 |
202 return(nbr) | 198 return(nbr) |
203 } | 199 } |
204 | 200 |
205 ######################################### end of the function calcNumber.default.f | 201 ######################################### end of the function calc_nb_default_f |
206 | 202 |
207 ######################################### start of the function calc.numbers.f | 203 ######################################### start of the function calc_numbers_f |
208 | 204 |
209 calc.numbers.f <- function(obs, ObsType="", factors=c("observation.unit", "species.code", "size.class"), nbName="number") | 205 calc_numbers_f <- function(obs, obs_type = "", factors = c("observation.unit", "species.code", "size.class"), nb_name = "number") { |
210 { | 206 ## Purpose: Produce data.frame used as table from output of calc_nb_default_f(). |
211 ## Purpose: Produce data.frame used as table from output of calcNumber.default.f(). | |
212 ## ---------------------------------------------------------------------- | 207 ## ---------------------------------------------------------------------- |
213 ## Arguments: obs : observation table | 208 ## Arguments: obs : observation table |
214 ## ObsType : Type of observation (SVR, LIT, ...) | 209 ## obs_type : Type of observation (SVR, LIT, ...) |
215 ## factors : aggregation factors | 210 ## factors : aggregation factors |
216 ## nbName : name of abundance column | 211 ## nb_name : name of abundance column |
217 ## | 212 ## |
218 ## Output: data.frame with (N aggregation factors + 1) columns | 213 ## Output: data.frame with (N aggregation factors + 1) columns |
219 ## ---------------------------------------------------------------------- | 214 ## ---------------------------------------------------------------------- |
220 ## Author: Yves Reecht, Date: 19 déc. 2011, 13:46 modified by Coline ROYAUX 04 june 2020 | 215 ## Author: Yves Reecht, Date: 19 déc. 2011, 13:46 modified by Coline ROYAUX 04 june 2020 |
221 | 216 |
222 if (ObsType == "SVR") | 217 if (obs_type == "SVR") { |
223 { | |
224 ## Compute SVR abundances statistics : | 218 ## Compute SVR abundances statistics : |
225 statRotations <- statRotationsNumber.f(factors=factors, | 219 stat_rotations <- stat_rotations_nb_f(factors = factors, |
226 obs=obs) | 220 obs = obs) |
227 | 221 |
228 ## Mean for rotating videos (3 rotations at most times) : | 222 ## Mean for rotating videos (3 rotations at most times) : |
229 nbr <- statRotations[["nombresMean"]] | 223 nbr <- stat_rotations[["nb_mean"]] |
230 | 224 |
231 }else{ | 225 }else{ |
232 | 226 |
233 nbr <- calcNumber.default.f(obs, factors, nbName) | 227 nbr <- calc_nb_default_f(obs, factors, nb_name) |
234 } | 228 } |
235 | 229 |
236 res <- as.data.frame(as.table(nbr), responseName=nbName) | 230 res <- as.data.frame(as.table(nbr), responseName = nb_name) |
237 | 231 |
238 if (is.element("size.class", colnames(res))) | 232 if (is.element("size.class", colnames(res))) { |
239 { | |
240 res$size.class[res$size.class == ""] <- NA | 233 res$size.class[res$size.class == ""] <- NA |
241 }else{} | 234 } |
242 | 235 |
243 ## If integer abundances : | 236 ## If integer abundances : |
244 if (isTRUE(all.equal(res[ , nbName], as.integer(res[ , nbName])))) | 237 if (isTRUE(all.equal(res[, nb_name], as.integer(res[, nb_name])))) { |
245 { | 238 res[, nb_name] <- as.integer(res[, nb_name]) |
246 res[ , nbName] <- as.integer(res[ , nbName]) | 239 } |
247 }else{} | 240 |
248 | 241 if (obs_type == "SVR") { |
249 if (ObsType == "SVR") | |
250 { | |
251 ## statistics on abundances : | 242 ## statistics on abundances : |
252 res$number.max <- as.vector(statRotations[["nombresMax"]]) | 243 res[, "number.max"] <- as.vector(stat_rotations[["nb_max"]]) |
253 res$number.sd <- as.vector(statRotations[["nombresSD"]]) | 244 res[, "number.sd"] <- as.vector(stat_rotations[["nb_sd"]]) |
254 | 245 |
255 }else{} | 246 } |
256 | 247 |
257 return(res) | 248 return(res) |
258 } | 249 } |
259 | 250 |
260 ######################################### end of the function calc.numbers.f | 251 ######################################### end of the function calc_numbers_f |
261 | 252 |
262 ######################################### start of the function presAbs.f called by calcBiodiv.f | 253 ######################################### start of the function pres_abs_f called by calc_biodiv_f |
263 | 254 |
264 presAbs.f <- function(nombres, logical=FALSE) | 255 pres_abs_f <- function(nombres, logical = FALSE) { |
265 { | |
266 ## Purpose: Compute presence absence from abundances | 256 ## Purpose: Compute presence absence from abundances |
267 ## ---------------------------------------------------------------------- | 257 ## ---------------------------------------------------------------------- |
268 ## Arguments: nombres : vector of individuals count. | 258 ## Arguments: nombres : vector of individuals count. |
269 ## logical : (boolean) results as boolean or 0/1 ? | 259 ## logical : (boolean) results as boolean or 0/1 ? |
270 ## ---------------------------------------------------------------------- | 260 ## ---------------------------------------------------------------------- |
271 ## Author: Yves Reecht, Date: 29 oct. 2010, 10:20 modified by Coline ROYAUX 04 june 2020 | 261 ## Author: Yves Reecht, Date: 29 oct. 2010, 10:20 modified by Coline ROYAUX 04 june 2020 |
272 | 262 |
273 if (any(nombres < 0, na.rm=TRUE)) | 263 if (any(nombres < 0, na.rm = TRUE)) { |
274 { | |
275 stop("Negative abundances!") | 264 stop("Negative abundances!") |
276 }else{} | 265 } |
277 | 266 |
278 if (logical) | 267 if (logical) { |
279 { | |
280 return(nombres > 0) | 268 return(nombres > 0) |
281 }else{ | 269 }else{ |
282 nombres[nombres > 0] <- 1 | 270 nombres[nombres > 0] <- 1 |
283 return(nombres) | 271 return(nombres) |
284 } | 272 } |
285 } | 273 } |
286 | 274 |
287 ######################################### end of the function presAbs.f | 275 ######################################### end of the function pres_abs_f |
288 | 276 |
289 ######################################### start of the function betterCbind called by agregations.generic.f | 277 ######################################### start of the function bettercbind called by agregations_generic_f |
290 | 278 |
291 betterCbind <- function(..., dfList=NULL, deparse.level = 1) | 279 bettercbind <- function(..., df_list = NULL, deparse.level = 1) { |
292 { | |
293 ## Purpose: Apply cbind to data frame with mathcing columns but without | 280 ## Purpose: Apply cbind to data frame with mathcing columns but without |
294 ## redundancies. | 281 ## redundancies. |
295 ## ---------------------------------------------------------------------- | 282 ## ---------------------------------------------------------------------- |
296 ## Arguments: same as cbind... | 283 ## Arguments: same as cbind... |
297 ## dfList : data.frames list | 284 ## df_list : data.frames list |
298 ## ---------------------------------------------------------------------- | 285 ## ---------------------------------------------------------------------- |
299 ## Author: Yves Reecht, Date: 17 janv. 2012, 21:10 modified by Coline ROYAUX 04 june 2020 | 286 ## Author: Yves Reecht, Date: 17 janv. 2012, 21:10 modified by Coline ROYAUX 04 june 2020 |
300 | 287 |
301 if (is.null(dfList)) | 288 if (is.null(df_list)) { |
302 { | 289 df_list <- list(...) |
303 dfList <- list(...) | 290 } |
304 }else{} | |
305 | 291 |
306 return(do.call(cbind, | 292 return(do.call(cbind, |
307 c(list(dfList[[1]][ , c(tail(colnames(dfList[[1]]), -1), | 293 c(list(df_list[[1]][, c(tail(colnames(df_list[[1]]), -1), |
308 head(colnames(dfList[[1]]), 1))]), | 294 head(colnames(df_list[[1]]), 1))]), |
309 lapply(dfList[-1], | 295 lapply(df_list[- 1], |
310 function(x, colDel) | 296 function(x, coldel) { |
311 { | 297 return(x[, !is.element(colnames(x), |
312 return(x[ , !is.element(colnames(x), | 298 coldel), |
313 colDel), | 299 drop = FALSE]) |
314 drop=FALSE]) | |
315 }, | 300 }, |
316 colDel=colnames(dfList[[1]])), | 301 coldel = colnames(df_list[[1]])), |
317 deparse.level=deparse.level))) | 302 deparse.level = deparse.level))) |
318 } | 303 } |
319 | 304 |
320 ######################################### end of the function betterCbind | 305 ######################################### end of the function bettercbind |
321 | 306 |
322 ######################################### start of the function agregation.f called by agregations.generic.f | 307 ######################################### start of the function agregation_f called by agregations_generic_f |
323 | 308 |
324 agregation.f <- function(metric, Data, factors, casMetrique, | 309 agregation_f <- function(metric, d_ata, factors, cas_metric, |
325 nbName="number") | 310 nb_name = "number") { |
326 { | |
327 ## Purpose: metric aggregation | 311 ## Purpose: metric aggregation |
328 ## ---------------------------------------------------------------------- | 312 ## ---------------------------------------------------------------------- |
329 ## Arguments: metric: colnames of chosen metric | 313 ## Arguments: metric: colnames of chosen metric |
330 ## Data: Unaggregated data table | 314 ## d_ata: Unaggregated data table |
331 ## factors: aggregation factors vector | 315 ## factors: aggregation factors vector |
332 ## casMetrique: named vector of observation types depending | 316 ## cas_metric: named vector of observation types depending |
333 ## on chosen metric | 317 ## on chosen metric |
334 ## nbName : abundance column name | 318 ## nb_name : abundance column name |
335 ## ---------------------------------------------------------------------- | 319 ## ---------------------------------------------------------------------- |
336 ## Author: Yves Reecht, Date: 20 déc. 2011, 14:29 modified by Coline ROYAUX 04 june 2020 | 320 ## Author: Yves Reecht, Date: 20 déc. 2011, 14:29 modified by Coline ROYAUX 04 june 2020 |
337 | 321 |
338 switch(casMetrique[metric], | 322 switch(cas_metric[metric], |
339 "sum"={ | 323 "sum" = { |
340 res <- tapply(Data[ , metric], | 324 res <- tapply(d_ata[, metric], |
341 as.list(Data[ , factors, drop=FALSE]), | 325 as.list(d_ata[, factors, drop = FALSE]), |
342 function(x) | 326 function(x) { |
343 { | |
344 ifelse(all(is.na(x)), | 327 ifelse(all(is.na(x)), |
345 NA, | 328 NA, |
346 sum(x, na.rm=TRUE)) | 329 sum(x, na.rm = TRUE)) |
347 }) | 330 }) |
348 }, | 331 }, |
349 "w.mean"={ | 332 "w.mean" = { |
350 res <- tapply(1:nrow(Data), | 333 res <- tapply(seq_len(nrow(d_ata)), |
351 as.list(Data[ , factors, drop=FALSE]), | 334 as.list(d_ata[, factors, drop = FALSE]), |
352 function(ii) | 335 function(ii) { |
353 { | 336 ifelse(all(is.na(d_ata[ii, metric])), |
354 ifelse(all(is.na(Data[ii, metric])), | |
355 NA, | 337 NA, |
356 weighted.mean(Data[ii, metric], | 338 weighted.mean(d_ata[ii, metric], |
357 Data[ii, nbName], | 339 d_ata[ii, nb_name], |
358 na.rm=TRUE)) | 340 na.rm = TRUE)) |
359 }) | 341 }) |
360 }, | 342 }, |
361 "w.mean.colonies"={ | 343 "w.mean.colonies" = { |
362 res <- tapply(1:nrow(Data), | 344 res <- tapply(seq_len(nrow(d_ata)), |
363 as.list(Data[ , factors, drop=FALSE]), | 345 as.list(d_ata[, factors, drop = FALSE]), |
364 function(ii) | 346 function(ii) { |
365 { | 347 ifelse(all(is.na(d_ata[ii, metric])), |
366 ifelse(all(is.na(Data[ii, metric])), | |
367 NA, | 348 NA, |
368 weighted.mean(Data[ii, metric], | 349 weighted.mean(d_ata[ii, metric], |
369 Data[ii, "colonies"], | 350 d_ata[ii, "colonies"], |
370 na.rm=TRUE)) | 351 na.rm = TRUE)) |
371 }) | 352 }) |
372 }, | 353 }, |
373 "w.mean.prop"={ | 354 "w.mean.prop" = { |
374 res <- tapply(1:nrow(Data), | 355 res <- tapply(seq_len(nrow(d_ata)), |
375 as.list(Data[ , factors, drop=FALSE]), | 356 as.list(d_ata[, factors, drop = FALSE]), |
376 function(ii) | 357 function(ii) { |
377 { | 358 ifelse(all(is.na(d_ata[ii, metric])) || sum(d_ata[ii, "nombre.tot"], na.rm = TRUE) == 0, |
378 ifelse(all(is.na(Data[ii, metric])) || sum(Data[ii, "nombre.tot"], na.rm=TRUE) == 0, | |
379 NA, | 359 NA, |
380 ifelse(all(na.omit(Data[ii, metric]) == 0), # Pour ne pas avoir NaN. | 360 ifelse(all(na.omit(d_ata[ii, metric]) == 0), |
381 0, | 361 0, |
382 (sum(Data[ii, nbName][ !is.na(Data[ii, metric])], na.rm=TRUE) / | 362 (sum(d_ata[ii, nb_name][!is.na(d_ata[ii, metric])], na.rm = TRUE) / |
383 sum(Data[ii, "nombre.tot"], na.rm=TRUE)) * | 363 sum(d_ata[ii, "nombre.tot"], na.rm = TRUE)) * |
384 ## Correction if size class isn't an aggregation factor | 364 ## Correction if size class isn't an aggregation factor |
385 ## (otherwise value divided by number of present classes) : | 365 ## (otherwise value divided by number of present classes) : |
386 ifelse(is.element("size.class", factors), | 366 ifelse(is.element("size.class", factors), |
387 100, | 367 100, |
388 100 * length(unique(Data$size.class))))) | 368 100 * length(unique(d_ata$size.class))))) |
389 }) | 369 }) |
390 | 370 |
391 }, | 371 }, |
392 "w.mean.prop.bio"={ | 372 "w.mean.prop.bio" = { |
393 res <- tapply(1:nrow(Data), | 373 res <- tapply(seq_len(nrow(d_ata)), |
394 as.list(Data[ , factors, drop=FALSE]), | 374 as.list(d_ata[, factors, drop = FALSE]), |
395 function(ii) | 375 function(ii) { |
396 { | 376 ifelse(all(is.na(d_ata[ii, metric])) || sum(d_ata[ii, "tot.biomass"], na.rm = TRUE) == 0, |
397 ifelse(all(is.na(Data[ii, metric])) || sum(Data[ii, "tot.biomass"], na.rm=TRUE) == 0, | |
398 NA, | 377 NA, |
399 ifelse(all(na.omit(Data[ii, metric]) == 0), # Pour ne pas avoir NaN. | 378 ifelse(all(na.omit(d_ata[ii, metric]) == 0), |
400 0, | 379 0, |
401 (sum(Data[ii, "biomass"][ !is.na(Data[ii, metric])], na.rm=TRUE) / | 380 (sum(d_ata[ii, "biomass"][!is.na(d_ata[ii, metric])], na.rm = TRUE) / |
402 sum(Data[ii, "tot.biomass"], na.rm=TRUE)) * | 381 sum(d_ata[ii, "tot.biomass"], na.rm = TRUE)) * |
403 ## Correction if size class isn't an aggregation factor | 382 ## Correction if size class isn't an aggregation factor |
404 ## (otherwise value divided by number of present classes) : | 383 ## (otherwise value divided by number of present classes) : |
405 ifelse(is.element("size.class", factors), | 384 ifelse(is.element("size.class", factors), |
406 100, | 385 100, |
407 100 * length(unique(Data$size.class))))) | 386 100 * length(unique(d_ata$size.class))))) |
408 }) | 387 }) |
409 | 388 |
410 }, | 389 }, |
411 "pres"={ | 390 "pres" = { |
412 res <- tapply(Data[ , metric], | 391 res <- tapply(d_ata[, metric], |
413 as.list(Data[ , factors, drop=FALSE]), | 392 as.list(d_ata[, factors, drop = FALSE]), |
414 function(x) | 393 function(x) { |
415 { | |
416 ifelse(all(is.na(x)), # When only NAs. | 394 ifelse(all(is.na(x)), # When only NAs. |
417 NA, | 395 NA, |
418 ifelse(any(x > 0, na.rm=TRUE), # Otherwise... | 396 ifelse(any(x > 0, na.rm = TRUE), # Otherwise... |
419 1, # ... presence if at least one observation in the group. | 397 1, # ... presence if at least one observation in the group. |
420 0)) | 398 0)) |
421 }) | 399 }) |
422 }, | 400 }, |
423 "nbMax"={ | 401 "nbMax" = { |
424 ## Recuperation of raw abundances with selections : | |
425 nbTmp <- getReducedSVRdata.f(dataName=".NombresSVR", data=Data) | |
426 | 402 |
427 ## Sum by factor cross / rotation : | 403 ## Sum by factor cross / rotation : |
428 nbTmp2 <- apply(nbTmp, | 404 nb_tmp2 <- apply(nb_tmp, |
429 which(is.element(names(dimnames(nbTmp)), c(factors, "rotation"))), | 405 which(is.element(names(dimnames(nb_tmp)), c(factors, "rotation"))), |
430 function(x) | 406 function(x) { |
431 { | 407 ifelse(all(is.na(x)), NA, sum(x, na.rm = TRUE)) |
432 ifelse(all(is.na(x)), NA, sum(x, na.rm=TRUE)) | |
433 }) | 408 }) |
434 | 409 |
435 ## Sum by factor cross : | 410 ## Sum by factor cross : |
436 res <- as.array(apply(nbTmp2, | 411 res <- as.array(apply(nb_tmp2, |
437 which(is.element(names(dimnames(nbTmp)), factors)), | 412 which(is.element(names(dimnames(nb_tmp)), factors)), |
438 function(x) | 413 function(x) { |
439 { | 414 ifelse(all(is.na(x)), NA, max(x, na.rm = TRUE)) |
440 ifelse(all(is.na(x)), NA, max(x, na.rm=TRUE)) | |
441 })) | 415 })) |
442 }, | 416 }, |
443 "nbSD"={ | 417 "nbSD" = { |
444 ## Recuperation of raw abundances with selections : | |
445 nbTmp <- getReducedSVRdata.f(dataName=".NombresSVR", data=Data) | |
446 | 418 |
447 ## Sum by factor cross / rotation : | 419 ## Sum by factor cross / rotation : |
448 nbTmp2 <- apply(nbTmp, | 420 nb_tmp2 <- apply(nb_tmp, |
449 which(is.element(names(dimnames(nbTmp)), c(factors, "rotation"))), | 421 which(is.element(names(dimnames(nb_tmp)), c(factors, "rotation"))), |
450 function(x) | 422 function(x) { |
451 { | 423 ifelse(all(is.na(x)), NA, sum(x, na.rm = TRUE)) |
452 ifelse(all(is.na(x)), NA, sum(x, na.rm=TRUE)) | |
453 }) | 424 }) |
454 | 425 |
455 ## Sum by factor cross : | 426 ## Sum by factor cross : |
456 res <- as.array(apply(nbTmp2, | 427 res <- as.array(apply(nb_tmp2, |
457 which(is.element(names(dimnames(nbTmp)), factors)), | 428 which(is.element(names(dimnames(nb_tmp)), factors)), |
458 function(x) | 429 function(x) { |
459 { | 430 ifelse(all(is.na(x)), NA, sd(x, na.rm = TRUE)) |
460 ifelse(all(is.na(x)), NA, sd(x, na.rm=TRUE)) | |
461 })) | 431 })) |
462 }, | 432 }, |
463 "densMax"={ | 433 "densMax" = { |
464 ## Recuperation of raw abundances with selections : | |
465 densTmp <- getReducedSVRdata.f(dataName=".DensitesSVR", data=Data) | |
466 | 434 |
467 ## Sum by factor cross / rotation : | 435 ## Sum by factor cross / rotation : |
468 densTmp2 <- apply(densTmp, | 436 dens_tmp2 <- apply(dens_tmp, |
469 which(is.element(names(dimnames(densTmp)), c(factors, "rotation"))), | 437 which(is.element(names(dimnames(dens_tmp)), c(factors, "rotation"))), |
470 function(x) | 438 function(x) { |
471 { | 439 ifelse(all(is.na(x)), NA, sum(x, na.rm = TRUE)) |
472 ifelse(all(is.na(x)), NA, sum(x, na.rm=TRUE)) | |
473 }) | 440 }) |
474 | 441 |
475 ## Sum by factor cross : | 442 ## Sum by factor cross : |
476 res <- as.array(apply(densTmp2, | 443 res <- as.array(apply(dens_tmp2, |
477 which(is.element(names(dimnames(densTmp)), factors)), | 444 which(is.element(names(dimnames(dens_tmp)), factors)), |
478 function(x) | 445 function(x) { |
479 { | 446 ifelse(all(is.na(x)), NA, max(x, na.rm = TRUE)) |
480 ifelse(all(is.na(x)), NA, max(x, na.rm=TRUE)) | |
481 })) | 447 })) |
482 }, | 448 }, |
483 "densSD"={ | 449 "densSD" = { |
484 ## Recuperation of raw abundances with selections : | |
485 densTmp <- getReducedSVRdata.f(dataName=".DensitesSVR", data=Data) | |
486 | 450 |
487 ## Sum by factor cross / rotation : | 451 ## Sum by factor cross / rotation : |
488 densTmp2 <- apply(densTmp, | 452 dens_tmp2 <- apply(dens_tmp, |
489 which(is.element(names(dimnames(densTmp)), c(factors, "rotation"))), | 453 which(is.element(names(dimnames(dens_tmp)), c(factors, "rotation"))), |
490 function(x) | 454 function(x) { |
491 { | 455 ifelse(all(is.na(x)), NA, sum(x, na.rm = TRUE)) |
492 ifelse(all(is.na(x)), NA, sum(x, na.rm=TRUE)) | |
493 }) | 456 }) |
494 | 457 |
495 ## Sum by factor cross : | 458 ## Sum by factor cross : |
496 res <- as.array(apply(densTmp2, | 459 res <- as.array(apply(dens_tmp2, |
497 which(is.element(names(dimnames(densTmp)), factors)), | 460 which(is.element(names(dimnames(dens_tmp)), factors)), |
498 function(x) | 461 function(x) { |
499 { | 462 ifelse(all(is.na(x)), NA, sd(x, na.rm = TRUE)) |
500 ifelse(all(is.na(x)), NA, sd(x, na.rm=TRUE)) | |
501 })) | 463 })) |
502 }, | 464 }, |
503 "%.nesting"={ | 465 "%.nesting" = { |
504 res <- tapply(1:nrow(Data), | 466 res <- tapply(seq_len(nrow(d_ata)), |
505 as.list(Data[ , factors, drop=FALSE]), | 467 as.list(d_ata[, factors, drop = FALSE]), |
506 function(ii) | 468 function(ii) { |
507 { | 469 ifelse(all(is.na(d_ata[ii, metric])), |
508 ifelse(all(is.na(Data[ii, metric])), | |
509 NA, | 470 NA, |
510 weighted.mean(Data[ii, metric], | 471 weighted.mean(d_ata[ii, metric], |
511 Data[ii, "readable.tracks"], | 472 d_ata[ii, "readable.tracks"], |
512 na.rm=TRUE)) | 473 na.rm = TRUE)) |
513 }) | 474 }) |
514 }, | 475 }, |
515 stop("Not implemented!") | 476 stop("Not implemented!") |
516 ) | 477 ) |
517 | 478 |
518 ## dimension names | 479 ## dimension names |
519 names(dimnames(res)) <- c(factors) | 480 names(dimnames(res)) <- c(factors) |
520 | 481 |
521 ## Transformation to long format : | 482 ## Transformation to long format : |
522 reslong <- as.data.frame(as.table(res), responseName=metric) | 483 reslong <- as.data.frame(as.table(res), responseName = metric) |
523 reslong <- reslong[ , c(tail(colnames(reslong), 1), head(colnames(reslong), -1))] # metric first | 484 reslong <- reslong[, c(tail(colnames(reslong), 1), head(colnames(reslong), -1))] # metric first |
524 | 485 |
525 return(reslong) | 486 return(reslong) |
526 } | 487 } |
527 | 488 |
528 ######################################### end of the function agregation.f | 489 ######################################### end of the function agregation_f |
529 | 490 |
530 ######################################### start of the function agregations.generic.f called y calcBiodiv.f in FucntExeCalcCommIndexesGalaxy.r | 491 ######################################### start of the function agregations_generic_f called y calc_biodiv_f in FucntExeCalcCommIndexesGalaxy.r |
531 | 492 |
532 agregations.generic.f <- function(Data, metrics, factors, listFact=NULL, unitSpSz=NULL, unitSp=NULL, | 493 agregations_generic_f <- function(d_ata, metrics, factors, list_fact = NULL, unit_sp_sz = NULL, unit_sp = NULL, |
533 nbName="number") | 494 nb_name = "number") { |
534 { | 495 ## Purpose: Aggregate data |
535 ## Purpose: Aggregate data | 496 ## ---------------------------------------------------------------------- |
536 ## ---------------------------------------------------------------------- | 497 ## Arguments: d_ata : data set |
537 ## Arguments: Data : data set | |
538 ## metrics : aggregated metric | 498 ## metrics : aggregated metric |
539 ## factors : aggregation factors | 499 ## factors : aggregation factors |
540 ## listFact : other factors to aggregate and add to output | 500 ## list_fact : other factors to aggregate and add to output |
541 ## unitSpSz : Metrics table by unitobs/species/Size Class | 501 ## unit_sp_sz : Metrics table by unitobs/species/Size Class |
542 ## unitSp : Metrics table by unitobs/species | 502 ## unit_sp : Metrics table by unitobs/species |
543 ## nbName : abundance colname | 503 ## nb_name : abundance colname |
544 ## | 504 ## |
545 ## Output : aggregated data frame | 505 ## Output : aggregated data frame |
546 ## ---------------------------------------------------------------------- | 506 ## ---------------------------------------------------------------------- |
547 ## Author: Yves Reecht, Date: 18 oct. 2010, 15:47 modified by Coline ROYAUX 04 june 2020 | 507 ## Author: Yves Reecht, Date: 18 oct. 2010, 15:47 modified by Coline ROYAUX 04 june 2020 |
548 | 508 |
549 ## trt depending on metric type : | 509 ## trt depending on metric type : |
550 casMetrique <- c("number"="sum", | 510 cas_metric <- c("number" = "sum", |
551 "mean.length"="w.mean", | 511 "mean.length" = "w.mean", |
552 "taille_moy"="w.mean", | 512 "taille_moy" = "w.mean", |
553 "biomass"="sum", | 513 "biomass" = "sum", |
554 "Biomass"="sum", | 514 "Biomass" = "sum", |
555 "weight"="sum", | 515 "weight" = "sum", |
556 "mean.weight"="w.mean", | 516 "mean.weight" = "w.mean", |
557 "density"="sum", | 517 "density" = "sum", |
558 "Density"="sum", | 518 "Density" = "sum", |
559 "CPUE"="sum", | 519 "CPUE" = "sum", |
560 "CPUE.biomass"="sum", | 520 "CPUE.biomass" = "sum", |
561 "pres.abs"="pres", | 521 "presence_absence" = "pres", |
562 "abundance.prop.SC"="w.mean.prop", # Not OK [!!!] ? | 522 "abundance.prop.SC" = "w.mean.prop", # Not OK [!!!] ? |
563 "biomass.prop.SC"="w.mean.prop.bio", # Not OK [!!!] ? | 523 "biomass.prop.SC" = "w.mean.prop.bio", # Not OK [!!!] ? |
564 ## Benthos : | 524 ## Benthos : |
565 "colonies"="sum", | 525 "colonies" = "sum", |
566 "coverage"="sum", | 526 "coverage" = "sum", |
567 "mean.size.colonies"="w.mean.colonies", | 527 "mean.size.colonies" = "w.mean.colonies", |
568 ## SVR (expérimental) : | 528 ## SVR (expérimental) : |
569 "number.max"="nbMax", | 529 "number.max" = "nbMax", |
570 "number.sd"="nbSD", | 530 "number.sd" = "nbSD", |
571 "density.max"="densMax", | 531 "density.max" = "densMax", |
572 "density.sd"="densSD", | 532 "density.sd" = "densSD", |
573 "biomass.max"="sum", | 533 "biomass.max" = "sum", |
574 "spawning.success"="%.nesting", | 534 "spawning.success" = "%.nesting", |
575 "spawnings"="sum", | 535 "spawnings" = "sum", |
576 "readable.tracks"="sum", | 536 "readable.tracks" = "sum", |
577 "tracks.number"="sum") | 537 "tracks.number" = "sum") |
578 | 538 |
579 ## add "readable.tracks" for egg laying percentage : | 539 ## add "readable.tracks" for egg laying percentage : |
580 if (any(casMetrique[metrics] == "%.nesting")) | 540 if (any(cas_metric[metrics] == "%.nesting")) { |
581 { | 541 if (is.element("size.class", colnames(d_ata))) { |
582 if (is.element("size.class", colnames(Data))) | 542 if (is.null(unit_sp_sz)) stop("unit_sp_sz doit être défini") |
583 { | 543 |
584 if (is.null(unitSpSz)) stop("unitSpSz doit être défini") | 544 d_ata <- merge(d_ata, |
585 | 545 unit_sp_sz[, c("species.code", "observation.unit", "size.class", "readable.tracks")], |
586 Data <- merge(Data, | 546 by = c("species.code", "observation.unit", "size.class"), |
587 unitSpSz[ , c("species.code", "observation.unit", "size.class", "readable.tracks")], | 547 suffixes = c("", ".y")) |
588 by=c("species.code", "observation.unit", "size.class"), | |
589 suffixes=c("", ".y")) | |
590 }else{ | 548 }else{ |
591 if (is.null(unitSp)) stop("unitSp must be defined") | 549 if (is.null(unit_sp)) stop("unit_sp must be defined") |
592 | 550 |
593 Data <- merge(Data, | 551 d_ata <- merge(d_ata, |
594 unitSp[ , c("species.code", "observation.unit", "readable.tracks")], | 552 unit_sp[, c("species.code", "observation.unit", "readable.tracks")], |
595 by=c("species.code", "observation.unit"), | 553 by = c("species.code", "observation.unit"), |
596 suffixes=c("", ".y")) | 554 suffixes = c("", ".y")) |
597 } | 555 } |
598 }else{} | 556 } |
599 | 557 |
600 ## Add "number" field for computing ponderate means if absent : | 558 ## Add "number" field for computing ponderate means if absent : |
601 if (any(casMetrique[metrics] == "w.mean" | casMetrique[metrics] == "w.mean.prop")) | 559 if (any(cas_metric[metrics] == "w.mean" | cas_metric[metrics] == "w.mean.prop")) { |
602 { | 560 if (is.element("size.class", colnames(d_ata))) { |
603 if (is.element("size.class", colnames(Data))) | 561 if (is.null(unit_sp_sz)) stop("unit_sp_sz must be defined") |
604 { | 562 |
605 if (is.null(unitSpSz)) stop("unitSpSz must be defined") | 563 d_ata <- merge(d_ata, |
606 | 564 unit_sp_sz[, c("species.code", "observation.unit", "size.class", nb_name)], |
607 Data <- merge(Data, | 565 by = c("species.code", "observation.unit", "size.class"), |
608 unitSpSz[ , c("species.code", "observation.unit", "size.class", nbName)], | 566 suffixes = c("", ".y")) |
609 by=c("species.code", "observation.unit", "size.class"), | |
610 suffixes=c("", ".y")) | |
611 | 567 |
612 ## add tot abundance / species / observation unit : | 568 ## add tot abundance / species / observation unit : |
613 nbTot <- tapply(unitSpSz[ , nbName], | 569 nb_tot <- tapply(unit_sp_sz[, nb_name], |
614 as.list(unitSpSz[ , c("species.code", "observation.unit")]), | 570 as.list(unit_sp_sz[, c("species.code", "observation.unit")]), |
615 sum, na.rm=TRUE) | 571 sum, na.rm = TRUE) |
616 | 572 |
617 Data <- merge(Data, | 573 d_ata <- merge(d_ata, |
618 as.data.frame(as.table(nbTot), responseName="nombre.tot")) | 574 as.data.frame(as.table(nb_tot), responseName = "nombre.tot")) |
619 }else{ | 575 }else{ |
620 if (is.null(unitSp)) stop("unitSp must be defined") | 576 if (is.null(unit_sp)) stop("unit_sp must be defined") |
621 | 577 |
622 Data <- merge(Data, | 578 d_ata <- merge(d_ata, |
623 unitSp[ , c("species.code", "observation.unit", nbName)], # [!!!] unitSpSz ? | 579 unit_sp[, c("species.code", "observation.unit", nb_name)], # [!!!] unit_sp_sz ? |
624 by=c("species.code", "observation.unit"), | 580 by = c("species.code", "observation.unit"), |
625 suffixes=c("", ".y")) | 581 suffixes = c("", ".y")) |
626 } | 582 } |
627 }else{} | 583 } |
628 | 584 |
629 ## Add biomass field of biomass proportion by size class : | 585 ## Add biomass field of biomass proportion by size class : |
630 if (any(casMetrique[metrics] == "w.mean.prop.bio")) | 586 if (any(cas_metric[metrics] == "w.mean.prop.bio")) { |
631 { | 587 if (is.null(unit_sp_sz)) stop("unit_sp_sz doit être défini") |
632 if (is.null(unitSpSz)) stop("unitSpSz doit être défini") | 588 |
633 | 589 d_ata <- merge(d_ata, |
634 Data <- merge(Data, | 590 unit_sp_sz[, c("species.code", "observation.unit", "size.class", "biomass")], |
635 unitSpSz[ , c("species.code", "observation.unit", "size.class", "biomass")], | 591 by = c("species.code", "observation.unit", "size.class"), |
636 by=c("species.code", "observation.unit", "size.class"), | 592 suffixes = c("", ".y")) |
637 suffixes=c("", ".y")) | |
638 | 593 |
639 ## add tot biomass / species / observation unit : | 594 ## add tot biomass / species / observation unit : |
640 biomTot <- tapply(unitSpSz$biomass, | 595 biom_tot <- tapply(unit_sp_sz$biomass, |
641 as.list(unitSpSz[ , c("species.code", "observation.unit")]), | 596 as.list(unit_sp_sz[, c("species.code", "observation.unit")]), |
642 function(x) | 597 function(x) { |
643 { | |
644 ifelse(all(is.na(x)), | 598 ifelse(all(is.na(x)), |
645 NA, | 599 NA, |
646 sum(x, na.rm=TRUE)) | 600 sum(x, na.rm = TRUE)) |
647 }) | 601 }) |
648 | 602 |
649 Data <- merge(Data, | 603 d_ata <- merge(d_ata, |
650 as.data.frame(as.table(biomTot), responseName="tot.biomass")) | 604 as.data.frame(as.table(biom_tot), responseName = "tot.biomass")) |
651 } | 605 } |
652 | 606 |
653 ## add colony field for ponderate means pondérées if absent : | 607 ## add colony field for ponderate means pondérées if absent : |
654 if (any(casMetrique[metrics] == "w.mean.colonies" & ! is.element("colonies", colnames(Data)))) | 608 if (any(cas_metric[metrics] == "w.mean.colonies" & ! is.element("colonies", colnames(d_ata)))) { |
655 { | 609 d_ata$colonies <- unit_sp[match(apply(d_ata[, c("species.code", "observation.unit")], |
656 Data$colonies <- unitSp[match(apply(Data[ , c("species.code", "observation.unit")], | 610 1, paste, collapse = "*"), |
657 1, paste, collapse="*"), | 611 apply(unit_sp[, c("species.code", "observation.unit")], |
658 apply(unitSp[ , c("species.code", "observation.unit")], | 612 1, paste, collapse = "*")), "colonies"] |
659 1, paste, collapse="*")), "colonies"] | 613 } |
660 }else{} | |
661 | 614 |
662 | 615 |
663 ## Aggregation of metric depending on factors : | 616 ## Aggregation of metric depending on factors : |
664 reslong <- betterCbind(dfList=lapply(metrics, # sapply used to have names | 617 reslong <- bettercbind(df_list = lapply(metrics, # sapply used to have names |
665 agregation.f, | 618 agregation_f, |
666 Data=Data, factors=factors, casMetrique=casMetrique, | 619 d_ata = d_ata, factors = factors, cas_metric = cas_metric, |
667 nbName=nbName)) | 620 nb_name = nb_name)) |
668 | 621 |
669 ## Aggregation and add other factors : | 622 ## Aggregation and add other factors : |
670 if ( ! (is.null(listFact) || length(listFact) == 0)) | 623 if (! (is.null(list_fact) || length(list_fact) == 0)) { |
671 { | |
672 reslong <- cbind(reslong, | 624 reslong <- cbind(reslong, |
673 sapply(Data[ , listFact, drop=FALSE], | 625 sapply(d_ata[, list_fact, drop = FALSE], |
674 function(fact) | 626 function(fact) { |
675 { | |
676 tapply(fact, | 627 tapply(fact, |
677 as.list(Data[ , factors, drop=FALSE]), | 628 as.list(d_ata[, factors, drop = FALSE]), |
678 function(x) | 629 function(x) { |
679 { | 630 if (length(x) > 1 && length(unique(x)) > 1) { # must be one modality |
680 if (length(x) > 1 && length(unique(x)) > 1) # must be one modality | |
681 { | |
682 return(NULL) # otherwise it is NULL | 631 return(NULL) # otherwise it is NULL |
683 }else{ | 632 }else{ |
684 unique(as.character(x)) | 633 unique(as.character(x)) |
685 } | 634 } |
686 }) | 635 }) |
687 })) | 636 })) |
688 }else{} | 637 } |
689 | 638 |
690 ## If some factors aren't at the right class : | 639 ## If some factors aren't at the right class : |
691 if (any(tmp <- sapply(reslong[ , listFact, drop=FALSE], class) != sapply(Data[ , listFact, drop=FALSE], class))) | 640 if (any(tmp <- sapply(reslong[, list_fact, drop = FALSE], class) != sapply(d_ata[, list_fact, drop = FALSE], class))) { |
692 { | 641 for (i in which(tmp)) { |
693 for (i in which(tmp)) | 642 switch(sapply(d_ata[, list_fact, drop = FALSE], class)[i], |
694 { | 643 "integer" = { |
695 switch(sapply(Data[ , listFact, drop=FALSE], class)[i], | 644 reslong[, list_fact[i]] <- as.integer(as.character(reslong[, list_fact[i]])) |
696 "integer"={ | |
697 reslong[ , listFact[i]] <- as.integer(as.character(reslong[ , listFact[i]])) | |
698 }, | 645 }, |
699 "numeric"={ | 646 "numeric" = { |
700 reslong[ , listFact[i]] <- as.numeric(as.character(reslong[ , listFact[i]])) | 647 reslong[, list_fact[i]] <- as.numeric(as.character(reslong[, list_fact[i]])) |
701 }, | 648 }, |
702 reslong[ , listFact[i]] <- eval(call(paste("as", sapply(Data[ , listFact, drop=FALSE], class)[i], sep="."), | 649 reslong[, list_fact[i]] <- eval(call(paste("as", sapply(d_ata[, list_fact, drop = FALSE], class)[i], sep = "."), |
703 reslong[ , listFact[i]])) | 650 reslong[, list_fact[i]])) |
704 ) | 651 ) |
705 } | 652 } |
706 }else{} | 653 } |
707 | 654 |
708 ## Initial order of factors levels : | 655 ## Initial order of factors levels : |
709 reslong <- as.data.frame(sapply(colnames(reslong), | 656 reslong <- as.data.frame(sapply(colnames(reslong), |
710 function(x) | 657 function(x) { |
711 { | 658 if (is.factor(reslong[, x])) { |
712 if (is.factor(reslong[ , x])) | 659 return(factor(reslong[, x], levels = levels(d_ata[, x]))) |
713 { | |
714 return(factor(reslong[ , x], levels=levels(Data[ , x]))) | |
715 }else{ | 660 }else{ |
716 return(reslong[ , x]) | 661 return(reslong[, x]) |
717 } | 662 } |
718 }, simplify=FALSE)) | 663 }, simplify = FALSE)) |
719 | 664 |
720 | 665 |
721 ## Check of other aggregated factors supplémentaires. There must be no NULL elements : | 666 ## Check of other aggregated factors supplémentaires. There must be no NULL elements : |
722 if (any(sapply(reslong[ , listFact], function(x){any(is.null(unlist(x)))}))) | 667 if (any(sapply(reslong[, list_fact], function(x) { |
723 { | 668 any(is.null(unlist(x))) |
669 }))) { | |
724 warning(paste("One of the suppl. factors is probably a subset", | 670 warning(paste("One of the suppl. factors is probably a subset", |
725 " of the observations grouping factor(s).", sep="")) | 671 " of the observations grouping factor(s).", sep = "")) |
726 return(NULL) | 672 return(NULL) |
727 }else{ | 673 }else{ |
728 return(reslong) | 674 return(reslong) |
729 } | 675 } |
730 } | 676 } |
731 | 677 |
732 ######################################### end of the function agregations.generic.f | 678 ######################################### end of the function agregations_generic_f |
733 | 679 |
734 ######################################### start of the function dropLevels.f called y calcBiodiv.f in FucntExeCalcCommIndexesGalaxy.r and modeleLineaireWP2.unitobs.f in FunctExeCalcGLMGalaxy.r | 680 ######################################### start of the function drop_levels_f called y calc_biodiv_f in FucntExeCalcCommIndexesGalaxy.r and glm_community in FunctExeCalcGLMGalaxy.r |
735 dropLevels.f <- function(df, which=NULL) | 681 drop_levels_f <- function(df, which = NULL) { |
736 { | |
737 ## Purpose: Suppress unused levels of factors | 682 ## Purpose: Suppress unused levels of factors |
738 ## ---------------------------------------------------------------------- | 683 ## ---------------------------------------------------------------------- |
739 ## Arguments: df : a data.frame | 684 ## Arguments: df : a data.frame |
740 ## which : included columns index (all by default) | 685 ## which : included columns index (all by default) |
741 ## ---------------------------------------------------------------------- | 686 ## ---------------------------------------------------------------------- |
742 ## Author: Yves Reecht, Date: 10 août 2010, 13:29 modified by Coline ROYAUX 04 june 2020 | 687 ## Author: Yves Reecht, Date: 10 août 2010, 13:29 modified by Coline ROYAUX 04 june 2020 |
743 | 688 |
744 if (class(df) != "data.frame") | 689 if (class(df) != "data.frame") { |
745 { | |
746 stop("'df' must be a data.frame") | 690 stop("'df' must be a data.frame") |
747 }else{ | 691 }else{ |
748 if (is.null(which)) | 692 if (is.null(which)) { |
749 { | 693 x <- as.data.frame(sapply(df, function(x) { |
750 x <- as.data.frame(sapply(df, function(x) | 694 return(x[, drop = TRUE]) |
751 { | 695 }, simplify = FALSE), |
752 return(x[ ,drop=TRUE]) | 696 stringsAsFactors = FALSE) |
753 }, simplify=FALSE), | |
754 stringsAsFactors=FALSE) | |
755 }else{ # Only some columns used | 697 }else{ # Only some columns used |
756 x <- df | 698 x <- df |
757 | 699 |
758 x[ , which] <- as.data.frame(sapply(df[ , which, drop=FALSE], | 700 x[, which] <- as.data.frame(sapply(df[, which, drop = FALSE], |
759 function(x) | 701 function(x) { |
760 { | 702 return(x[, drop = TRUE]) |
761 return(x[ , drop=TRUE]) | 703 }, simplify = FALSE), |
762 }, simplify=FALSE), | 704 stringsAsFactors = FALSE) |
763 stringsAsFactors=FALSE) | |
764 } | 705 } |
765 | 706 |
766 return(x) | 707 return(x) |
767 } | 708 } |
768 } | 709 } |
769 ######################################### end of the function dropLevels.f | 710 ######################################### end of the function drop_levels_f |
770 | 711 |
771 ######################################### start of the function subsetToutesTables.f called by modeleLineaireWP2.unitobs.f in FunctExeCalcGLMGalaxy.r | 712 ######################################### start of the function subset_all_tables_f called by glm_community in FunctExeCalcGLMGalaxy.r |
772 | 713 |
773 subsetToutesTables.f <- function(metrique, tabMetrics, facteurs, selections, | 714 subset_all_tables_f <- function(metrique, tab_metrics, facteurs, selections, |
774 tabUnitobs, refesp, tableMetrique="", nbName="number", ObsType = "", | 715 tab_unitobs, refesp, tab_metrique = "", nb_name = "number", obs_type = "", |
775 exclude=NULL, add=c("species.code", "observation.unit")) | 716 exclude = NULL, add = c("species.code", "observation.unit")) { |
776 { | |
777 ## Purpose: Extract useful data only from chosen metrics and factors | 717 ## Purpose: Extract useful data only from chosen metrics and factors |
778 ## ---------------------------------------------------------------------- | 718 ## ---------------------------------------------------------------------- |
779 ## Arguments: metrique : chosen metric | 719 ## Arguments: metrique : chosen metric |
780 ## facteurs : all chosen factors | 720 ## facteurs : all chosen factors |
781 ## selections : corresponding modality selected | 721 ## selections : corresponding modality selected |
782 ## tableMetrique : metrics table name | 722 ## tab_metrique : metrics table name |
783 ## exclude : factors levels to exclude | 723 ## exclude : factors levels to exclude |
784 ## add : field to add to data table | 724 ## add : field to add to data table |
785 ## ---------------------------------------------------------------------- | 725 ## ---------------------------------------------------------------------- |
786 ## Author: Yves Reecht, Date: 6 août 2010, 16:46 modified by Coline ROYAUX 04 june 2020 | 726 ## Author: Yves Reecht, Date: 6 août 2010, 16:46 modified by Coline ROYAUX 04 june 2020 |
787 | 727 |
788 ## If no metrics table available : | 728 ## If no metrics table available : |
789 if (is.element(tableMetrique, c("", "TableOccurrences", "TablePresAbs"))) | 729 if (is.element(tab_metrique, c("", "TableOccurrences", "TablePresAbs"))) { |
790 { | 730 tab_metrique <- "unit_sp" |
791 tableMetrique <- "unitSp" | 731 } |
792 }else{} | 732 |
793 | 733 cas_tables <- c("unit_sp" = "unit_sp", |
794 casTables <- c("unitSp"="unitSp", | 734 "TablePresAbs" = "unit_sp", |
795 "TablePresAbs"="unitSp", | 735 "unit_sp_sz" = "unit_sp_sz") |
796 "unitSpSz"="unitSpSz") | |
797 | 736 |
798 ## Recuperation of metrics table : | 737 ## Recuperation of metrics table : |
799 dataMetrique <- tabMetrics | 738 data_metric <- tab_metrics |
800 unitobs <- tabUnitobs | 739 unitobs <- tab_unitobs |
801 refesp <- refesp | 740 refesp <- refesp |
802 | 741 |
803 ## If no metrics available or already computed : | 742 ## If no metrics available or already computed : |
804 if (is.element(metrique, c("", "occurrence.frequency"))) | 743 if (is.element(metrique, c("", "occurrence.frequency"))) { |
805 { | |
806 metrique <- "tmp" | 744 metrique <- "tmp" |
807 dataMetrique$tmp <- 0 | 745 data_metric$tmp <- 0 |
808 dataMetrique$tmp[dataMetrique[ , nbName] > 0] <- 1 | 746 data_metric$tmp[data_metric[, nb_name] > 0] <- 1 |
809 }else{} | 747 } |
810 | 748 |
811 if (!is.null(add)) | 749 if (!is.null(add)) { |
812 { | 750 metriques <- c(metrique, add[is.element(add, colnames(data_metric))]) |
813 metriques <- c(metrique, add[is.element(add, colnames(dataMetrique))]) | |
814 }else{ | 751 }else{ |
815 metriques <- metrique | 752 metriques <- metrique |
816 } | 753 } |
817 | 754 |
818 ## Subset depending on metrics table | 755 ## Subset depending on metrics table |
819 switch(casTables[tableMetrique], | 756 switch(cas_tables[tab_metrique], |
820 ## Observation table by unitobs and species : | 757 ## Observation table by unitobs and species : |
821 unitSp={ | 758 unit_sp = { |
822 restmp <- cbind(dataMetrique[!is.na(dataMetrique[ , metrique]) , metriques, drop=FALSE], | 759 restmp <- cbind(data_metric[!is.na(data_metric[, metrique]), metriques, drop = FALSE], |
823 unitobs[match(dataMetrique$observation.unit[!is.na(dataMetrique[ , metrique])], | 760 unitobs[match(data_metric$observation.unit[!is.na(data_metric[, metrique])], |
824 unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs | 761 unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs |
825 facteurs[is.element(facteurs, colnames(unitobs))], drop=FALSE], | 762 facteurs[is.element(facteurs, colnames(unitobs))], drop = FALSE], |
826 refesp[match(dataMetrique$species.code[!is.na(dataMetrique[ , metrique])], | 763 refesp[match(data_metric$species.code[!is.na(data_metric[, metrique])], |
827 refesp$species.code), # ajout des colonnes sélectionnées d'especes | 764 refesp$species.code), # ajout des colonnes sélectionnées d'especes |
828 facteurs[is.element(facteurs, colnames(refesp))], drop=FALSE]) | 765 facteurs[is.element(facteurs, colnames(refesp))], drop = FALSE]) |
829 }, | 766 }, |
830 ## Observation table by unitobs, species and size class : | 767 ## Observation table by unitobs, species and size class : |
831 unitSpSz={ | 768 unit_sp_sz = { |
832 restmp <- cbind(dataMetrique[!is.na(dataMetrique[ , metrique]) , | 769 restmp <- cbind(data_metric[!is.na(data_metric[, metrique]), |
833 c(metriques, "size.class"), drop=FALSE], | 770 c(metriques, "size.class"), drop = FALSE], |
834 unitobs[match(dataMetrique$observation.unit[!is.na(dataMetrique[ , metrique])], | 771 unitobs[match(data_metric$observation.unit[!is.na(data_metric[, metrique])], |
835 unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs | 772 unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs |
836 facteurs[is.element(facteurs, colnames(unitobs))], drop=FALSE], | 773 facteurs[is.element(facteurs, colnames(unitobs))], drop = FALSE], |
837 refesp[match(dataMetrique$species.code[!is.na(dataMetrique[ , metrique])], | 774 refesp[match(data_metric$species.code[!is.na(data_metric[, metrique])], |
838 refesp$species.code), # ajout des colonnes sélectionnées d'especes | 775 refesp$species.code), # ajout des colonnes sélectionnées d'especes |
839 facteurs[is.element(facteurs, colnames(refesp))], drop=FALSE]) | 776 facteurs[is.element(facteurs, colnames(refesp))], drop = FALSE]) |
840 }, | 777 }, |
841 ## Other cases : | 778 ## Other cases : |
842 restmp <- cbind(dataMetrique[!is.na(dataMetrique[ , metrique]) , metriques, drop=FALSE], | 779 restmp <- cbind(data_metric[!is.na(data_metric[, metrique]), metriques, drop = FALSE], |
843 unitobs[match(dataMetrique$observation.unit[!is.na(dataMetrique[ , metrique])], | 780 unitobs[match(data_metric$observation.unit[!is.na(data_metric[, metrique])], |
844 unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs. | 781 unitobs$observation.unit), # ajout des colonnes sélectionnées d'unitobs. |
845 facteurs[is.element(facteurs, colnames(unitobs))], drop=FALSE]) | 782 facteurs[is.element(facteurs, colnames(unitobs))], drop = FALSE]) |
846 ) | 783 ) |
847 | 784 |
848 selCol <- which(!is.na(selections)) | 785 sel_col <- which(!is.na(selections)) |
849 if (!is.null(exclude)) | 786 if (!is.null(exclude)) { |
850 { | 787 sel_col <- sel_col[sel_col != exclude] |
851 selCol <- selCol[selCol != exclude] | |
852 } | 788 } |
853 | 789 |
854 ## Particular case of size classes : | 790 ## Particular case of size classes : |
855 if (is.element("size.class", colnames(restmp))) | 791 if (is.element("size.class", colnames(restmp))) { |
856 { | 792 if (length(grep("^[[:digit:]]*[-_][[:digit:]]*$", unique(as.character(restmp$size.class)), perl = TRUE)) == |
857 if (length(grep("^[[:digit:]]*[-_][[:digit:]]*$", unique(as.character(restmp$size.class)), perl=TRUE)) == | 793 length(unique(as.character(restmp$size.class)))) { |
858 length(unique(as.character(restmp$size.class)))) | 794 restmp[, "size.class"] <- |
859 { | |
860 restmp$size.class <- | |
861 factor(as.character(restmp$size.class), | 795 factor(as.character(restmp$size.class), |
862 levels=unique(as.character(restmp$size.class))[ | 796 levels = unique(as.character(restmp$size.class))[ |
863 order(as.numeric(sub("^([[:digit:]]*)[-_][[:digit:]]*$", | 797 order(as.numeric(sub("^([[:digit:]]*)[-_][[:digit:]]*$", |
864 "\\1", | 798 "\\1", |
865 unique(as.character(restmp$size.class)), | 799 unique(as.character(restmp$size.class)), |
866 perl=TRUE)), | 800 perl = TRUE)), |
867 na.last=FALSE)]) | 801 na.last = FALSE)]) |
868 }else{ | 802 }else{ |
869 restmp$size.class <- factor(restmp$size.class) | 803 restmp[, "size.class"] <- factor(restmp$size.class) |
870 } | 804 } |
871 }else{} | 805 } |
872 | 806 |
873 ## Biomass and density conversion -> /100m² : | 807 ## Biomass and density conversion -> /100m² : |
874 if (any(is.element(colnames(restmp), c("biomass", "density", | 808 if (any(is.element(colnames(restmp), c("biomass", "density", |
875 "biomass.max", "density.max", | 809 "biomass.max", "density.max", |
876 "biomass.sd", "density.sd"))) && ObsType != "fishing") | 810 "biomass.sd", "density.sd"))) && obs_type != "fishing") { |
877 { | 811 restmp[, is.element(colnames(restmp), |
878 restmp[ , is.element(colnames(restmp), | |
879 c("biomass", "density", | 812 c("biomass", "density", |
880 "biomass.max", "density.max", | 813 "biomass.max", "density.max", |
881 "biomass.sd", "density.sd"))] <- 100 * | 814 "biomass.sd", "density.sd"))] <- 100 * |
882 restmp[, is.element(colnames(restmp), | 815 restmp[, is.element(colnames(restmp), |
883 c("biomass", "density", | 816 c("biomass", "density", |
884 "biomass.max", "density.max", | 817 "biomass.max", "density.max", |
885 "biomass.sd", "density.sd"))] | 818 "biomass.sd", "density.sd"))] |
886 }else{} | 819 } |
887 | 820 |
888 return(restmp) | 821 return(restmp) |
889 } | 822 } |
890 | 823 |
891 ######################################### end of the function subsetToutesTables.f | 824 ######################################### end of the function subset_all_tables_f |
892 | 825 |
893 | 826 ######################################### start of the function organise_fact called by modeleLineaireWP2.xxx.f in FunctExeCalcGLMxxGalaxy.r |
894 ######################################### start of the function sortiesLM.f called by modeleLineaireWP2.unitobs.f in FunctExeCalcGLMGalaxy.r | 827 |
895 sortiesLM.f <- function(objLM, TabSum, #formule, | 828 organise_fact <- function(list_rand, list_fact) { |
896 metrique, factAna, cut, colAna, listFact, lev = NULL, Data, | 829 ## Purpose: organise response factors |
897 Log=FALSE, sufixe=NULL, type="espece") | 830 ## ---------------------------------------------------------------------- |
898 { | 831 ## Arguments: list_rand : Analysis random factors list |
832 ## list_fact : Analysis factors list | |
833 ## ---------------------------------------------------------------------- | |
834 ## Author: Coline ROYAUX 14 november 2020 | |
835 | |
836 if (list_rand[1] != "None") { | |
837 if (all(is.element(list_fact, list_rand)) || list_fact[1] == "None") { | |
838 resp_fact <- paste("(1|", paste(list_rand, collapse = ") + (1|"), ")") | |
839 list_f <- NULL | |
840 list_fact <- list_rand | |
841 }else{ | |
842 list_f <- list_fact[!is.element(list_fact, list_rand)] | |
843 resp_fact <- paste(paste(list_f, collapse = " + "), " + (1|", paste(list_rand, collapse = ") + (1|"), ")") | |
844 list_fact <- c(list_f, list_rand) | |
845 } | |
846 }else{ | |
847 list_f <- list_fact | |
848 resp_fact <- paste(list_fact, collapse = " + ") | |
849 } | |
850 return(list(resp_fact, list_f, list_fact)) | |
851 } | |
852 | |
853 ######################################### end of the function organise_fact | |
854 | |
855 ######################################### start of the function organise_fact called by modeleLineaireWP2.xxx.f in FunctExeCalcGLMxxGalaxy.r | |
856 distrib_choice <- function(distrib = distrib, metrique = metrique, data = tmpd_ata) { | |
857 ## Purpose: choose the right distribution | |
858 ## ---------------------------------------------------------------------- | |
859 ## Arguments: data : data used for analysis | |
860 ## metrique : Chosen metric | |
861 ## distrib : distribution law selected by user | |
862 ## ---------------------------------------------------------------------- | |
863 ## Author: Coline ROYAUX 14 november 2020 | |
864 | |
865 if (distrib == "None") { | |
866 if (metrique == "presence_absence") { | |
867 chose_distrib <- "binomial" | |
868 }else{ | |
869 switch(class(data[, metrique]), | |
870 "integer" = { | |
871 chose_distrib <- "poisson" | |
872 }, | |
873 "numeric" = { | |
874 chose_distrib <- "gaussian" | |
875 }, | |
876 stop("Selected metric class doesn't fit, you should select an integer or a numeric variable")) | |
877 } | |
878 }else{ | |
879 chose_distrib <- distrib | |
880 } | |
881 return(chose_distrib) | |
882 } | |
883 | |
884 ######################################### end of the function organise_fact | |
885 | |
886 ######################################### start of the function create_res_table called by modeleLineaireWP2.xxx.f in FunctExeCalcGLMxxGalaxy.r | |
887 create_res_table <- function(list_rand, list_fact, row, lev, distrib) { | |
888 ## Purpose: create results table | |
889 ## ---------------------------------------------------------------------- | |
890 ## Arguments: list_rand : Analysis random factors list | |
891 ## list_fact : Analysis factors list | |
892 ## row : rows of results table = species or separation factor | |
893 ## lev : Levels of analysis factors list | |
894 ## distrib : distribution law | |
895 ## ---------------------------------------------------------------------- | |
896 ## Author: Coline ROYAUX 04 october 2020 | |
897 | |
898 if (list_rand[1] != "None") { ## if random effects | |
899 tab_sum <- data.frame(analysis = row, Interest.var = NA, distribution = NA, AIC = NA, BIC = NA, logLik = NA, deviance = NA, df.resid = NA) | |
900 colrand <- unlist(lapply(list_rand, | |
901 FUN = function(x) { | |
902 lapply(c("Std.Dev", "NbObservation", "NbLevels"), | |
903 FUN = function(y) { | |
904 paste(x, y, collapse = ":") | |
905 }) | |
906 })) | |
907 tab_sum[, colrand] <- NA | |
908 | |
909 if (! is.null(lev)) { ## if fixed effects + random effects | |
910 colcoef <- unlist(lapply(c("(Intercept)", lev), | |
911 FUN = function(x) { | |
912 lapply(c("Estimate", "Std.Err", "Zvalue", "Pvalue", "IC_up", "IC_inf", "signif"), | |
913 FUN = function(y) { | |
914 paste(x, y, collapse = ":") | |
915 }) | |
916 })) | |
917 | |
918 }else{ ## if no fixed effects | |
919 colcoef <- NULL | |
920 } | |
921 | |
922 }else{ ## if no random effects | |
923 tab_sum <- data.frame(analysis = row, Interest.var = NA, distribution = NA, AIC = NA, Resid.deviance = NA, df.resid = NA, Null.deviance = NA, df.null = NA) | |
924 | |
925 switch(distrib, | |
926 "gaussian" = { | |
927 colcoef <- unlist(lapply(c("(Intercept)", lev), | |
928 FUN = function(x) { | |
929 lapply(c("Estimate", "Std.Err", "Tvalue", "Pvalue", "IC_up", "IC_inf", "signif"), | |
930 FUN = function(y) { | |
931 paste(x, y, collapse = ":") | |
932 }) | |
933 })) | |
934 | |
935 }, | |
936 "quasipoisson" = { | |
937 colcoef <- unlist(lapply(c("(Intercept)", lev), | |
938 FUN = function(x) { | |
939 lapply(c("Estimate", "Std.Err", "Tvalue", "Pvalue", "IC_up", "IC_inf", "signif"), | |
940 FUN = function(y) { | |
941 paste(x, y, collapse = ":") | |
942 }) | |
943 })) | |
944 | |
945 } | |
946 , { | |
947 colcoef <- unlist(lapply(c("(Intercept)", lev), | |
948 FUN = function(x) { | |
949 lapply(c("Estimate", "Std.Err", "Zvalue", "Pvalue", "IC_up", "IC_inf", "signif"), | |
950 FUN = function(y) { | |
951 paste(x, y, collapse = ":") | |
952 }) | |
953 })) | |
954 }) | |
955 | |
956 } | |
957 | |
958 tab_sum[, colcoef] <- NA | |
959 | |
960 | |
961 return(tab_sum) | |
962 } | |
963 ######################################### end of the function create_res_table | |
964 | |
965 ######################################### start of the function sorties_lm_f called by glm_community in FunctExeCalcGLMGalaxy.r | |
966 sorties_lm_f <- function(obj_lm, obj_lmy, tab_sum, #formule, | |
967 metrique, fact_ana, cut, col_ana, list_fact, list_rand, lev = NULL, d_ata, | |
968 log = FALSE, sufixe = NULL) { | |
899 ## Purpose: Form GLM and LM results | 969 ## Purpose: Form GLM and LM results |
900 ## ---------------------------------------------------------------------- | 970 ## ---------------------------------------------------------------------- |
901 ## Arguments: objLM : lm object | 971 ## Arguments: obj_lm : lm object |
902 ## TabSum : output summary table | 972 ## obj_lmy : lm object with year as continuous |
973 ## tab_sum : output summary table | |
903 ## formule : LM formula | 974 ## formule : LM formula |
904 ## metrique : Chosen metric | 975 ## metrique : Chosen metric |
905 ## factAna : separation factor | 976 ## fact_ana : separation factor |
906 ## cut : level of separation factor | 977 ## cut : level of separation factor |
907 ## colAna : colname for separation factor in output summary table | 978 ## col_ana : colname for separation factor in output summary table |
908 ## listFact : Analysis factors list | 979 ## list_fact : Analysis factors list |
980 ## list_rand : Analysis random factors list | |
909 ## levels : Levels of analysis factors list | 981 ## levels : Levels of analysis factors list |
910 ## Data : Data used for analysis | 982 ## d_ata : d_ata used for analysis |
911 ## Log : put log on metric ? (boolean) | 983 ## log : put log on metric ? (boolean) |
912 ## sufixe : sufix for file name | 984 ## sufixe : sufix for file name |
913 ## type : analysis type | |
914 ## ---------------------------------------------------------------------- | 985 ## ---------------------------------------------------------------------- |
915 ## Author: Yves Reecht, Date: 25 août 2010, 16:19 modified by Coline ROYAUX 04 june 2020 | 986 ## Author: Yves Reecht, Date: 25 août 2010, 16:19 modified by Coline ROYAUX 04 june 2020 |
916 | 987 |
917 sumLM <- summary(objLM) | 988 tab_sum[, "Interest.var"] <- as.character(metrique) |
918 if (length(grep("^glmmTMB", objLM$call)) > 0) #if random effects | 989 sum_lm <- summary(obj_lm) |
919 { | 990 tab_sum[, "distribution"] <- as.character(sum_lm$family[1]) |
920 TabSum[TabSum[,colAna]==cut,"AIC"] <- sumLM$AICtab[1] | 991 |
921 TabSum[TabSum[,colAna]==cut,"BIC"] <- sumLM$AICtab[2] | 992 if (length(grep("^glmmTMB", obj_lm$call)) > 0) { #if random effects |
922 TabSum[TabSum[,colAna]==cut,"logLik"] <- sumLM$AICtab[3] | 993 tab_sum[tab_sum[, col_ana] == cut, "AIC"] <- sum_lm$AICtab[1] |
923 TabSum[TabSum[,colAna]==cut,"deviance"] <- sumLM$AICtab[4] | 994 tab_sum[tab_sum[, col_ana] == cut, "BIC"] <- sum_lm$AICtab[2] |
924 TabSum[TabSum[,colAna]==cut,"df.resid"] <- sumLM$AICtab[5] | 995 tab_sum[tab_sum[, col_ana] == cut, "logLik"] <- sum_lm$AICtab[3] |
925 | 996 tab_sum[tab_sum[, col_ana] == cut, "deviance"] <- sum_lm$AICtab[4] |
926 if (! is.null(lev)) ## if fixed effects + random effects | 997 tab_sum[tab_sum[, col_ana] == cut, "df.resid"] <- sum_lm$AICtab[5] |
927 { | 998 |
928 TabCoef <- as.data.frame(sumLM$coefficients$cond) | 999 if (! is.null(lev)) { ## if fixed effects + random effects |
929 TabCoef$signif <- lapply(TabCoef[,"Pr(>|z|)"],FUN=function(x){if(!is.na(x) && x < 0.05){"yes"}else{"no"}}) | 1000 tab_coef <- as.data.frame(sum_lm$coefficients$cond) |
930 | 1001 tab_coef$signif <- lapply(tab_coef[, "Pr(>|z|)"], FUN = function(x) { |
931 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Zvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"z value"] | 1002 if (!is.na(x) && x < 0.05) { |
932 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Pvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"Pr(>|z|)"] | 1003 "yes" |
933 | 1004 }else{ |
934 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Zvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"z value"]}else{NA}})) | 1005 "no" |
935 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Pvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"Pr(>|z|)"]}else{NA}})) | 1006 } |
936 }else{} | 1007 }) |
937 | 1008 |
938 switch(as.character(length(sumLM$varcor$cond)), | 1009 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Zvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "z value"] |
939 "1"={StdD <- c(sumLM$varcor$cond[[1]])}, | 1010 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Pvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Pr(>|z|)"] |
940 "2"={StdD <- c(sumLM$varcor$cond[[1]],sumLM$varcor$cond[[2]])}, | 1011 |
941 StdD <- NULL) | 1012 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Zvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { |
942 TabSum[TabSum[,colAna]==cut,grepl(paste(listRand,"Std.Dev",collapse="|"),colnames(TabSum))] <- StdD | 1013 if (length(grep(x, rownames(tab_coef))) > 0) { |
943 TabSum[TabSum[,colAna]==cut,grepl(paste(listRand,"NbObservation",collapse="|"),colnames(TabSum))] <- sumLM$nobs | 1014 tab_coef[grepl(x, rownames(tab_coef)), "z value"] |
944 TabSum[TabSum[,colAna]==cut,grepl(paste(listRand,"NbLevels",collapse="|"),colnames(TabSum))] <- unlist(lapply(listRand,FUN=function(x){nlevels(Data[,x])})) | 1015 }else{ |
945 | 1016 NA |
1017 } | |
1018 })) | |
1019 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Pvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
1020 if (length(grep(x, rownames(tab_coef))) > 0) { | |
1021 tab_coef[grepl(x, rownames(tab_coef)), "Pr(>|z|)"] | |
1022 }else{ | |
1023 NA | |
1024 } | |
1025 })) | |
1026 | |
1027 if (any(obj_lmy != "")) { | |
1028 sum_lmy <- summary(obj_lmy) | |
1029 tab_coefy <- as.data.frame(sum_lmy$coefficients$cond) | |
1030 tab_coefy$signif <- lapply(tab_coefy[, "Pr(>|z|)"], FUN = function(x) { | |
1031 if (!is.na(x) && x < 0.05) { | |
1032 "yes" | |
1033 }else{ | |
1034 "no" | |
1035 } | |
1036 }) | |
1037 tab_sum[tab_sum[, col_ana] == cut, "year Zvalue"] <- ifelse(length(tab_coefy["year", "z value"]) > 0, tab_coefy["year", "z value"], NA) | |
1038 tab_sum[tab_sum[, col_ana] == cut, "year Pvalue"] <- ifelse(length(tab_coefy["year", "Pr(>|z|)"]) > 0, tab_coefy["year", "Pr(>|z|)"], NA) | |
1039 } | |
1040 | |
1041 } | |
1042 | |
1043 switch(as.character(length(sum_lm$varcor$cond)), | |
1044 "1" = { | |
1045 std_d <- c(sum_lm$varcor$cond[[1]]) | |
1046 }, | |
1047 "2" = { | |
1048 std_d <- c(sum_lm$varcor$cond[[1]], sum_lm$varcor$cond[[2]]) | |
1049 }, | |
1050 std_d <- NULL) | |
1051 | |
1052 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(list_rand, "Std.Dev", collapse = "|"), colnames(tab_sum))] <- std_d | |
1053 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(list_rand, "NbObservation", collapse = "|"), colnames(tab_sum))] <- sum_lm$nobs | |
1054 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(list_rand, "NbLevels", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(list_rand, FUN = function(x) { | |
1055 nlevels(d_ata[, x]) | |
1056 })) | |
1057 | |
946 }else{ ## if fixed effects only | 1058 }else{ ## if fixed effects only |
947 | 1059 |
948 TabSum[TabSum[,colAna]==cut,"AIC"] <- sumLM$aic | 1060 tab_sum[tab_sum[, col_ana] == cut, "AIC"] <- sum_lm$aic |
949 TabSum[TabSum[,colAna]==cut,"Resid.deviance"] <- sumLM$deviance | 1061 tab_sum[tab_sum[, col_ana] == cut, "Resid.deviance"] <- sum_lm$deviance |
950 TabSum[TabSum[,colAna]==cut,"df.resid"] <- sumLM$df.residual | 1062 tab_sum[tab_sum[, col_ana] == cut, "df.resid"] <- sum_lm$df.residual |
951 TabSum[TabSum[,colAna]==cut,"Null.deviance"] <- sumLM$null.deviance | 1063 tab_sum[tab_sum[, col_ana] == cut, "Null.deviance"] <- sum_lm$null.deviance |
952 TabSum[TabSum[,colAna]==cut,"df.null"] <- sumLM$df.null | 1064 tab_sum[tab_sum[, col_ana] == cut, "df.null"] <- sum_lm$df.null |
953 TabCoef <- as.data.frame(sumLM$coefficients) | 1065 tab_coef <- as.data.frame(sum_lm$coefficients) |
954 | 1066 |
955 if (sumLM$family[1] == "gaussian" || sumLM$family[1] == "quasipoisson") | 1067 if (any(obj_lmy != "")) { |
956 { | 1068 sum_lmy <- summary(obj_lmy) |
957 | 1069 tab_coefy <- as.data.frame(sum_lmy$coefficients) |
958 TabCoef$signif <- lapply(TabCoef[,"Pr(>|t|)"],FUN=function(x){if(!is.na(x) && x < 0.05){"yes"}else{"no"}}) | 1070 } |
959 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Tvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"t value"] | 1071 |
960 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Pvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"Pr(>|t|)"] | 1072 if (sum_lm$family[1] == "gaussian" || sum_lm$family[1] == "quasipoisson") { |
961 | 1073 |
962 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Tvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"t value"]}else{NA}})) | 1074 tab_coef$signif <- lapply(tab_coef[, "Pr(>|t|)"], FUN = function(x) { |
963 | 1075 if (!is.na(x) && x < 0.05) { |
964 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Pvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"Pr(>|t|)"]}else{NA}})) | 1076 "yes" |
965 }else{ | 1077 }else{ |
966 TabCoef$signif <- lapply(TabCoef[,"Pr(>|z|)"],FUN=function(x){if(!is.na(x) && x < 0.05){"yes"}else{"no"}}) | 1078 "no" |
967 | 1079 } |
968 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Zvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"z value"] | 1080 }) |
969 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Pvalue",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"Pr(>|z|)"] | 1081 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Tvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "t value"] |
970 | 1082 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Pvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Pr(>|t|)"] |
971 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Zvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"z value"]}else{NA}})) | 1083 |
972 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Pvalue",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"Pr(>|z|)"]}else{NA}})) | 1084 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Tvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { |
973 } | 1085 if (length(grep(x, rownames(tab_coef))) > 0) { |
974 } | 1086 tab_coef[grepl(x, rownames(tab_coef)), "t value"] |
975 | 1087 }else{ |
976 if (! is.null(lev)) ## if fixed effects | 1088 NA |
977 { | 1089 } |
978 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Estimate",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"Estimate"] | 1090 })) |
979 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*Std.Err",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"Std. Error"] | 1091 |
980 TabSum[TabSum[,colAna]==cut,grepl("Intercept.*signif",colnames(TabSum))] <- TabCoef[grepl("Intercept",rownames(TabCoef)),"signif"] | 1092 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Pvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { |
981 | 1093 if (length(grep(x, rownames(tab_coef))) > 0) { |
982 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Estimate",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"Estimate"]}else{NA}})) | 1094 tab_coef[grepl(x, rownames(tab_coef)), "Pr(>|t|)"] |
983 | 1095 }else{ |
984 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"Std.Err",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"Std. Error"]}else{NA}})) | 1096 NA |
985 TabSum[TabSum[,colAna]==cut,grepl(paste(lev,"signif",collapse="|"),colnames(TabSum))] <- unlist(lapply(lev,FUN=function(x){if (length(grep(x,rownames(TabCoef))) > 0) {TabCoef[grepl(x,rownames(TabCoef)),"signif"]}else{NA}})) | 1097 } |
986 }else{} | 1098 })) |
987 | 1099 |
988 return(TabSum) | 1100 if (any(obj_lmy != "")) { |
989 | 1101 tab_coefy$signif <- lapply(tab_coefy[, "Pr(>|t|)"], FUN = function(x) { |
990 } | 1102 if (!is.na(x) && x < 0.05) { |
991 | 1103 "yes" |
992 | 1104 }else{ |
993 ######################################### end of the function sortiesLM.f | 1105 "no" |
994 | 1106 } |
995 ######################################### start of the function graphTitle.f called by sortiesLM.f | 1107 }) |
996 | 1108 tab_sum[tab_sum[, col_ana] == cut, "year Tvalue"] <- ifelse(length(tab_coefy["year", "t value"]) > 0, tab_coefy["year", "t value"], NA) |
997 graphTitle.f <- function(metrique, modGraphSel, factGraph, listFact, model=NULL, type="espece", | 1109 tab_sum[tab_sum[, col_ana] == cut, "year Pvalue"] <- ifelse(length(tab_coefy["year", "Pr(>|z|)"]) > 0, tab_coefy["year", "Pr(>|t|)"], NA) |
998 lang = getOption("P.lang")) | 1110 } |
999 { | 1111 |
1000 ## Purpose: Automatically write a name for a graph | 1112 }else{ |
1001 ## ---------------------------------------------------------------------- | 1113 tab_coef$signif <- lapply(tab_coef[, "Pr(>|z|)"], FUN = function(x) { |
1002 ## Arguments: | 1114 if (!is.na(x) && x < 0.05) { |
1003 ## ---------------------------------------------------------------------- | 1115 "yes" |
1004 ## Author: Yves Reecht, Date: 14 oct. 2010, 15:44 modified by Coline ROYAUX 04 june 2020 | 1116 }else{ |
1005 return(paste(ifelse(is.null(model), | 1117 "no" |
1006 "Values of ", | 1118 } |
1007 paste(model, | 1119 }) |
1008 " for", | 1120 |
1009 sep="")), | 1121 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Zvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "z value"] |
1010 metrique, | 1122 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Pvalue", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Pr(>|z|)"] |
1011 ifelse(is.element(type, c("espece", "unitobs", "CL_espece", "unitobs(CL)")), | 1123 |
1012 paste("aggregated"), | 1124 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Zvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { |
1013 ""), | 1125 if (length(grep(x, rownames(tab_coef))) > 0) { |
1014 switch(type, | 1126 tab_coef[grepl(x, rownames(tab_coef)), "z value"] |
1015 "espece"=" per species and station", | 1127 }else{ |
1016 "CL_espece"=" per size class, species and station", | 1128 NA |
1017 "unitobs"=" per station", | 1129 } |
1018 "unitobs(CL)"=" per station", | 1130 })) |
1019 "CL_unitobs"=" per size class and station", | 1131 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Pvalue", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { |
1020 "biodiv"=" per station", | 1132 if (length(grep(x, rownames(tab_coef))) > 0) { |
1021 ""), | 1133 tab_coef[grepl(x, rownames(tab_coef)), "Pr(>|z|)"] |
1022 switch(type, | 1134 }else{ |
1023 "espece"={ | 1135 NA |
1024 ifelse(modGraphSel == "", # Only separation factor if defined | 1136 } |
1025 "", | 1137 })) |
1026 paste("\nfor the field", | 1138 |
1027 " '", factGraph, "' = ", modGraphSel, sep="")) | 1139 if (any(obj_lmy != "")) { |
1028 }, | 1140 tab_coefy$signif <- lapply(tab_coefy[, "Pr(>|z|)"], FUN = function(x) { |
1029 "CL_espece"={ | 1141 if (!is.na(x) && x < 0.05) { |
1030 ifelse(modGraphSel == "", # Only separation factor if defined | 1142 "yes" |
1031 "", | 1143 }else{ |
1032 paste("\nfor the field", | 1144 "no" |
1033 " '", factGraph, "' = ", modGraphSel, sep="")) | 1145 } |
1034 }, | 1146 }) |
1035 "unitobs"={ | 1147 |
1036 ifelse(modGraphSel[1] == "", # Only separation factor if defined | 1148 tab_sum[tab_sum[, col_ana] == cut, "year Zvalue"] <- ifelse(length(tab_coefy["year", "z value"]) > 0, tab_coefy["year", "z value"], NA) |
1037 "\nfor all species", | 1149 tab_sum[tab_sum[, col_ana] == cut, "year Pvalue"] <- ifelse(length(tab_coefy["year", "Pr(>|z|)"]) > 0, tab_coefy["year", "Pr(>|z|)"], NA) |
1038 paste("\nfor all species matching", | 1150 } |
1039 " '", factGraph, "' = (", | 1151 } |
1040 paste(modGraphSel, collapse=", "), ")", sep="")) | 1152 } |
1041 }, | 1153 |
1042 "unitobs(CL)"={ | 1154 if (! is.null(lev)) { ## if fixed effects |
1043 ifelse(modGraphSel[1] == "", # Only separation factor if defined | 1155 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Estimate", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Estimate"] |
1044 "\nfor all size classes", | 1156 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*Std.Err", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "Std. Error"] |
1045 paste("\nfor size classes matching", | 1157 tab_sum[tab_sum[, col_ana] == cut, grepl("Intercept.*signif", colnames(tab_sum))] <- tab_coef[grepl("Intercept", rownames(tab_coef)), "signif"] |
1046 " '", factGraph, "' = (", | 1158 |
1047 paste(modGraphSel, collapse=", "), ")", sep="")) | 1159 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Estimate", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { |
1048 }, | 1160 if (length(grep(x, rownames(tab_coef))) > 0) { |
1049 "CL_unitobs"={ | 1161 tab_coef[grepl(x, rownames(tab_coef)), "Estimate"] |
1050 ifelse(modGraphSel[1] == "", # Only separation factor if defined | 1162 }else{ |
1051 "\nfor all species", | 1163 NA |
1052 paste("\nfor all species matching", | 1164 } |
1053 " '", factGraph, "' = (", | 1165 })) |
1054 paste(modGraphSel, collapse=", "), ")", sep="")) | 1166 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "Std.Err", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { |
1055 }, | 1167 if (length(grep(x, rownames(tab_coef))) > 0) { |
1056 "biodiv"={ | 1168 tab_coef[grepl(x, rownames(tab_coef)), "Std. Error"] |
1057 ifelse(modGraphSel[1] == "", # Only separation factor if defined | 1169 }else{ |
1058 "", | 1170 NA |
1059 paste("\nfor stations matching", | 1171 } |
1060 " '", factGraph, "' = (", | 1172 })) |
1061 paste(modGraphSel, collapse=", "), ")", sep="")) | 1173 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "signif", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { |
1062 }, | 1174 if (length(grep(x, rownames(tab_coef))) > 0) { |
1063 ""), | 1175 tab_coef[grepl(x, rownames(tab_coef)), "signif"] |
1064 "\n by ", | 1176 }else{ |
1065 paste(sapply(listFact[length(listFact):1], | 1177 NA |
1066 function(x)paste(c(## varNames.f(x, "article"), | 1178 } |
1067 "", | 1179 })) |
1068 x, collapse="")), | 1180 |
1069 collapse=" and"), | 1181 if (any(obj_lmy != "")) { |
1070 "\n", sep=""))) | 1182 tab_sum[tab_sum[, col_ana] == cut, "year Estimate"] <- ifelse(length(tab_coefy["year", "Estimate"]) > 0, tab_coefy["year", "Estimate"], NA) |
1071 } | 1183 tab_sum[tab_sum[, col_ana] == cut, "year Std.Err"] <- ifelse(length(tab_coefy["year", "Std. Error"]) > 0, tab_coefy["year", "Std. Error"], NA) |
1072 | 1184 tab_sum[tab_sum[, col_ana] == cut, "year signif"] <- ifelse(length(tab_coefy["year", "signif"]) > 0, tab_coefy["year", "signif"], NA) |
1073 ######################################### end of the function graphTitle.f | 1185 } |
1074 | 1186 |
1075 ######################################### start of the function noteGLM.f called by modeleLineaireWP2.species.f and modeleLineaireWP2.unitobs.f | 1187 } |
1076 | 1188 |
1077 noteGLM.f <- function(data, objLM, metric, listFact, details = FALSE) | 1189 ic <- tryCatch(as.data.frame(confint(obj_lm)), error = function(e) { |
1078 { | 1190 }) |
1191 | |
1192 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "IC_up", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
1193 if (length(grep(x, rownames(ic))) > 0) { | |
1194 ic[grepl(x, rownames(ic)), "97.5 %"] | |
1195 }else{ | |
1196 NA | |
1197 } | |
1198 })) | |
1199 tab_sum[tab_sum[, col_ana] == cut, grepl(paste(lev, "IC_inf", collapse = "|"), colnames(tab_sum))] <- unlist(lapply(lev, FUN = function(x) { | |
1200 if (length(grep(x, rownames(ic))) > 0) { | |
1201 ic[grepl(x, rownames(ic)), "2.5 %"] | |
1202 }else{ | |
1203 NA | |
1204 } | |
1205 })) | |
1206 | |
1207 return(tab_sum) | |
1208 | |
1209 } | |
1210 | |
1211 | |
1212 ######################################### end of the function sorties_lm_f | |
1213 | |
1214 | |
1215 ######################################### start of the function note_glm_f called by glm_species and glm_community | |
1216 | |
1217 note_glm_f <- function(data, obj_lm, metric, list_fact, details = FALSE) { | |
1079 ## Purpose: Note your GLM analysis | 1218 ## Purpose: Note your GLM analysis |
1080 ## ---------------------------------------------------------------------- | 1219 ## ---------------------------------------------------------------------- |
1081 ## Arguments: data : Dataframe used for analysis | 1220 ## Arguments: data : d_ataframe used for analysis |
1082 ## objLM : GLM assessed | 1221 ## obj_lm : GLM assessed |
1083 ## metric : selected metric | 1222 ## metric : selected metric |
1084 ## listFact : Analysis factors list | 1223 ## list_fact : Analysis factors list |
1224 ## details : detailed output ? | |
1085 ## ---------------------------------------------------------------------- | 1225 ## ---------------------------------------------------------------------- |
1086 ## Author: Coline ROYAUX, 26 june 2020 | 1226 ## Author: Coline ROYAUX, 26 june 2020 |
1087 | 1227 |
1088 rate <- 0 | 1228 rate <- 0 |
1089 detres <- list(complete_plan=NA, balanced_plan=NA, NA_proportion_OK=NA, no_residual_dispersion=NA, uniform_residuals=NA, outliers_proportion_OK=NA, no_zero_inflation=NA, observation_factor_ratio_OK=NA, enough_levels_random_effect=NA, rate=NA) | 1229 detres <- list(complete_plan = NA, balanced_plan = NA, NA_proportion_OK = NA, no_residual_dispersion = NA, uniform_residuals = NA, outliers_proportion_OK = NA, no_zero_inflation = NA, observation_factor_ratio_OK = NA, enough_levels_random_effect = NA, rate = NA) |
1090 | 1230 |
1091 #### Data criterions #### | 1231 #### d_ata criterions #### |
1092 | 1232 |
1093 ## Plan | 1233 ## Plan |
1094 | 1234 |
1095 plan <- as.data.frame(table(data[,listFact])) | 1235 plan <- as.data.frame(table(data[, list_fact])) |
1096 | 1236 |
1097 if (nrow(plan[plan$Freq==0,]) < nrow(plan)*0.1) # +0.5 if less than 10% of possible factor's level combinations aren't represented in the sampling scheme | 1237 if (nrow(plan[plan$Freq == 0, ]) < nrow(plan) * 0.1) { # +0.5 if less than 10% of possible factor's level combinations aren't represented in the sampling scheme |
1098 { | 1238 rate <- rate + 0.5 |
1099 rate <- rate + 0.5 | |
1100 detres$complete_plan <- TRUE | 1239 detres$complete_plan <- TRUE |
1101 | 1240 |
1102 if (summary(as.factor(plan$Freq))[1] > nrow(plan)*0.9) # +0.5 if the frequency of the most represented frequency of possible factor's levels combinations is superior to 90% of the total number of possible factor's levels combinations | 1241 if (summary(as.factor(plan$Freq))[1] > nrow(plan) * 0.9) { # +0.5 if the frequency of the most represented frequency of possible factor's levels combinations is superior to 90% of the total number of possible factor's levels combinations |
1103 { | |
1104 rate <- rate + 0.5 | 1242 rate <- rate + 0.5 |
1105 detres$balanced_plan <- TRUE | 1243 detres$balanced_plan <- TRUE |
1106 }else{} | 1244 } |
1107 | 1245 |
1108 }else{ | 1246 }else{ |
1109 detres$complete_plan <- FALSE | 1247 detres$complete_plan <- FALSE |
1110 detres$balanced_plan <- FALSE | 1248 detres$balanced_plan <- FALSE |
1111 } | 1249 } |
1112 | 1250 |
1113 if (nrow(data) - nrow(na.omit(data)) < nrow(data)*0.1) # +1 if less than 10% of the lines in the dataframe bares a NA | 1251 if (nrow(data) - nrow(na.omit(data)) < nrow(data) * 0.1) { # +1 if less than 10% of the lines in the dataframe bares a NA |
1114 { | |
1115 rate <- rate + 1 | 1252 rate <- rate + 1 |
1116 detres$NA_proportion_OK <- TRUE | 1253 detres["NA_proportion_OK"] <- TRUE |
1117 }else{ | 1254 }else{ |
1118 detres$NA_proportion_OK <- FALSE | 1255 detres["NA_proportion_OK"] <- FALSE |
1119 } | 1256 } |
1120 | 1257 |
1121 #### Model criterions #### | 1258 #### Model criterions #### |
1122 | 1259 |
1123 if (length(grep("quasi",objLM$family)) == 0) #DHARMa doesn't work with quasi distributions | 1260 if (length(grep("quasi", obj_lm$family)) == 0) { #DHARMa doesn't work with quasi distributions |
1124 { | 1261 |
1125 | 1262 residuals <- DHARMa::simulateResiduals(obj_lm) |
1126 Residuals <- simulateResiduals(objLM) | 1263 |
1127 | 1264 capture.output(test_res <- DHARMa::testResiduals(residuals)) |
1128 capture.output(testRes <- testResiduals(Residuals)) | 1265 test_zero <- DHARMa::testZeroInflation(residuals) |
1129 testZero <- testZeroInflation(Residuals) | |
1130 | 1266 |
1131 ## dispersion of residuals | 1267 ## dispersion of residuals |
1132 | 1268 |
1133 if (testRes$dispersion$p.value > 0.05) # +1.5 if dispersion tests not significative | 1269 if (test_res$dispersion$p.value > 0.05) { # +1.5 if dispersion tests not significative |
1134 { | |
1135 rate <- rate + 1.5 | 1270 rate <- rate + 1.5 |
1136 detres$no_residual_dispersion <- TRUE | 1271 detres$no_residual_dispersion <- TRUE |
1137 }else{ | 1272 }else{ |
1138 detres$no_residual_dispersion <- FALSE | 1273 detres$no_residual_dispersion <- FALSE |
1139 } | 1274 } |
1140 | 1275 |
1141 ## uniformity of residuals | 1276 ## uniformity of residuals |
1142 | 1277 |
1143 if (testRes$uniformity$p.value > 0.05) # +1 if uniformity tests not significative | 1278 if (test_res$uniformity$p.value > 0.05) { # +1 if uniformity tests not significative |
1144 { | 1279 rate <- rate + 1 |
1145 rate <- rate + 1.5 | |
1146 detres$uniform_residuals <- TRUE | 1280 detres$uniform_residuals <- TRUE |
1147 }else{ | 1281 }else{ |
1148 detres$uniform_residuals <- FALSE | 1282 detres$uniform_residuals <- FALSE |
1149 } | 1283 } |
1150 | 1284 |
1151 ## residuals outliers | 1285 ## residuals outliers |
1152 | 1286 |
1153 if (testRes$outliers$p.value > 0.05) # +0.5 if outliers tests not significative | 1287 if (test_res$outliers$p.value > 0.05) { # +0.5 if outliers tests not significative |
1154 { | |
1155 rate <- rate + 0.5 | 1288 rate <- rate + 0.5 |
1156 detres$outliers_proportion_OK <- TRUE | 1289 detres["outliers_proportion_OK"] <- TRUE |
1157 }else{ | 1290 }else{ |
1158 detres$outliers_proportion_OK <- FALSE | 1291 detres["outliers_proportion_OK"] <- FALSE |
1159 } | 1292 } |
1160 | 1293 |
1161 ## Zero inflation test | 1294 ## Zero inflation test |
1162 | 1295 |
1163 if (testZero$p.value > 0.05) # +1 if zero inflation tests not significative | 1296 if (test_zero$p.value > 0.05) { # +1 if zero inflation tests not significative |
1164 { | 1297 rate <- rate + 1 |
1165 rate <- rate + 1.5 | |
1166 detres$no_zero_inflation <- TRUE | 1298 detres$no_zero_inflation <- TRUE |
1167 }else{ | 1299 }else{ |
1168 detres$no_zero_inflation <- FALSE | 1300 detres$no_zero_inflation <- FALSE |
1169 } | 1301 } |
1170 | 1302 |
1171 ## Factors/observations ratio | 1303 ## Factors/observations ratio |
1172 | 1304 |
1173 if (length(listFact)/nrow(na.omit(data)) < 0.1) # +1 if quantity of factors is less than 10% of the quantity of observations | 1305 if (length(list_fact) / nrow(na.omit(data)) < 0.1) { # +1 if quantity of factors is less than 10% of the quantity of observations |
1174 { | |
1175 rate <- rate + 1 | 1306 rate <- rate + 1 |
1176 detres$observation_factor_ratio_OK <- TRUE | 1307 detres["observation_factor_ratio_OK"] <- TRUE |
1177 }else{ | 1308 }else{ |
1178 detres$observation_factor_ratio_OK <- FALSE | 1309 detres["observation_factor_ratio_OK"] <- FALSE |
1179 } | 1310 } |
1180 | 1311 |
1181 ## less than 10 factors' level on random effect | 1312 ## less than 10 factors' level on random effect |
1182 | 1313 |
1183 if (length(grep("^glmmTMB", objLM$call)) > 0) | 1314 if (length(grep("^glmmTMB", obj_lm$call)) > 0) { |
1184 { | 1315 nlev_rand <- c() |
1185 nlevRand <- c() | 1316 for (fact in names(summary(obj_lm)$varcor$cond)) { |
1186 for(fact in names(summary(objLM)$varcor$cond)) | 1317 nlev_rand <- c(nlev_rand, length(unlist(unique(data[, fact])))) |
1187 { | |
1188 nlevRand <- c(nlevRand,length(unlist(unique(data[,fact])))) | |
1189 } | 1318 } |
1190 | 1319 |
1191 if (all(nlevRand > 10)) # +1 if more than 10 levels in one random effect | 1320 if (all(nlev_rand > 10)) { # +1 if more than 10 levels in one random effect |
1192 { | |
1193 rate <- rate + 1 | 1321 rate <- rate + 1 |
1194 detres$enough_levels_random_effect <- TRUE | 1322 detres$enough_levels_random_effect <- TRUE |
1195 }else{ | 1323 }else{ |
1196 detres$enough_levels_random_effect <- FALSE | 1324 detres$enough_levels_random_effect <- FALSE |
1197 } | 1325 } |
1198 }else{} | 1326 } |
1199 | 1327 |
1200 detres$rate <- rate | 1328 detres$rate <- rate |
1201 | 1329 |
1202 if (details) | 1330 if (details) { |
1203 { | 1331 return(detres) |
1204 return(detres) | |
1205 }else{ | 1332 }else{ |
1206 return(rate) | 1333 return(rate) |
1207 } | 1334 } |
1208 | 1335 |
1209 }else{ | 1336 }else{ |
1210 return(NA) | 1337 return(NA) |
1211 cat("Models with quasi distributions can't be rated for now") | 1338 cat("Models with quasi distributions can't be rated for now") |
1212 } | 1339 } |
1213 } | 1340 } |
1214 | 1341 |
1215 ######################################### end of the function noteGLM.f | 1342 ######################################### end of the function note_glm_f |
1216 | 1343 |
1217 ######################################### start of the function noteGLMs.f called by modeleLineaireWP2.species.f and modeleLineaireWP2.unitobs.f | 1344 ######################################### start of the function note_glms_f called by glm_species and glm_community |
1218 | 1345 |
1219 noteGLMs.f <- function(tabRate, exprML, objLM, file_out=FALSE) | 1346 note_glms_f <- function(tab_rate, expr_lm, obj_lm, file_out = FALSE) { |
1220 { | |
1221 ## Purpose: Note your GLM analysis | 1347 ## Purpose: Note your GLM analysis |
1222 ## ---------------------------------------------------------------------- | 1348 ## ---------------------------------------------------------------------- |
1223 ## Arguments: data : rates table from noteGLM.f | 1349 ## Arguments: tab_rate : rates table from note_glm_f |
1224 ## objLM : GLM assessed | 1350 ## expr_lm : GLM expression assessed |
1225 ## metric : selected metric | 1351 ## obj_lm : GLM object |
1226 ## listFact : Analysis factors list | 1352 ## file_out : Output as file ? else global rate only |
1227 ## ---------------------------------------------------------------------- | 1353 ## ---------------------------------------------------------------------- |
1228 ## Author: Coline ROYAUX, 26 june 2020 | 1354 ## Author: Coline ROYAUX, 26 june 2020 |
1229 | 1355 namefile <- "RatingGLM.txt" |
1230 RateM <- mean(na.omit(tabRate[,"rate"])) | 1356 |
1231 sum <- summary(objLM) | 1357 if (length(grep("quasi", obj_lm$family)) == 0) { #DHARMa doesn't work with quasi distributions |
1232 | 1358 |
1233 if (length(grep("^glmmTMB", objLM$call)) > 0) | 1359 rate_m <- median(na.omit(tab_rate[, "rate"])) |
1234 { | 1360 sum <- summary(obj_lm) |
1235 if (median(na.omit(tabRate[,"rate"])) >= 6) # if 50% has a rate superior or equal to 6 +1 | 1361 |
1236 { | 1362 if (length(grep("^glmmTMB", obj_lm$call)) > 0) { |
1237 RateM <- RateM + 1 | 1363 if (median(na.omit(tab_rate[, "rate"])) >= 6) { # if 50% has a rate superior or equal to 6 +1 |
1238 } | 1364 rate_m <- rate_m + 1 |
1239 | 1365 } |
1240 if (quantile(na.omit(tabRate[,"rate"]), probs=0.9) >= 6) # if 90% has a rate superior or equal to 6 +1 | 1366 |
1241 { | 1367 if (quantile(na.omit(tab_rate[, "rate"]), probs = 0.9) >= 6) { # if 90% has a rate superior or equal to 6 +1 |
1242 RateM <- RateM + 1 | 1368 rate_m <- rate_m + 1 |
1243 } | 1369 } |
1244 }else{ | 1370 }else{ |
1245 if (median(na.omit(tabRate[,"rate"])) >= 5) # if 50% has a rate superior or equal to 5 +1 | 1371 if (median(na.omit(tab_rate[, "rate"])) >= 5) { # if 50% has a rate superior or equal to 5 +1 |
1246 { | 1372 rate_m <- rate_m + 1 |
1247 RateM <- RateM + 1 | 1373 } |
1248 } | 1374 |
1249 | 1375 if (quantile(na.omit(tab_rate[, "rate"]), probs = 0.9) >= 5) { # if 90% has a rate superior or equal to 5 +1 |
1250 if (quantile(na.omit(tabRate[,"rate"]), probs=0.9) >= 5) # if 90% has a rate superior or equal to 5 +1 | 1376 rate_m <- rate_m + 1 |
1251 { | 1377 } |
1252 RateM <- RateM + 1 | 1378 } |
1253 } | 1379 |
1254 } | 1380 if (file_out) { |
1255 | |
1256 if (file_out) | |
1257 { | |
1258 namefile <- "RatingGLM.txt" | |
1259 | 1381 |
1260 cat("###########################################################################", | 1382 cat("###########################################################################", |
1261 "\n########################### Analysis evaluation ###########################", | 1383 "\n########################### Analysis evaluation ###########################", |
1262 "\n###########################################################################", file=namefile, fill=1,append=TRUE) | 1384 "\n###########################################################################", file = namefile, fill = 1, append = TRUE) |
1263 | 1385 |
1264 ## Informations on model : | 1386 ## Informations on model : |
1265 cat("\n\n######################################### \nFitted model:", file=namefile, fill=1,append=TRUE) | 1387 cat("\n\n######################################### \nFitted model:", file = namefile, fill = 1, append = TRUE) |
1266 cat("\t", deparse(exprML), "\n\n", file=namefile, sep="",append=TRUE) | 1388 cat("\t", deparse(expr_lm), "\n\n", file = namefile, sep = "", append = TRUE) |
1267 cat("Family: ", sum$family[[1]], | 1389 cat("Family: ", sum$family[[1]], |
1268 file=namefile,append=TRUE) | 1390 file = namefile, append = TRUE) |
1269 cat("\n\nNumber of analysis: ", nrow(tabRate), file=namefile, append=TRUE) | 1391 cat("\n\nNumber of analysis: ", nrow(tab_rate), file = namefile, append = TRUE) |
1270 | 1392 |
1271 ## Global rate : | 1393 ## Global rate : |
1272 cat("\n\n######################################### \nGlobal rate for all analysis:", | 1394 cat("\n\n######################################### \nGlobal rate for all analysis:", |
1273 "\n\n", RateM, "out of 10", file=namefile, append=TRUE) | 1395 "\n\n", rate_m, "out of 10", file = namefile, append = TRUE) |
1274 | 1396 |
1275 ## details on every GLM : | 1397 ## details on every GLM : |
1276 #NA_proportion_OK=NA, no_residual_dispersion=NA, uniform_residuals=NA, outliers_proportion_OK=NA, no_zero_inflation=NA, observation_factor_ratio_OK=NA, enough_levels_random_effect=NA, rate=NA | 1398 |
1277 cat("\n\n######################################### \nDetails on every analysis:\n\n", file=namefile, append=TRUE) | 1399 cat("\n\n######################################### \nDetails on every analysis:\n\n", file = namefile, append = TRUE) |
1278 cat("Analysis\tC1\tC2\tC3\tC4\tC5\tC6\tC7\tC8\tC9\tFinal rate", file=namefile, append=TRUE) | 1400 cat("Analysis\tC1\tC2\tC3\tC4\tC5\tC6\tC7\tC8\tC9\tFinal rate", file = namefile, append = TRUE) |
1279 apply(tabRate, 1, FUN=function(x) | 1401 apply(tab_rate, 1, FUN = function(x) { |
1280 { | 1402 |
1281 | 1403 if (!is.na(x["complete_plan"]) && x["complete_plan"] == TRUE) { |
1282 if (!is.na(x["complete_plan"]) && x["complete_plan"]==TRUE) | 1404 cat("\n", x[1], "\tyes", file = namefile, append = TRUE) |
1283 { | |
1284 cat("\n",x[1],"\tyes", file=namefile, append=TRUE) | |
1285 }else{ | 1405 }else{ |
1286 cat("\n",x[1],"\tno", file=namefile, append=TRUE) | 1406 cat("\n", x[1], "\tno", file = namefile, append = TRUE) |
1287 } | 1407 } |
1288 | 1408 |
1289 for (i in c("balanced_plan","NA_proportion_OK", "no_residual_dispersion", "uniform_residuals", "outliers_proportion_OK", "no_zero_inflation", "observation_factor_ratio_OK", "enough_levels_random_effect")) | 1409 for (i in c("balanced_plan", "NA_proportion_OK", "no_residual_dispersion", "uniform_residuals", "outliers_proportion_OK", "no_zero_inflation", "observation_factor_ratio_OK", "enough_levels_random_effect")) { |
1290 { | 1410 if (!is.na(x[i]) && x[i] == TRUE) { |
1291 if (!is.na(x[i]) && x[i]==TRUE) | 1411 cat("\tyes", file = namefile, append = TRUE) |
1292 { | |
1293 cat("\tyes", file=namefile, append=TRUE) | |
1294 }else{ | 1412 }else{ |
1295 cat("\tno", file=namefile, append=TRUE) | 1413 cat("\tno", file = namefile, append = TRUE) |
1296 } | 1414 } |
1297 } | 1415 } |
1298 | 1416 |
1299 cat("\t",x["rate"], "/ 8", file=namefile, append=TRUE) | 1417 cat("\t", x["rate"], "/ 8", file = namefile, append = TRUE) |
1300 | 1418 |
1301 | 1419 |
1302 }) | 1420 }) |
1303 cat("\n\nC1: Complete plan?\nC2: Balanced plan?\nC3: Few NA?\nC4: Regular dispersion?\nC5: Uniform residuals?\nC6: Regular outliers proportion?\nC7: No zero-inflation?\nC8: Enough observations for the amount of factors?\nC9: Enough levels on random effect?", file=namefile, append=TRUE) | 1421 cat("\n\nC1: Complete plan?\nC2: Balanced plan?\nC3: Few NA?\nC4: Regular dispersion?\nC5: Uniform residuals?\nC6: Regular outliers proportion?\nC7: No zero-inflation?\nC8: Good observation/factor ratio?\nC9: Enough levels on random effect?", file = namefile, append = TRUE) |
1304 | 1422 |
1305 ## Red flags - advice : | 1423 ## Red flags - advice : |
1306 cat("\n\n######################################### \nRed flags - advice:\n\n", file=namefile, append=TRUE) | 1424 cat("\n\n######################################### \nRed flags - advice:\n\n", file = namefile, append = TRUE) |
1307 if (all(na.omit(tabRate["NA_proportion_OK"]) == FALSE)) | 1425 if (all(na.omit(tab_rate["NA_proportion_OK"]) == FALSE)) { |
1308 { | 1426 cat("\n", "\t- More than 10% of lines of your dataset contains NAs", file = namefile, append = TRUE) |
1309 cat("\n","\t- More than 10% of your dataset bares NAs", file=namefile, append=TRUE) | 1427 } |
1310 }else{} | 1428 |
1311 | 1429 if (length(grep("FALSE", tab_rate["no_residual_dispersion"])) / length(na.omit(tab_rate["no_residual_dispersion"])) > 0.5) { |
1312 if (length(grep("FALSE",tabRate["no_residual_dispersion"])) / length(na.omit(tabRate["no_residual_dispersion"])) > 0.5) | 1430 cat("\n", "\t- More than 50% of your analyses are over- or under- dispersed : Try with another distribution family", file = namefile, append = TRUE) |
1313 { | 1431 } |
1314 cat("\n","\t- More than 50% of your analyses are over- or under- dispersed : Try with another distribution family", file=namefile, append=TRUE) | 1432 |
1315 }else{} | 1433 if (length(grep("FALSE", tab_rate["uniform_residuals"])) / length(na.omit(tab_rate["uniform_residuals"])) > 0.5) { |
1316 | 1434 cat("\n", "\t- More than 50% of your analyses haven't an uniform distribution of residuals : Try with another distribution family", file = namefile, append = TRUE) |
1317 if (length(grep("FALSE",tabRate["uniform_residuals"])) / length(na.omit(tabRate["uniform_residuals"])) > 0.5) | 1435 } |
1318 { | 1436 |
1319 cat("\n","\t- More than 50% of your analyses haven't an uniform distribution of residuals : Try with another distribution family", file=namefile, append=TRUE) | 1437 if (length(grep("FALSE", tab_rate["outliers_proportion_OK"])) / length(na.omit(tab_rate["outliers_proportion_OK"])) > 0.5) { |
1320 }else{} | 1438 cat("\n", "\t- More than 50% of your analyses have too much outliers : Try with another distribution family or try to select or filter your data", file = namefile, append = TRUE) |
1321 | 1439 } |
1322 if (length(grep("FALSE",tabRate["outliers_proportion_OK"])) / length(na.omit(tabRate["outliers_proportion_OK"])) > 0.5) | 1440 |
1323 { | 1441 if (length(grep("FALSE", tab_rate["no_zero_inflation"])) / length(na.omit(tab_rate["no_zero_inflation"])) > 0.5) { |
1324 cat("\n","\t- More than 50% of your analyses have too much outliers : Try with another distribution family or try to select your data", file=namefile, append=TRUE) | 1442 cat("\n", "\t- More than 50% of your analyses have zero inflation : Try to select or filter your data", file = namefile, append = TRUE) |
1325 }else{} | 1443 } |
1326 | 1444 |
1327 if (length(grep("FALSE",tabRate["no_zero_inflation"])) / length(na.omit(tabRate["no_zero_inflation"])) > 0.5) | 1445 if (length(grep("FALSE", tab_rate["observation_factor_ratio_OK"])) / length(na.omit(tab_rate["observation_factor_ratio_OK"])) > 0.5) { |
1328 { | 1446 cat("\n", "\t- More than 50% of your analyses have not enough observations for the amount of factors : Try to use less factors in your analysis or try to use another separation factor", file = namefile, append = TRUE) |
1329 cat("\n","\t- More than 50% of your analyses have zero inflation : Try to select your data", file=namefile, append=TRUE) | 1447 } |
1330 }else{} | 1448 |
1331 | 1449 if (any(tab_rate["enough_levels_random_effect"] == FALSE, na.rm = TRUE) && length(grep("^glmmTMB", obj_lm$call)) > 0) { |
1332 if (length(grep("FALSE",tabRate["observation_factor_ratio_OK"])) / length(na.omit(tabRate["observation_factor_ratio_OK"])) > 0.5) | 1450 cat("\n", "\t- Random effect hasn't enough levels to be robust : If it has less than ten levels remove the random effect", file = namefile, append = TRUE) |
1333 { | 1451 } |
1334 cat("\n","\t- More than 50% of your analyses have not enough observations for the amount of factors : Try to use less factors in your analysis or try to use another separation factor", file=namefile, append=TRUE) | 1452 }else{ |
1335 }else{} | 1453 |
1336 | 1454 return(rate_m) |
1337 if (any(tabRate["enough_levels_random_effect"] == FALSE, na.rm=TRUE) && length(grep("^glmmTMB", objLM$call)) > 0) | 1455 |
1338 { | 1456 } |
1339 cat("\n","\t- Random effect hasn't enough levels to be robust : If it has less than ten levels remove the random effect", file=namefile, append=TRUE) | 1457 }else{ |
1340 }else{} | 1458 cat("Models with quasi distributions can't be rated for now", file = namefile, append = TRUE) |
1341 }else{ | 1459 } |
1342 | 1460 } |
1343 return(RateM) | 1461 |
1344 | 1462 ######################################### end of the function note_glm_f |
1345 } | 1463 |
1346 } | 1464 ######################################### start of the function info_stats_f called by glm_species and glm_community |
1347 | 1465 |
1348 ######################################### end of the function noteGLM.f | 1466 info_stats_f <- function(filename, d_ata, agreg_level = c("species", "unitobs"), type = c("graph", "stat"), |
1349 | 1467 metrique, fact_graph, fact_graph_sel, list_fact, list_fact_sel) { |
1350 ######################################### start of the function infoStats.f called by modeleLineaireWP2.species.f and modeleLineaireWP2.unitobs.f | 1468 ## Purpose: informations and simple statistics |
1351 | 1469 ## ---------------------------------------------------------------------- |
1352 infoStats.f <- function(filename, Data, agregLevel=c("species", "unitobs"), type=c("graph", "stat"), | 1470 ## Arguments: filename : name of file |
1353 metrique, factGraph, factGraphSel, listFact, listFactSel) | 1471 ## d_ata : input data |
1354 { | 1472 ## agreg_level : aggregation level |
1355 ## Purpose: Écrire les infos et statistic sur les données associées à | 1473 ## type : type of function calling |
1356 ## un graphique ou analyse. | 1474 ## metrique : selected metric |
1357 ## ---------------------------------------------------------------------- | 1475 ## fact_graph : selection factor |
1358 ## Arguments: filename : chemin du fichier de résultats. | 1476 ## fact_graph_sel : list of factors levels selected for this factor |
1359 ## Data : données du graphique/de l'analyse. | 1477 ## list_fact : list of grouping factors |
1360 ## agregLevel : niveau d'agrégation de la fonction appelante. | 1478 ## list_fact_sel : list of factors levels selected for these factors |
1361 ## type : type de fonction appelante (grapique ou analyse). | |
1362 ## metrique : la métrique choisie. | |
1363 ## factGraph : le facteur sélection des espèces. | |
1364 ## factGraphSel : la sélection de modalités pour ce dernier | |
1365 ## listFact : liste du (des) facteur(s) de regroupement | |
1366 ## listFactSel : liste des modalités sélectionnées pour ce(s) | |
1367 ## dernier(s) | |
1368 ## ---------------------------------------------------------------------- | 1479 ## ---------------------------------------------------------------------- |
1369 ## Author: Yves Reecht, Date: 10 sept. 2012, 15:26 modified by Coline ROYAUX 04 june 2020 | 1480 ## Author: Yves Reecht, Date: 10 sept. 2012, 15:26 modified by Coline ROYAUX 04 june 2020 |
1370 | 1481 |
1371 ## Open file : | 1482 ## Open file : |
1372 File <- file(description=filename, | 1483 f_ile <- file(description = filename, |
1373 open="w", encoding="latin1") | 1484 open = "w", encoding = "latin1") |
1374 | 1485 |
1375 ## if error : | 1486 ## if error : |
1376 on.exit(if (exists("filename") && | 1487 on.exit(if (exists("filename") && |
1377 tryCatch(isOpen(File), | 1488 tryCatch(isOpen(f_ile), |
1378 error=function(e)return(FALSE))) close(File)) | 1489 error = function(e)return(FALSE))) close(f_ile)) |
1379 | 1490 |
1380 ## Metrics and factors infos : | 1491 ## Metrics and factors infos : |
1381 printSelectionInfo.f(metrique=metrique, #factGraph=factGraph, factGraphSel=factGraphSel, | 1492 print_selection_info_f(metrique = metrique, #fact_graph = fact_graph, fact_graph_sel = fact_graph_sel, |
1382 listFact=listFact, #listFactSel=listFactSel, | 1493 list_fact = list_fact, #list_fact_sel = list_fact_sel, |
1383 File=File, | 1494 f_ile = f_ile, |
1384 agregLevel=agregLevel, type=type) | 1495 agreg_level = agreg_level, type = type) |
1385 | 1496 |
1386 ## statistics : | 1497 ## statistics : |
1387 if (class(Data) == "list") | 1498 if (class(d_ata) == "list") { |
1388 { | |
1389 cat("\n###################################################", | 1499 cat("\n###################################################", |
1390 "\nStatistics per level of splitting factor:\n", | 1500 "\nStatistics per level of splitting factor:\n", |
1391 sep="", file=File,append=TRUE) | 1501 sep = "", file = f_ile, append = TRUE) |
1392 | 1502 |
1393 invisible(sapply(1:length(Data), | 1503 invisible(sapply(seq_len(length(d_ata)), |
1394 function(i) | 1504 function(i) { |
1395 { | 1505 print_stats_f(d_ata = d_ata[[i]], metrique = metrique, list_fact = list_fact, f_ile = f_ile, |
1396 printStats.f(Data=Data[[i]], metrique=metrique, listFact=listFact, File=File, | 1506 headline = fact_graph_sel[i]) |
1397 headline=factGraphSel[i]) | |
1398 })) | 1507 })) |
1399 }else{ | 1508 }else{ |
1400 printStats.f(Data=Data, metrique=metrique, listFact=listFact, File=File, | 1509 print_stats_f(d_ata = d_ata, metrique = metrique, list_fact = list_fact, f_ile = f_ile, |
1401 headline=NULL) | 1510 headline = NULL) |
1402 } | 1511 } |
1403 | 1512 |
1404 ## Close file : | 1513 ## Close file : |
1405 close(File) | 1514 close(f_ile) |
1406 | 1515 |
1407 } | 1516 } |
1408 | 1517 |
1409 ######################################### end of the function infoStats.f | 1518 ######################################### end of the function info_stats_f |
1410 | 1519 |
1411 | 1520 |
1412 ######################################### start of the function printSelectionInfo.f called by infoStats.f | 1521 ######################################### start of the function print_selection_info_f called by info_stats_f |
1413 | 1522 |
1414 printSelectionInfo.f <- function(metrique, listFact, | 1523 print_selection_info_f <- function(metrique, list_fact, |
1415 File, | 1524 f_ile, |
1416 agregLevel=c("species", "unitobs"), type=c("graph", "stat")) | 1525 agreg_level = c("species", "unitobs"), type = c("graph", "stat")) { |
1417 { | |
1418 ## Purpose: Write data informations | 1526 ## Purpose: Write data informations |
1419 ## ---------------------------------------------------------------------- | 1527 ## ---------------------------------------------------------------------- |
1420 ## Arguments: metrique : chosen metric | 1528 ## Arguments: metrique : chosen metric |
1421 ## listFact : factor's list | 1529 ## list_fact : factor's list |
1422 ## File : Results file name | 1530 ## f_ile : Results file name |
1423 ## agregLevel : aggregation level | 1531 ## agreg_level : aggregation level |
1424 ## type : function type | 1532 ## type : function type |
1425 ## ---------------------------------------------------------------------- | 1533 ## ---------------------------------------------------------------------- |
1426 ## Author: Yves Reecht, Date: 11 sept. 2012, 10:41 modified by Coline ROYAUX 04 june 2020 | 1534 ## Author: Yves Reecht, Date: 11 sept. 2012, 10:41 modified by Coline ROYAUX 04 june 2020 |
1427 | 1535 |
1428 cat("\n##################################################\n", | 1536 cat("\n##################################################\n", |
1429 "Metrics and factors (and possible units/selections):\n", | 1537 "Metrics and factors (and possible units/selections):\n", |
1430 sep="", file=File,append=TRUE) | 1538 sep = "", file = f_ile, append = TRUE) |
1431 | 1539 |
1432 ## metric info : | 1540 ## metric info : |
1433 cat("\n Metrics:", metrique, | 1541 cat("\n Metrics:", metrique, |
1434 "\n", file=File,append=TRUE) | 1542 "\n", file = f_ile, append = TRUE) |
1435 | |
1436 ## aggregation level : | |
1437 cat(" aggregated per ", | |
1438 switch(agregLevel, | |
1439 "CL_espece"=,"CL_unitobs"=,"spCL_unitobs"=,"spCL_espece"={ | |
1440 "size class / " | |
1441 }), | |
1442 switch(agregLevel, | |
1443 "CL_espece"=,"spCL_espece"=,"species"=,"spSpecies"=,"spEspece"={ | |
1444 "species / " | |
1445 }), | |
1446 switch(agregLevel, | |
1447 "spUnitobs"=,"spCL_unitobs"=,"spCL_espece"=,"spUnitobs(CL)"=,"spSpecies"=,"spEspece"={ | |
1448 paste(listFact, " (mean over ", sep="") | |
1449 }), | |
1450 "observation units", | |
1451 switch(agregLevel, | |
1452 "spUnitobs"=,"spCL_unitobs"=,"spCL_espece"=,"spUnitobs(CL)"=,"spSpecies"=,"spEspece"={ | |
1453 ")" | |
1454 }), | |
1455 ".\n", | |
1456 sep="", file=File,append=TRUE) | |
1457 | |
1458 ## Separation factors : | |
1459 # switch(agregLevel, | |
1460 # "species"=,"CL_espece"=,"espece"={ # Adapté également pour les LMs. | |
1461 # cat("\n", | |
1462 # switch(type, | |
1463 # "graph"="Graphics separation factor", | |
1464 # "stat"="Analyses separation factor"), | |
1465 # " : ", | |
1466 # ifelse(factGraph == "", "printSelectionInfo.f.11", | |
1467 # ifelse(is.na(factGraphSel[1]), | |
1468 # paste(varNames.f(factGraph, "nom"), "none!"), | |
1469 # paste(varNames.f(factGraph, "nom"), " (", | |
1470 # paste(factGraphSel, collapse=", "), ")", sep=""))), "\n", | |
1471 # sep="", file=File,append=TRUE) | |
1472 # }, | |
1473 # "unitobs"=,"CL_unitobs"=,"unitobs(CL)"=,"spUnitobs"={ | |
1474 # cat("(warning: no selection!!!)", | |
1475 # ifelse(factGraph == "", "\nSelection factor for aggregation of observations: ", | |
1476 # ifelse(is.na(factGraphSel[1]), | |
1477 # paste(varNames.f(factGraph, "nom"), "none (all species/size classes)!"), | |
1478 # paste(varNames.f(factGraph, "nom"), " (", | |
1479 # paste(factGraphSel, collapse=", "), ")", sep=""))), "\n", | |
1480 # sep="", file=File,append=TRUE) | |
1481 # }) | |
1482 | 1543 |
1483 ## Clustering factors : | 1544 ## Clustering factors : |
1484 if (is.element(agregLevel, c("spCL_unitobs", "spCL_espece", "spSpecies", "spEspece", | 1545 if (is.element(agreg_level, c("spCL_unitobs", "spCL_espece", "spSpecies", "spEspece", |
1485 "spUnitobs", "spUnitobs(CL)"))) {type <- "spatialGraph"} | 1546 "spUnitobs", "spUnitobs(CL)"))) { |
1547 type <- "spatialGraph" | |
1548 } | |
1486 | 1549 |
1487 cat(switch(type, | 1550 cat(switch(type, |
1488 "graph"="\nGrouping factor(s): \n * ", | 1551 "graph" = "\nGrouping factor(s): \n * ", |
1489 "stat"="\nAnalyses factor(s): \n * ", | 1552 "stat" = "\nAnalyses factor(s): \n * ", |
1490 "spatialGraph"="\nSpatial aggregation factor(s): \n * "), | 1553 "spatialGraph" = "\nSpatial aggregation factor(s): \n * "), |
1491 paste(listFact,collaspe="\n * "),"\n",file=File,append=TRUE) | 1554 paste(list_fact, collaspe = "\n * "), "\n", file = f_ile, append = TRUE) |
1492 | 1555 |
1493 # invisible(sapply(1:length(listFact), | 1556 } |
1494 # function(i) | 1557 |
1495 # { | 1558 ######################################### end of the function print_selection_info_f |
1496 # cat("\n * ", | 1559 |
1497 # ifelse(is.na(listFactSel[[i]][1]), | 1560 |
1498 # paste(varNames.f(listFact[i], "nom"), "(no selection)"), | 1561 ######################################### start of the function print_stats_f called by info_stats_f |
1499 # paste(varNames.f(listFact[i], "nom"), " (", | 1562 |
1500 # paste(listFactSel[[i]], collapse=", "), ")", sep="")), "\n", | 1563 print_stats_f <- function(d_ata, metrique, list_fact, f_ile, headline = NULL) { |
1501 # sep="", file=File,append=TRUE) | |
1502 # })) | |
1503 } | |
1504 | |
1505 ######################################### end of the function printSelectionInfo.f | |
1506 | |
1507 | |
1508 ######################################### start of the function printStats.f called by infoStats.f | |
1509 | |
1510 printStats.f <- function(Data, metrique, listFact, File, headline=NULL) | |
1511 { | |
1512 ## Purpose: Write general statistics table | 1564 ## Purpose: Write general statistics table |
1513 ## ---------------------------------------------------------------------- | 1565 ## ---------------------------------------------------------------------- |
1514 ## Arguments: Data : Analysis data | 1566 ## Arguments: d_ata : Analysis data |
1515 ## metrique : metric's name | 1567 ## metrique : metric's name |
1516 ## listFact : Factor's list | 1568 ## list_fact : Factor's list |
1517 ## File : Simple statistics file name | 1569 ## f_ile : Simple statistics file name |
1518 ## ---------------------------------------------------------------------- | 1570 ## ---------------------------------------------------------------------- |
1519 ## Author: Yves Reecht, Date: 11 sept. 2012, 10:09 modified by Coline ROYAUX 04 june 2020 | 1571 ## Author: Yves Reecht, Date: 11 sept. 2012, 10:09 modified by Coline ROYAUX 04 june 2020 |
1520 | 1572 |
1521 ## Header : | 1573 ## Header : |
1522 if ( ! is.null(headline)) | 1574 if (! is.null(headline)) { |
1523 { | |
1524 cat("\n", rep("#", nchar(headline) + 3), "\n", | 1575 cat("\n", rep("#", nchar(headline) + 3), "\n", |
1525 "## ", headline, "\n", | 1576 "## ", headline, "\n", |
1526 sep="", file=File,append=TRUE) | 1577 sep = "", file = f_ile, append = TRUE) |
1527 }else{} | 1578 } |
1528 | 1579 |
1529 cat("\n########################\nBase statistics:\n\n", file=File,append=TRUE) | 1580 cat("\n########################\nBase statistics:\n\n", file = f_ile, append = TRUE) |
1530 | 1581 |
1531 capture.output(print(summary.fr(Data[ , metrique])), file=File, append=TRUE) | 1582 capture.output(print(summary_fr(d_ata[, metrique])), file = f_ile, append = TRUE) |
1532 | 1583 |
1533 if ( ! is.null(listFact)) | 1584 if (! is.null(list_fact)) { |
1534 { | |
1535 cat("\n#########################################", | 1585 cat("\n#########################################", |
1536 "\nStatistics per combination of factor levels:\n\n", file=File, sep="",append=TRUE) | 1586 "\nStatistics per combination of factor levels:\n\n", file = f_ile, sep = "", append = TRUE) |
1537 | 1587 |
1538 ## Compute summary for each existing factor's cross : | 1588 ## Compute summary for each existing factor's cross : |
1539 res <- with(Data, | 1589 res <- with(d_ata, |
1540 tapply(eval(parse(text=metrique)), | 1590 tapply(eval(parse(text = metrique)), |
1541 INDEX=do.call(paste, | 1591 INDEX = do.call(paste, |
1542 c(lapply(listFact, | 1592 c(lapply(list_fact, |
1543 function(y)eval(parse(text=y))), | 1593 function(y)eval(parse(text = y))), |
1544 sep=".")), | 1594 sep = ".")), |
1545 FUN=summary.fr)) | 1595 FUN = summary_fr)) |
1546 | 1596 |
1547 ## results in table | 1597 ## results in table |
1548 capture.output(print(do.call(rbind, res)), | 1598 capture.output(print(do.call(rbind, res)), |
1549 file=File, append=TRUE) | 1599 file = f_ile, append = TRUE) |
1550 }else{} | 1600 } |
1551 | 1601 |
1552 ## empty line : | 1602 ## empty line : |
1553 cat("\n", file=File,append=TRUE) | 1603 cat("\n", file = f_ile, append = TRUE) |
1554 } | 1604 } |
1555 | 1605 |
1556 ######################################### end of the function printStats.f | 1606 ######################################### end of the function print_stats_f |
1557 | 1607 |
1558 | 1608 |
1559 ######################################### start of the function summary.fr called by printStats.f | 1609 ######################################### start of the function summary_fr called by print_stats_f |
1560 summary.fr <- function(object, digits = max(3, getOption("digits") - 3),...) | 1610 summary_fr <- function(object, digits = max(3, getOption("digits") - 3), ...) { |
1561 { | |
1562 ## Purpose: Adding SD and N to summary | 1611 ## Purpose: Adding SD and N to summary |
1563 ## ---------------------------------------------------------------------- | 1612 ## ---------------------------------------------------------------------- |
1564 ## Arguments: object : Object to summarise | 1613 ## Arguments: object : Object to summarise |
1565 ## ---------------------------------------------------------------------- | 1614 ## ---------------------------------------------------------------------- |
1566 ## Author: Yves Reecht, Date: 13 sept. 2012, 15:47 modified by Coline ROYAUX 04 june 2020 | 1615 ## Author: Yves Reecht, Date: 13 sept. 2012, 15:47 modified by Coline ROYAUX 04 june 2020 |
1567 | 1616 |
1568 if ( ! is.numeric(object)) stop("Programming error") | 1617 if (! is.numeric(object)) stop("Programming error") |
1569 | 1618 |
1570 ## Compute summary : | 1619 ## Compute summary : |
1571 res <- c(summary(object=object, digits, ...), "sd"=signif(sd(x=object), digits=digits), "N"=length(object)) | 1620 res <- c(summary(object = object, digits, ...), "sd" = signif(sd(x = object), digits = digits), "N" = length(object)) |
1572 | 1621 |
1573 return(res) | 1622 return(res) |
1574 } | 1623 } |
1575 | 1624 |
1576 ######################################### start of the function summary.fr | 1625 ######################################### start of the function summary_fr |
1577 | |
1578 ######################################### Package DHARMa | |
1579 | |
1580 ################ simulateResiduals.R | |
1581 | |
1582 #' Create simulated residuals | |
1583 #' | |
1584 #' The function creates scaled residuals by simulating from the fitted model. Residuals can be extracted with \code{\link{residuals.DHARMa}}. See \code{\link{testResiduals}} for an overview of residual tests, \code{\link{plot.DHARMa}} for an overview of available plots. | |
1585 #' | |
1586 #' @param fittedModel a fitted model of a class supported by DHARMa | |
1587 #' @param n number of simulations. Default is 100. A more save value would be 250 or even 1000. The smaller the number, the higher the stochastic error on the residuals. Also, for very small n, discretization artefacts can influence the tests. | |
1588 #' @param refit if FALSE, new data will be simulated and scaled residuals will be created by comparing observed data with new data. If TRUE, the model will be refit on the simulated data (parametric bootstrap), and scaled residuals will be created by comparing observed with refitted residuals. | |
1589 #' @param integerResponse if TRUE, noise will be added at to the residuals to maintain a uniform expectations for integer responses (such as Poisson or Binomial). Usually, the model will automatically detect the appropriate setting, so there is no need to adjust this setting. | |
1590 #' @param plot if TRUE, \code{\link{plotResiduals}} will be directly run after the residuals have been calculated | |
1591 #' @param ... parameters to pass to the simulate function of the model object. An important use of this is to specify whether simulations should be conditional on the current random effect estimates, e.g. via re.form. Note that not all models support syntax to specify conditionao or unconditional simulations. See also details | |
1592 #' @param seed the random seed to be used within DHARMa. The default setting, recommended for most users, is keep the random seed on a fixed value 123. This means that you will always get the same randomization and thus teh same result when running the same code. NULL = no new seed is set, but previous random state will be restored after simulation. FALSE = no seed is set, and random state will not be restored. The latter two options are only recommended for simulation experiments. See vignette for details. | |
1593 #' @param method the quantile randomization method used. The two options implemented at the moment are probability integral transform (PIT-) residuals (current default), and the "traditional" randomization procedure, that was used in DHARMa until version 0.3.0. For details, see \code{\link{getQuantile}} | |
1594 #' @return An S3 class of type "DHARMa", essentially a list with various elements. Implemented S3 functions include plot, print and \code{\link{residuals.DHARMa}}. Residuals returns the calculated scaled residuals. | |
1595 #' | |
1596 #' @details There are a number of important considerations when simulating from a more complex (hierarchical) model: | |
1597 #' | |
1598 #' \strong{Re-simulating random effects / hierarchical structure}: in a hierarchical model, we have several stochastic processes aligned on top of each other. Specifically, in a GLMM, we have a lower level stochastic process (random effect), whose result enters into a higher level (e.g. Poisson distribution). For other hierarchical models such as state-space models, similar considerations apply. | |
1599 #' | |
1600 #' In such a situation, we have to decide if we want to re-simulate all stochastic levels, or only a subset of those. For example, in a GLMM, it is common to only simulate the last stochastic level (e.g. Poisson) conditional on the fitted random effects. This is often referred to as a conditional simuation. For controlling how many levels should be re-simulated, the simulateResidual function allows to pass on parameters to the simulate function of the fitted model object. Please refer to the help of the different simulate functions (e.g. ?simulate.merMod) for details. For merMod (lme4) model objects, the relevant parameters are parameters are use.u and re.form | |
1601 #' | |
1602 #' If the model is correctly specified, the simulated residuals should be flat regardless how many hierarchical levels we re-simulate. The most thorough procedure would therefore be to test all possible options. If testing only one option, I would recommend to re-simulate all levels, because this essentially tests the model structure as a whole. This is the default setting in the DHARMa package. A potential drawback is that re-simulating the lower-level random effects creates more variability, which may reduce power for detecting problems in the upper-level stochastic processes. In particular dispersion tests may produce different results when switching from conditional to unconditional simulations, and often the conditional simulation is more sensitive. | |
1603 #' | |
1604 #' \strong{Integer responses}: a second complication is the treatment of inter responses. Imaging we have observed a 0, and we predict 30\% zeros - what is the quantile that we should display for the residual? To deal with this problem and maintain a uniform response, the option integerResponse adds a uniform noise from -0.5 to 0.5 on the simulated and observed response, which creates a uniform distribution - you can see this via hist(ecdf(runif(10000))(runif(10000))). | |
1605 #' | |
1606 #' DHARMa will try to automatically if the fitted model has an integer or discrete distribution via the family argument. However, in some cases the family does not allow to uniquely identify the distribution type. For example, a tweedie distribution can be inter or continuous. Therefore, DHARMa will additionally check the simulation results for repeated values, and will change the distribution type if repeated values are found (a message is displayed in this case). | |
1607 #' | |
1608 #' \strong{Refitting or not}: a third issue is how residuals are calculated. simulateResiduals has two options that are controlled by the refit parameter: | |
1609 #' | |
1610 #' 1. if refit = FALSE (default), new data is simulated from the fitted model, and residuals are calculated by comparing the observed data to the new data | |
1611 #' | |
1612 #' 2. if refit = TRUE, a parametric bootstrap is performed, meaning that the model is refit on the new data, and residuals are created by comparing observed residuals against refitted residuals. I advise against using this method per default (see more comments in the vignette), unless you are really sure that you need it. | |
1613 #' | |
1614 #' \strong{Residuals per group}: In many situations, it can be useful to look at residuals per group, e.g. to see how much the model over / underpredicts per plot, year or subject. To do this, use \code{\link{recalculateResiduals}}, together with a grouping variable (see also help) | |
1615 #' | |
1616 #' \strong{Transformation to other distributions}: DHARMa calculates residuals for which the theoretical expectation (assuming a correctly specified model) is uniform. To transfor this residuals to another distribution (e.g. so that a correctly specified model will have normal residuals) see \code{\link{residuals.DHARMa}}. | |
1617 #' | |
1618 #' @seealso \code{\link{testResiduals}}, \code{\link{plot.DHARMa}}, \code{\link{plotResiduals}}, \code{\link{print.DHARMa}}, \code{\link{residuals.DHARMa}}, \code{\link{recalculateResiduals}} | |
1619 #' | |
1620 #' | |
1621 #' @example inst/examples/simulateResidualsHelp.R | |
1622 #' @import stats | |
1623 #' @export | |
1624 simulateResiduals <- function(fittedModel, n = 250, refit = F, integerResponse = NULL, plot = F, seed = 123, method = c("PIT", "traditional"), ...){ | |
1625 | |
1626 ######## general assertions and startup calculations ########## | |
1627 | |
1628 if (n < 2) stop("error in DHARMa::simulateResiduals: n > 1 is required to calculate scaled residuals") | |
1629 checkModel(fittedModel) | |
1630 match.arg(method) | |
1631 randomState <-getRandomState(seed) | |
1632 on.exit({randomState$restoreCurrent()}) | |
1633 ptm <- proc.time() | |
1634 | |
1635 ####### extract model info ############ | |
1636 | |
1637 out = list() | |
1638 | |
1639 family = family(fittedModel) | |
1640 out$fittedModel = fittedModel | |
1641 out$modelClass = class(fittedModel)[1] | |
1642 | |
1643 out$nObs = nobs(fittedModel) | |
1644 out$nSim = n | |
1645 out$refit = refit | |
1646 out$observedResponse = getObservedResponse(fittedModel) | |
1647 | |
1648 if(is.null(integerResponse)){ | |
1649 if (family$family %in% c("binomial", "poisson", "quasibinomial", "quasipoisson", "Negative Binom", "nbinom2", "nbinom1", "genpois", "compois", "truncated_poisson", "truncated_nbinom2", "truncated_nbinom1", "betabinomial", "Poisson", "Tpoisson", "COMPoisson", "negbin", "Tnegbin") | grepl("Negative Binomial",family$family) ) integerResponse = TRUE | |
1650 else integerResponse = FALSE | |
1651 } | |
1652 out$integerResponse = integerResponse | |
1653 | |
1654 out$problems = list() | |
1655 | |
1656 # re-form should be set to ~0 to avoid spurious residual patterns, see https://github.com/florianhartig/DHARMa/issues/43 | |
1657 | |
1658 if(out$modelClass %in% c("HLfit")){ | |
1659 out$fittedPredictedResponse = predict(fittedModel, type = "response", re.form = ~0)[,1L] | |
1660 }else{ | |
1661 out$fittedPredictedResponse = predict(fittedModel, type = "response", re.form = ~0) | |
1662 } | |
1663 | |
1664 out$fittedFixedEffects = getFixedEffects(fittedModel) | |
1665 out$fittedResiduals = residuals(fittedModel, type = "response") | |
1666 | |
1667 ######## refit = F ################## | |
1668 | |
1669 if (refit == FALSE){ | |
1670 | |
1671 out$simulatedResponse = getSimulations(fittedModel, nsim = n, type = "normal", ...) | |
1672 | |
1673 checkSimulations(out$simulatedResponse, out$nObs, out$nSim) | |
1674 | |
1675 out$scaledResiduals = getQuantile(simulations = out$simulatedResponse , observed = out$observedResponse , integerResponse = integerResponse, method = method) | |
1676 | |
1677 ######## refit = T ################## | |
1678 } else { | |
1679 | |
1680 # Adding new outputs | |
1681 | |
1682 out$refittedPredictedResponse <- matrix(nrow = out$nObs, ncol = n ) | |
1683 out$refittedFixedEffects <- matrix(nrow = length(out$fittedFixedEffects), ncol = n ) | |
1684 #out$refittedRandomEffects <- matrix(nrow = length(out$fittedRandomEffects), ncol = n ) | |
1685 out$refittedResiduals = matrix(nrow = out$nObs, ncol = n) | |
1686 out$refittedPearsonResiduals = matrix(nrow = out$nObs, ncol = n) | |
1687 | |
1688 out$simulatedResponse = getSimulations(fittedModel, nsim = n, type = "refit", ...) | |
1689 | |
1690 for (i in 1:n){ | |
1691 | |
1692 simObserved = out$simulatedResponse[[i]] | |
1693 | |
1694 try({ | |
1695 | |
1696 # for testing | |
1697 # if (i==3) stop("x") | |
1698 # Note: also set silent = T for production | |
1699 | |
1700 refittedModel = getRefit(fittedModel, simObserved) | |
1701 | |
1702 out$refittedPredictedResponse[,i] = predict(refittedModel, type = "response") | |
1703 out$refittedFixedEffects[,i] = getFixedEffects(refittedModel) | |
1704 out$refittedResiduals[,i] = residuals(refittedModel, type = "response") | |
1705 out$refittedPearsonResiduals[,i] = residuals(refittedModel, type = "pearson") | |
1706 #out$refittedRandomEffects[,i] = ranef(refittedModel) | |
1707 }, silent = TRUE) | |
1708 } | |
1709 | |
1710 ######### residual checks ########### | |
1711 | |
1712 if(anyNA(out$refittedResiduals)) warning("DHARMa::simulateResiduals warning: on refit = TRUE, at least one of the refitted models produced an error. Inspect the refitted model values. Results may not be reliable.") | |
1713 | |
1714 ## check for convergence problems | |
1715 | |
1716 dup = sum(duplicated(out$refittedFixedEffects, MARGIN = 2)) | |
1717 if (dup > 0){ | |
1718 if (dup < n/3){ | |
1719 warning(paste("There were", dup, "of", n ,"duplicate parameter estimates in the refitted models. This may hint towards a problem with optimizer convergence in the fitted models. Results may not be reliable. The suggested action is to not use the refitting procedure, and diagnose with tools available for the normal (not refitted) simulated residuals. If you absolutely require the refitting procedure, try changing tolerance / iterations in the optimizer settings.")) | |
1720 } else { | |
1721 warning(paste("There were", dup, "of", n ,"duplicate parameter estimates in the refitted models. This may hint towards a problem with optimizer convergence in the fitted models. Results are likely not reliable. The suggested action is to not use the refitting procedure, and diagnose with tools available for the normal (not refitted) simulated residuals. If you absolutely require the refitting procedure, try changing tolerance / iterations in the optimizer settings.")) | |
1722 out$problems[[length(out$problems)+ 1]] = "error in refit" | |
1723 } | |
1724 } | |
1725 | |
1726 ######### residual calculations ########### | |
1727 | |
1728 out$scaledResiduals = getQuantile(simulations = out$refittedResiduals, observed = out$fittedResiduals, integerResponse = integerResponse, method = method) | |
1729 } | |
1730 | |
1731 ########### Wrapup ############ | |
1732 | |
1733 out$time = proc.time() - ptm | |
1734 out$randomState = randomState | |
1735 | |
1736 class(out) = "DHARMa" | |
1737 | |
1738 if(plot == TRUE) plot(out) | |
1739 | |
1740 return(out) | |
1741 } | |
1742 | |
1743 getPossibleModels<-function()c("lm", "glm", "negbin", "lmerMod", "glmerMod", "gam", "bam", "glmmTMB", "HLfit") | |
1744 | |
1745 | |
1746 | |
1747 #' Check if the fitted model is supported by DHARMa | |
1748 #' | |
1749 #' The function checks if the fitted model is supported by DHARMa, and if there are other issues that could create problems | |
1750 #' | |
1751 #' @param fittedModel a fitted model | |
1752 #' @param stop whether to throw an error if the model is not supported by DHARMa | |
1753 #' | |
1754 #' @details The main purpose of this function os to check if the fitted model class is supported by DHARMa. The function additionally checks for properties of the fitted model that could create problems for calculating residuals or working with the resuls in DHARMa. | |
1755 #' | |
1756 #' | |
1757 #' @keywords internal | |
1758 checkModel <- function(fittedModel, stop = F){ | |
1759 | |
1760 out = T | |
1761 | |
1762 if(!(class(fittedModel)[1] %in% getPossibleModels())){ | |
1763 if(stop == FALSE) warning("DHARMa: fittedModel not in class of supported models. Absolutely no guarantee that this will work!") | |
1764 else stop("DHARMa: fittedModel not in class of supported models") | |
1765 } | |
1766 | |
1767 # if(hasNA(fittedModel)) message("It seems there were NA values in the data used for fitting the model. This can create problems if you supply additional data to DHARMa functions. See ?checkModel for details") | |
1768 | |
1769 # TODO: check as implemented does not work reliably, check if there is any other option to check for NA | |
1770 # #' @example inst/examples/checkModelHelp.R | |
1771 | |
1772 # NA values in the data: checkModel will detect if there were NA values in the data frame. For NA values, most regression models will remove the entire observation from the data. This is not a problem for DHARMa - residuals are then only calculated for non-NA rows in the data. However, if you provide additional predictors to DHARMa, for example to plot residuals against a predictor, you will have to remove all NA rows that were also removed in the model. For most models, you can get the rows of the data that were actually used in the fit via rownames(model.frame(fittedModel)) | |
1773 | |
1774 | |
1775 if (class(fittedModel)[1] == "gam" ) if (class(fittedModel$family)[1] == "extended.family") stop("It seems you are trying to fit a model from mgcv that was fit with an extended.family. Simulation functions for these families are not yet implemented in DHARMa. See issue https://github.com/florianhartig/DHARMa/issues/11 for updates about this") | |
1776 | |
1777 } | |
1778 | |
1779 | |
1780 | |
1781 #' Check simulated data | |
1782 #' | |
1783 #' The function checks if the simulated data seems fine | |
1784 #' | |
1785 #' @param simulatedResponse the simulated response | |
1786 #' @param nObs number of observations | |
1787 #' @param nSim number of simulations | |
1788 #' | |
1789 #' @keywords internal | |
1790 checkSimulations <- function(simulatedResponse, nObs, nSim){ | |
1791 | |
1792 if(!inherits(simulatedResponse, "matrix")) securityAssertion("Simulation from the model produced wrong class", stop = T) | |
1793 | |
1794 if(any(dim(simulatedResponse) != c(nObs, nSim) )) securityAssertion("Simulation from the model produced wrong dimension", stop = T) | |
1795 | |
1796 if(any(!is.finite(simulatedResponse))) message("Simulations from your fitted model produce infinite values. Consider if this is sensible") | |
1797 | |
1798 if(any(is.nan(simulatedResponse))) securityAssertion("Simulations from your fitted model produce NaN values. DHARMa cannot calculated residuals for this. This is nearly certainly an error of the regression package you are using", stop = T) | |
1799 if(any(is.na(simulatedResponse))) securityAssertion("Simulations from your fitted model produce NA values. DHARMa cannot calculated residuals for this. This is nearly certainly an error of the regression package you are using", stop = T) | |
1800 | |
1801 } | |
1802 | |
1803 | |
1804 | |
1805 | |
1806 #' Recalculate residuals with grouping | |
1807 #' | |
1808 #' The purpose of this function is to recalculate scaled residuals per group, based on the simulations done by \code{\link{simulateResiduals}} | |
1809 #' | |
1810 #' @param simulationOutput an object with simulated residuals created by \code{\link{simulateResiduals}} | |
1811 #' @param group group of each data point | |
1812 #' @param aggregateBy function for the aggregation. Default is sum. This should only be changed if you know what you are doing. Note in particular that the expected residual distribution might not be flat any more if you choose general functions, such as sd etc. | |
1813 #' @param seed the random seed to be used within DHARMa. The default setting, recommended for most users, is keep the random seed on a fixed value 123. This means that you will always get the same randomization and thus teh same result when running the same code. NULL = no new seed is set, but previous random state will be restored after simulation. FALSE = no seed is set, and random state will not be restored. The latter two options are only recommended for simulation experiments. See vignette for details. | |
1814 #' @param method the quantile randomization method used. The two options implemented at the moment are probability integral transform (PIT-) residuals (current default), and the "traditional" randomization procedure, that was used in DHARMa until version 0.3.0. For details, see \code{\link{getQuantile}} | |
1815 #' @return an object of class DHARMa, similar to what is returned by \code{\link{simulateResiduals}}, but with additional outputs for the new grouped calculations. Note that the relevant outputs are 2x in the object, the first is the grouped calculations (which is returned by $name access), and later another time, under identical name, the original output. Moreover, there is a function 'aggregateByGroup', which can be used to aggregate predictor variables in the same way as the variables calculated here | |
1816 #' | |
1817 #' @example inst/examples/simulateResidualsHelp.R | |
1818 #' @export | |
1819 recalculateResiduals <- function(simulationOutput, group = NULL, aggregateBy = sum, seed = 123, method = c("PIT", "traditional")){ | |
1820 | |
1821 randomState <-getRandomState(seed) | |
1822 on.exit({randomState$restoreCurrent()}) | |
1823 match.arg(method) | |
1824 | |
1825 if(!is.null(simulationOutput$original)) simulationOutput = simulationOutput$original | |
1826 | |
1827 out = list() | |
1828 out$original = simulationOutput | |
1829 | |
1830 if(is.null(group)) return(simulationOutput) | |
1831 else group =as.factor(group) | |
1832 out$nGroups = nlevels(group) | |
1833 | |
1834 aggregateByGroup <- function(x) aggregate(x, by=list(group), FUN=aggregateBy)[,2] | |
1835 | |
1836 out$observedResponse = aggregateByGroup(simulationOutput$observedResponse) | |
1837 out$fittedPredictedResponse = aggregateByGroup(simulationOutput$fittedPredictedResponse) | |
1838 | |
1839 if (simulationOutput$refit == F){ | |
1840 | |
1841 out$simulatedResponse = apply(simulationOutput$simulatedResponse, 2, aggregateByGroup) | |
1842 out$scaledResiduals = getQuantile(simulations = out$simulatedResponse , observed = out$observedResponse , integerResponse = simulationOutput$integerResponse, method = method) | |
1843 | |
1844 ######## refit = T ################## | |
1845 } else { | |
1846 | |
1847 out$refittedPredictedResponse <- apply(simulationOutput$refittedPredictedResponse, 2, aggregateByGroup) | |
1848 out$fittedResiduals = aggregateByGroup(simulationOutput$fittedResiduals) | |
1849 out$refittedResiduals = apply(simulationOutput$refittedResiduals, 2, aggregateByGroup) | |
1850 out$refittedPearsonResiduals = apply(simulationOutput$refittedPearsonResiduals, 2, aggregateByGroup) | |
1851 | |
1852 out$scaledResiduals = getQuantile(simulations = out$refittedResiduals , observed = out$fittedResiduals , integerResponse = simulationOutput$integerResponse, method = method) | |
1853 | |
1854 } | |
1855 | |
1856 # hack - the c here will result in both old and new outputs to be present resulting output, but a named access should refer to the new, grouped calculations | |
1857 # question to myself - what's the use of that, why not erase the old outputs? they are anyway saved in the old object | |
1858 | |
1859 out$aggregateByGroup = aggregateByGroup | |
1860 out = c(out, simulationOutput) | |
1861 out$randomState = randomState | |
1862 class(out) = "DHARMa" | |
1863 return(out) | |
1864 } | |
1865 | |
1866 ################ simulateResiduals.R | |
1867 | |
1868 ################ DHARMa.R | |
1869 | |
1870 #' @title DHARMa - Residual Diagnostics for HierArchical (Multi-level / Mixed) Regression Models | |
1871 #' @name DHARMa | |
1872 #' @docType package | |
1873 #' @description The 'DHARMa' package uses a simulation-based approach to create readily interpretable scaled (quantile) residuals for fitted (generalized) linear mixed models. Currently supported are linear and generalized linear (mixed) models from 'lme4' (classes 'lmerMod', 'glmerMod'), 'glmmTMB' and 'spaMM', generalized additive models ('gam' from 'mgcv'), 'glm' (including 'negbin' from 'MASS', but excluding quasi-distributions) and 'lm' model classes. Moreover, externally created simulations, e.g. posterior predictive simulations from Bayesian software such as 'JAGS', 'STAN', or 'BUGS' can be processed as well. The resulting residuals are standardized to values between 0 and 1 and can be interpreted as intuitively as residuals from a linear regression. The package also provides a number of plot and test functions for typical model misspecification problems, such as over/underdispersion, zero-inflation, and residual spatial and temporal autocorrelation. | |
1874 #' @details See index / vignette for details | |
1875 #' @seealso \code{\link{simulateResiduals}} | |
1876 #' @examples | |
1877 #' vignette("DHARMa", package="DHARMa") | |
1878 NULL | |
1879 | |
1880 | |
1881 #' Print simulated residuals | |
1882 #' | |
1883 #' @param x an object with simulated residuals created by \code{\link{simulateResiduals}} | |
1884 #' @param ... optional arguments for compatibility with the generic function, no function implemented | |
1885 #' @export | |
1886 print.DHARMa <- function(x, ...){ | |
1887 cat(paste("Object of Class DHARMa with simulated residuals based on", x$nSim, "simulations with refit =", x$refit , ". See ?DHARMa::simulateResiduals for help."), "\n", "\n") | |
1888 if (length(x$scaledResiduals) < 20) cat("Scaled residual values:", x$scaledResiduals) | |
1889 else { | |
1890 cat("Scaled residual values:", x$scaledResiduals[1:20], "...") | |
1891 } | |
1892 } | |
1893 | |
1894 #' Return residuals of a DHARMa simulation | |
1895 #' | |
1896 #' @param object an object with simulated residuals created by \code{\link{simulateResiduals}} | |
1897 #' @param quantileFunction optional - a quantile function to transform the uniform 0/1 scaling of DHARMa to another distribution | |
1898 #' @param outlierValues if a quantile function with infinite support (such as dnorm) is used, residuals that are 0/1 are mapped to -Inf / Inf. outlierValues allows to convert -Inf / Inf values to an optional min / max value. | |
1899 #' @param ... optional arguments for compatibility with the generic function, no function implemented | |
1900 #' @details the function accesses the slot $scaledResiduals in a fitted DHARMa object, and optionally transforms the standard DHARMa quantile residuals (which have a uniform distribution) to a particular pdf. | |
1901 #' | |
1902 #' @note some of the papers on simulated quantile residuals transforming the residuals (which are natively uniform) back to a normal distribution. I presume this is because of the larger familiarity of most users with normal residuals. Personally, I never considered this desirable, for the reasons explained in https://github.com/florianhartig/DHARMa/issues/39, but with this function, I wanted to give users the option to plot normal residuals if they so wish. | |
1903 #' | |
1904 #' @export | |
1905 #' @example inst/examples/simulateResidualsHelp.R | |
1906 #' | |
1907 residuals.DHARMa <- function(object, quantileFunction = NULL, outlierValues = NULL, ...){ | |
1908 | |
1909 if(is.null(quantileFunction)){ | |
1910 return(object$scaledResiduals) | |
1911 } else { | |
1912 res = quantileFunction(object$scaledResiduals) | |
1913 if(!is.null(outlierValues)){ | |
1914 res = ifelse(res == -Inf, outlierValues[1], res) | |
1915 res = ifelse(res == Inf, outlierValues[2], res) | |
1916 } | |
1917 return(res) | |
1918 } | |
1919 } | |
1920 | |
1921 | |
1922 | |
1923 #' Return outliers | |
1924 #' | |
1925 #' Returns the outliers of a DHARMa object | |
1926 #' | |
1927 #' @param object an object with simulated residuals created by \code{\link{simulateResiduals}} | |
1928 #' @param lowerQuantile lower threshold for outliers. Default is zero = outside simulation envelope | |
1929 #' @param upperQuantile upper threshold for outliers. Default is 1 = outside simulation envelope | |
1930 #' @param return wheter to return an indices of outliers or a logical vector | |
1931 #' | |
1932 #' @details First of all, note that the standard definition of outlier in the DHARMa plots and outlier tests is an observation that is outside the simulation envelope. How far outside that is depends a lot on how many simulations you do. If you have 100 data points and to 100 simulations, you would expect to have one "outlier" on average, even with a perfectly fitting model. This is in fact what the outlier test tests. | |
1933 #' | |
1934 #' Thus, keep in mind that for a small number of simulations, outliers are mostly a technical term: these are points that are outside our simulations, but we don't know how far away they are. | |
1935 #' | |
1936 #' If you are seriously interested in HOW FAR outside the expected distribution a data point is, you should increase the number of simulations in \code{\link{simulateResiduals}} to be sure to get the tail of the data distribution correctly. In this case, it may make sense to adjust lowerQuantile and upperQuantile, e.g. to 0.025, 0.975, which would define outliers as values outside the central 95% of the distribution. | |
1937 #' | |
1938 #' Also, note that outliers are particularly concerning if they have a strong influence on the model fit. One could test the influence, for example, by removing them from the data, or by some meausures of leverage, e.g. generalisations for Cook's distance as in Pinho, L. G. B., Nobre, J. S., & Singer, J. M. (2015). Cook’s distance for generalized linear mixed models. Computational Statistics & Data Analysis, 82, 126–136. doi:10.1016/j.csda.2014.08.008. At the moment, however, no such function is provided in DHARMa. | |
1939 #' | |
1940 #' @export | |
1941 #' | |
1942 outliers <- function(object, lowerQuantile = 0, upperQuantile = 1, return = c("index", "logical")){ | |
1943 | |
1944 return = match.arg(return) | |
1945 | |
1946 out = residuals(object) >= upperQuantile | residuals(object) <= lowerQuantile | |
1947 | |
1948 if(return == "logical") return(out) | |
1949 else(return(which(out))) | |
1950 } | |
1951 | |
1952 | |
1953 | |
1954 #' Create a DHARMa object from hand-coded simulations or Bayesian posterior predictive simulations | |
1955 #' | |
1956 #' @param simulatedResponse matrix of observations simulated from the fitted model - row index for observations and colum index for simulations | |
1957 #' @param observedResponse true observations | |
1958 #' @param fittedPredictedResponse optional fitted predicted response. For Bayesian posterior predictive simulations, using the median posterior prediction as fittedPredictedResponse is recommended. If not provided, the mean simulatedResponse will be used. | |
1959 #' @param integerResponse if T, noise will be added at to the residuals to maintain a uniform expectations for integer responses (such as Poisson or Binomial). Unlike in \code{\link{simulateResiduals}}, the nature of the data is not automatically detected, so this MUST be set by the user appropriately | |
1960 #' @param seed the random seed to be used within DHARMa. The default setting, recommended for most users, is keep the random seed on a fixed value 123. This means that you will always get the same randomization and thus teh same result when running the same code. NULL = no new seed is set, but previous random state will be restored after simulation. FALSE = no seed is set, and random state will not be restored. The latter two options are only recommended for simulation experiments. See vignette for details. | |
1961 #' @param method the quantile randomization method used. The two options implemented at the moment are probability integral transform (PIT-) residuals (current default), and the "traditional" randomization procedure, that was used in DHARMa until version 0.3.0. For details, see \code{\link{getQuantile}} | |
1962 #' @details The use of this function is to convert simulated residuals (e.g. from a point estimate, or Bayesian p-values) to a DHARMa object, to make use of the plotting / test functions in DHARMa | |
1963 #' @note Either scaled residuals or (simulatedResponse AND observed response) have to be provided | |
1964 #' @example inst/examples/createDharmaHelp.R | |
1965 #' @export | |
1966 createDHARMa <- function(simulatedResponse , observedResponse , fittedPredictedResponse = NULL, integerResponse = F, seed = 123, method = c("PIT", "traditional")){ | |
1967 | |
1968 randomState <-getRandomState(seed) | |
1969 on.exit({randomState$restoreCurrent()}) | |
1970 match.arg(method) | |
1971 | |
1972 out = list() | |
1973 out$simulatedResponse = simulatedResponse | |
1974 out$refit = F | |
1975 out$integerResponse = integerResponse | |
1976 out$observedResponse = observedResponse | |
1977 | |
1978 if(!is.matrix(simulatedResponse) & !is.null(observedResponse)) stop("either scaled residuals or simulations and observations have to be provided") | |
1979 if(ncol(simulatedResponse) < 2) stop("simulatedResponse with less than 2 simulations provided - cannot calculate residuals on that.") | |
1980 | |
1981 if(ncol(simulatedResponse) < 10) warning("simulatedResponse with less than 10 simulations provided. This rarely makes sense") | |
1982 | |
1983 out$nObs = length(observedResponse) | |
1984 | |
1985 if (out$nObs < 3) stop("warning - number of observations < 3 ... this rarely makes sense") | |
1986 | |
1987 if(! (out$nObs == nrow(simulatedResponse))) stop("dimensions of observedResponse and simulatedResponse do not match") | |
1988 | |
1989 out$nSim = ncol(simulatedResponse) | |
1990 | |
1991 out$scaledResiduals = getQuantile(simulations = simulatedResponse , observed = observedResponse , integerResponse = integerResponse, method = method) | |
1992 | |
1993 | |
1994 # makes sure that DHARM plots that rely on this vector won't crash | |
1995 if(is.null(fittedPredictedResponse)){ | |
1996 message("No fitted predicted response provided, using the mean of the simulations") | |
1997 fittedPredictedResponse = apply(simulatedResponse, 1, mean) | |
1998 } | |
1999 out$fittedPredictedResponse = fittedPredictedResponse | |
2000 out$randomState = randomState | |
2001 class(out) = "DHARMa" | |
2002 return(out) | |
2003 } | |
2004 | |
2005 | |
2006 #' Ensures that an object is of class DHARMa | |
2007 #' | |
2008 #' @param simulationOutput a DHARMa simulation output or an object that can be converted into a DHARMa simulation output | |
2009 #' @param convert if TRUE, attempts to convert model + numeric to DHARMa, if "Model", converts only supported models to DHARMa | |
2010 #' @details The | |
2011 #' @return an object of class DHARMa | |
2012 #' @keywords internal | |
2013 ensureDHARMa <- function(simulationOutput, | |
2014 convert = F){ | |
2015 | |
2016 if(inherits(simulationOutput, "DHARMa")){ | |
2017 return(simulationOutput) | |
2018 } else { | |
2019 | |
2020 if(convert == FALSE) stop("wrong argument to function, simulationOutput must be a DHARMa object!") | |
2021 else { | |
2022 | |
2023 if (class(simulationOutput)[1] %in% getPossibleModels()){ | |
2024 if (convert == "Model" | convert == T) return(simulateResiduals(simulationOutput)) | |
2025 } else if(is.vector(simulationOutput, mode = "numeric") & convert == T) { | |
2026 out = list() | |
2027 out$scaledResiduals = simulationOutput | |
2028 out$nObs = length(out$scaledResiduals) | |
2029 class(out) = "DHARMa" | |
2030 return(out) | |
2031 } | |
2032 } | |
2033 } | |
2034 stop("wrong argument to function, simulationOutput must be a DHARMa object or a numeric vector of quantile residuals!") | |
2035 } | |
2036 | |
2037 ####################### DHARMa.R | |
2038 | |
2039 ####################### tests.R | |
2040 | |
2041 #' DHARMa general residual test | |
2042 #' | |
2043 #' Calls both uniformity and dispersion test | |
2044 #' | |
2045 #' This function is a wrapper for the various test functions implemented in DHARMa. Currently, this function calls the \code{\link{testUniformity}} and the \code{\link{testDispersion}} functions. All other tests (see list below) have to be called by hand. | |
2046 #' | |
2047 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa | |
2048 #' @param plot if T, plots functions of the tests are called | |
2049 #' @author Florian Hartig | |
2050 #' @seealso \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}} | |
2051 #' @example inst/examples/testsHelp.R | |
2052 #' @export | |
2053 testResiduals <- function(simulationOutput, plot = T){ | |
2054 | |
2055 opar = par(mfrow = c(1,3)) | |
2056 on.exit(par(opar)) | |
2057 out = list() | |
2058 out$uniformity = testUniformity(simulationOutput, plot = plot) | |
2059 out$dispersion = testDispersion(simulationOutput, plot = plot) | |
2060 out$outliers = testOutliers(simulationOutput, plot = plot) | |
2061 | |
2062 print(out) | |
2063 return(out) | |
2064 } | |
2065 | |
2066 #' Residual tests | |
2067 #' | |
2068 #' @details Deprecated, switch your code to using the \code{\link{testResiduals}} function | |
2069 #' | |
2070 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa | |
2071 #' @author Florian Hartig | |
2072 #' @export | |
2073 testSimulatedResiduals <- function(simulationOutput){ | |
2074 message("testSimulatedResiduals is deprecated, switch your code to using the testResiduals function") | |
2075 testResiduals(simulationOutput) | |
2076 } | |
2077 | |
2078 | |
2079 #' Test for overall uniformity | |
2080 #' | |
2081 #' This function tests the overall uniformity of the simulated residuals in a DHARMa object | |
2082 #' | |
2083 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa | |
2084 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" compared to the simulated null hypothesis. See \code{\link[stats]{ks.test}} for details | |
2085 #' @param plot if T, plots calls \code{\link{plotQQunif}} as well | |
2086 #' @details The function applies a \code{\link[stats]{ks.test}} for uniformity on the simulated residuals. | |
2087 #' @author Florian Hartig | |
2088 #' @seealso \code{\link{testResiduals}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}} | |
2089 #' @example inst/examples/testsHelp.R | |
2090 #' @export | |
2091 testUniformity<- function(simulationOutput, alternative = c("two.sided", "less", "greater"), plot = T){ | |
2092 | |
2093 simulationOutput = ensureDHARMa(simulationOutput, convert = T) | |
2094 | |
2095 out <- suppressWarnings(ks.test(simulationOutput$scaledResiduals, 'punif', alternative = alternative)) | |
2096 if(plot == T) plotQQunif(simulationOutput = simulationOutput) | |
2097 return(out) | |
2098 } | |
2099 | |
2100 | |
2101 # Experimental | |
2102 testBivariateUniformity<- function(simulationOutput, alternative = c("two.sided", "less", "greater"), plot = T){ | |
2103 | |
2104 simulationOutput = ensureDHARMa(simulationOutput, convert = T) | |
2105 | |
2106 #out <- suppressWarnings(ks.test(simulationOutput$scaledResiduals, 'punif', alternative = alternative)) | |
2107 #if(plot == T) plotQQunif(simulationOutput = simulationOutput) | |
2108 out = NULL | |
2109 return(out) | |
2110 } | |
2111 | |
2112 | |
2113 | |
2114 #' Test for quantiles | |
2115 #' | |
2116 #' This function tests | |
2117 #' | |
2118 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa | |
2119 #' @param predictor an optional predictor variable to be used, instead of the predicted response (default) | |
2120 #' @param quantiles the quantiles to be tested | |
2121 #' @param plot if T, the function will create an additional plot | |
2122 #' @details The function fits quantile regressions (via package qgam) on the residuals, and compares their location to the expected location (because of the uniform distributionm, the expected location is 0.5 for the 0.5 quantile). | |
2123 #' | |
2124 #' A significant p-value for the splines means the fitted spline deviates from a flat line at the expected location (p-values of intercept and spline are combined via Benjamini & Hochberg adjustment to control the FDR) | |
2125 #' | |
2126 #' The p-values of the splines are combined into a total p-value via Benjamini & Hochberg adjustment to control the FDR. | |
2127 #' | |
2128 #' @author Florian Hartig | |
2129 #' @example inst/examples/testQuantilesHelp.R | |
2130 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testOutliers}} | |
2131 #' @export | |
2132 testQuantiles <- function(simulationOutput, predictor = NULL, quantiles = c(0.25,0.5,0.75), plot = T){ | |
2133 | |
2134 if(plot == F){ | |
2135 | |
2136 out = list() | |
2137 out$data.name = deparse(substitute(simulationOutput)) | |
2138 | |
2139 simulationOutput = ensureDHARMa(simulationOutput, convert = T) | |
2140 res = simulationOutput$scaledResiduals | |
2141 pred = ensurePredictor(simulationOutput, predictor) | |
2142 | |
2143 dat=data.frame(res = simulationOutput$scaledResiduals , pred = pred) | |
2144 | |
2145 quantileFits <- list() | |
2146 pval = rep(NA, length(quantiles)) | |
2147 predictions = data.frame(pred = sort(dat$pred)) | |
2148 predictions = cbind(predictions, matrix(ncol = 2 * length(quantiles), nrow = nrow(dat))) | |
2149 for(i in 1:length(quantiles)){ | |
2150 datTemp = dat | |
2151 datTemp$res = datTemp$res - quantiles[i] | |
2152 | |
2153 # settings for k = the dimension of the basis used to represent the smooth term. | |
2154 # see https://github.com/mfasiolo/qgam/issues/37 | |
2155 dimSmooth = min(length(unique(datTemp$pred)), 10) | |
2156 quantResult = try(capture.output(quantileFits[[i]] <- qgam::qgam(res ~ s(pred, k = dimSmooth) , data =datTemp, qu = quantiles[i])), silent = T) | |
2157 if(inherits(quantResult, "try-error")){ | |
2158 message("Unable to calculate quantile regression for quantile ", quantiles[i], ". Possibly to few (unique) data points / predictions. Will be ommited in plots and significance calculations.") | |
2159 } else { | |
2160 x = summary(quantileFits[[i]]) | |
2161 pval[i] = min(p.adjust(c(x$p.table[1,4], x$s.table[1,4]), method = "BH")) # correction for test on slope and intercept | |
2162 quantPre = predict(quantileFits[[i]], newdata = predictions, se = T) | |
2163 predictions[, 2*i] = quantPre$fit + quantiles[i] | |
2164 predictions[, 2*i + 1] = quantPre$se.fit | |
2165 } | |
2166 } | |
2167 | |
2168 out$method = "Test for location of quantiles via qgam" | |
2169 out$alternative = "both" | |
2170 out$pvals = pval | |
2171 out$p.value = min(p.adjust(pval, method = "BH")) # correction for multiple quantile tests | |
2172 out$predictions = predictions | |
2173 out$qgamFits = quantileFits | |
2174 | |
2175 class(out) = "htest" | |
2176 | |
2177 } else if(plot == T) { | |
2178 out <- plotResiduals(simulationOutput = simulationOutput, predictor = predictor, quantiles = quantiles) | |
2179 } | |
2180 return(out) | |
2181 } | |
2182 | |
2183 | |
2184 #unif.2017YMi(X, type = c("Q1", "Q2", "Q3"), lower = rep(0, ncol(X)),upper = rep(1, ncol(X))) | |
2185 | |
2186 #' Test for outliers | |
2187 #' | |
2188 #' This function tests if the number of observations outside the simulatio envelope are larger or smaller than expected | |
2189 #' | |
2190 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa | |
2191 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" (default) compared to the simulated null hypothesis | |
2192 #' @param margin whether to test for outliers only at the lower, only at the upper, or both sides (default) of the simulated data distribution | |
2193 #' @param plot if T, the function will create an additional plot | |
2194 #' @details DHARMa residuals are created by simulating from the fitted model, and comparing the simulated values to the observed data. It can occur that all simulated values are higher or smaller than the observed data, in which case they get the residual value of 0 and 1, respectively. I refer to these values as simulation outliers, or simply outliers. | |
2195 #' | |
2196 #' Because no data was simulated in the range of the observed value, we don't know "how strongly" these values deviate from the model expectation, so the term "outlier" should be used with a grain of salt - it's not a judgment about the magnitude of a deviation from an expectation, but simply that we are outside the simulated range, and thus cannot say anything more about the location of the residual. | |
2197 #' | |
2198 #' Note also that the number of outliers will decrease as we increase the number of simulations. Under the null hypothesis that the model is correct, we expect nData /(nSim +1) outliers at each margin of the distribution. For a reason, consider that if the data and the model distribution are identical, the probability that a given observation is higher than all simulations is 1/(nSim +1). | |
2199 #' | |
2200 #' Based on this null expectation, we can test for an excess or lack of outliers. Per default, testOutliers() looks for both, so if you get a significant p-value, you have to check if you have to many or too few outliers. An excess of outliers is to be interpreted as too many values outside the simulation envelope. This could be caused by overdispersion, or by what we classically call outliers. A lack of outliers would be caused, for example, by underdispersion. | |
2201 #' | |
2202 #' | |
2203 #' @author Florian Hartig | |
2204 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}} | |
2205 #' @example inst/examples/testOutliersHelp.R | |
2206 #' @export | |
2207 testOutliers <- function(simulationOutput, alternative = c("two.sided", "greater", "less"), margin = c("both", "upper", "lower"), plot = T){ | |
2208 | |
2209 # check inputs | |
2210 alternative = match.arg(alternative) | |
2211 margin = match.arg(margin) | |
2212 data.name = deparse(substitute(simulationOutput)) # remember: needs to be called before ensureDHARMa | |
2213 simulationOutput = ensureDHARMa(simulationOutput, convert = "Model") | |
2214 | |
2215 # calculation of outliers | |
2216 if(margin == "both") outliers = sum(simulationOutput$scaledResiduals %in% c(0, 1)) | |
2217 if(margin == "upper") outliers = sum(simulationOutput$scaledResiduals == 1) | |
2218 if(margin == "lower") outliers = sum(simulationOutput$scaledResiduals == 0) | |
2219 | |
2220 # calculations of trials and H0 | |
2221 outFreqH0 = 1/(simulationOutput$nSim +1) * ifelse(margin == "both", 2, 1) | |
2222 trials = simulationOutput$nObs | |
2223 | |
2224 out = binom.test(outliers, trials, p = outFreqH0, alternative = alternative) | |
2225 | |
2226 # overwrite information in binom.test | |
2227 | |
2228 out$data.name = data.name | |
2229 out$margin = margin | |
2230 out$method = "DHARMa outlier test based on exact binomial test" | |
2231 | |
2232 names(out$statistic) = paste("outliers at", margin, "margin(s)") | |
2233 names(out$parameter) = "simulations" | |
2234 names(out$estimate) = paste("frequency of outliers (expected:", out$null.value,")") | |
2235 | |
2236 out$interval = "tst" | |
2237 | |
2238 out$interval = | |
2239 | |
2240 if(plot == T) { | |
2241 | |
2242 hist(simulationOutput, main = "") | |
2243 | |
2244 main = ifelse(out$p.value <= 0.05, | |
2245 "Outlier test significant", | |
2246 "Outlier test n.s.") | |
2247 | |
2248 title(main = main, cex.main = 1, | |
2249 col.main = ifelse(out$p.value <= 0.05, "red", "black")) | |
2250 | |
2251 # legend("center", c(paste("p=", round(out$p.value, digits = 5)), paste("Deviation ", ifelse(out$p.value < 0.05, "significant", "n.s."))), text.col = ifelse(out$p.value < 0.05, "red", "black" )) | |
2252 | |
2253 } | |
2254 return(out) | |
2255 } | |
2256 | |
2257 | |
2258 #' DHARMa dispersion tests | |
2259 #' | |
2260 #' This function performs a simulation-based test for over/underdispersion | |
2261 #' | |
2262 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa | |
2263 #' @param plot whether to plot output | |
2264 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" compared to the simulated null hypothesis. Greater corresponds to overdispersion. | |
2265 #' @param ... arguments to pass on to \code{\link{testGeneric}} | |
2266 #' @details The function implements two tests, depending on whether it is applied on a simulation with refit = F, or refit = T. | |
2267 #' | |
2268 #' If refit = F, the function tests the sd of the data against the sd of the simulated data. | |
2269 #' | |
2270 #' If refit = T, the function compares the approximate deviance (via squared pearson residuals) with the same quantity from the models refitted with simulated data. Applying this is much slower than the previous alternative. Given the computational cost, I would suggest that most users will be satisfied with the standard dispersion test. | |
2271 #' | |
2272 #' @note The results of the dispersion test can can differ depending on whether it is evaluated on conditional (= conditional on fitted random effects) or unconditional (= REs are re-simulated) simulations. You can change between conditional or unconditional simulations in \code{\link{simulateResiduals}} if this is supported by the regression package that you use. The default in DHARMa is to use unconditional simulations, but I have often found that conditional simulations are more sensitive to dispersion problems. I recommend trying both, as neither test should be positive if the dispersion is correct. | |
2273 #' | |
2274 #' @author Florian Hartig | |
2275 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}} | |
2276 #' @example inst/examples/testsHelp.R | |
2277 #' @export | |
2278 testDispersion <- function(simulationOutput, alternative = c("two.sided", "greater", "less"), plot = T, ...){ | |
2279 | |
2280 out = list() | |
2281 out$data.name = deparse(substitute(simulationOutput)) | |
2282 | |
2283 alternative <- match.arg(alternative) | |
2284 | |
2285 simulationOutput = ensureDHARMa(simulationOutput, convert = "Model") | |
2286 | |
2287 if(simulationOutput$refit == F){ | |
2288 | |
2289 spread <- function(x) sd(x - simulationOutput$fittedPredictedResponse) | |
2290 out = testGeneric(simulationOutput, summary = spread, alternative = alternative, methodName = "DHARMa nonparametric dispersion test via sd of residuals fitted vs. simulated", plot = plot, ...) | |
2291 } else { | |
2292 | |
2293 observed = tryCatch(sum(residuals(simulationOutput$fittedModel, type = "pearson")^2), error = function(e) { | |
2294 message(paste("DHARMa: the requested tests requires pearson residuals, but your model does not implement these calculations. Test will return NA. Error message:", e)) | |
2295 return(NA) | |
2296 }) | |
2297 if(is.na(observed)) return(NA) | |
2298 expected = apply(simulationOutput$refittedPearsonResiduals^2 , 2, sum) | |
2299 out$statistic = c(dispersion = observed / mean(expected)) | |
2300 out$method = "DHARMa nonparametric dispersion test via mean deviance residual fitted vs. simulated-refitted" | |
2301 | |
2302 p = getP(simulated = expected, observed = observed, alternative = alternative) | |
2303 | |
2304 out$alternative = alternative | |
2305 out$p.value = p | |
2306 class(out) = "htest" | |
2307 | |
2308 if(plot == T) { | |
2309 #plotTitle = gsub('(.{1,50})(\\s|$)', '\\1\n', out$method) | |
2310 xLabel = paste("Simulated values, red line = fitted model. p-value (",out$alternative, ") = ", out$p.value, sep ="") | |
2311 | |
2312 hist(expected, xlim = range(expected, observed, na.rm=T ), col = "lightgrey", main = "", xlab = xLabel, breaks = 20, cex.main = 1) | |
2313 abline(v = observed, lwd= 2, col = "red") | |
2314 | |
2315 main = ifelse(out$p.value <= 0.05, | |
2316 "Dispersion test significant", | |
2317 "Dispersion test n.s.") | |
2318 | |
2319 title(main = main, cex.main = 1, | |
2320 col.main = ifelse(out$p.value <= 0.05, "red", "black")) | |
2321 } | |
2322 } | |
2323 | |
2324 return(out) | |
2325 } | |
2326 | |
2327 #' Simulated overdisperstion tests | |
2328 #' | |
2329 #' @details Deprecated, switch your code to using the \code{\link{testDispersion}} function | |
2330 #' | |
2331 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa | |
2332 #' @param ... additional arguments to \code{\link{testDispersion}} | |
2333 #' @export | |
2334 testOverdispersion <- function(simulationOutput, ...){ | |
2335 message("testOverdispersion is deprecated, switch your code to using the testDispersion function") | |
2336 testDispersion(simulationOutput, ...) | |
2337 } | |
2338 | |
2339 #' Parametric overdisperstion tests | |
2340 #' | |
2341 #' @details Deprecated, switch your code to using the \code{\link{testDispersion}} function. The function will do nothing, arguments will be ignored, the parametric tests is no longer recommend | |
2342 #' | |
2343 #' @param ... arguments will be ignored, the parametric tests is no longer recommend | |
2344 #' @export | |
2345 testOverdispersionParametric <- function(...){ | |
2346 message("testOverdispersionParametric is deprecated and no longer recommended, see release notes in DHARMA 0.2.0 - switch your code to using the testDispersion function") | |
2347 return(0) | |
2348 } | |
2349 | |
2350 | |
2351 #' Tests for zero-inflation | |
2352 #' | |
2353 #' This function compares the observed number of zeros with the zeros expected from simulations. | |
2354 #' | |
2355 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa | |
2356 #' @param ... further arguments to \code{\link{testGeneric}} | |
2357 #' @details The plot shows the expected distribution of zeros against the observed values, the ratioObsSim shows observed vs. simulated zeros. A value < 1 means that the observed data has less zeros than expected, a value > 1 means that it has more zeros than expected (aka zero-inflation). Per default, the function tests both sides. | |
2358 #' | |
2359 #' Some notes about common problems / questions: | |
2360 #' | |
2361 #' * Zero-inflation tests after fitting the model are crucial to see if you have zero-inflation. Just because there are a lot of zeros doesn't mean you have zero-inflation, see Warton, D. I. (2005). Many zeros does not mean zero inflation: comparing the goodness-of-fit of parametric models to multivariate abundance data. Environmetrics 16(3), 275-289. | |
2362 #' | |
2363 #' * That being said, zero-inflation tests are often not a reliable guide to decide wheter to add a zi term or not. In general, model structures should be decided on ideally a priori, if that is not possible via model selection techniques (AIC, BIC, WAIC, Bayes Factor). A zero-inflation test should only be run after that decision, and to validate the decision that was taken. | |
2364 #' | |
2365 #' @note This function is a wrapper for \code{\link{testGeneric}}, where the summary argument is set to function(x) sum(x == 0) | |
2366 #' @author Florian Hartig | |
2367 #' @example inst/examples/testsHelp.R | |
2368 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}} | |
2369 #' @export | |
2370 testZeroInflation <- function(simulationOutput, ...){ | |
2371 countZeros <- function(x) sum( x == 0) | |
2372 testGeneric(simulationOutput = simulationOutput, summary = countZeros, methodName = "DHARMa zero-inflation test via comparison to expected zeros with simulation under H0 = fitted model", ... ) | |
2373 } | |
2374 | |
2375 | |
2376 #' Generic simulation test of a summary statistic | |
2377 #' | |
2378 #' This function tests if a user-defined summary differs when applied to simulated / observed data. | |
2379 #' | |
2380 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa | |
2381 #' @param summary a function that can be applied to simulated / observed data. See examples below | |
2382 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" compared to the simulated null hypothesis | |
2383 #' @param plot whether to plot the simulated summary | |
2384 #' @param methodName name of the test (will be used in plot) | |
2385 #' | |
2386 #' @details This function tests if a user-defined summary differs when applied to simulated / observed data. the function can easily be remodeled to apply summaries on the residuals, by simply defining f = function(x) summary (x - predictions), as done in \code{\link{testDispersion}} | |
2387 #' | |
2388 #' @note The function that you supply is applied on the data as it is represented in your fitted model, which may not always correspond to how you think. This is important in particular when you use k/n binomial data, and want to test for 1-inflation. As an example, if have k/20 observations, and you provide your data via cbind (y, y-20), you have to test for 20-inflation (because this is how the data is represented in the model). However, if you provide data via y/20, and weights = 20, you should test for 1-inflation. In doubt, check how the data is internally represented in model.frame(model), or via simulate(model) | |
2389 #' | |
2390 #' @export | |
2391 #' @author Florian Hartig | |
2392 #' @example inst/examples/testsHelp.R | |
2393 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}} | |
2394 testGeneric <- function(simulationOutput, summary, alternative = c("two.sided", "greater", "less"), plot = T, methodName = "DHARMa generic simulation test"){ | |
2395 | |
2396 out = list() | |
2397 out$data.name = deparse(substitute(simulationOutput)) | |
2398 | |
2399 simulationOutput = ensureDHARMa(simulationOutput, convert = "Model") | |
2400 | |
2401 alternative <- match.arg(alternative) | |
2402 | |
2403 observed = summary(simulationOutput$observedResponse) | |
2404 | |
2405 simulated = apply(simulationOutput$simulatedResponse, 2, summary) | |
2406 | |
2407 p = getP(simulated = simulated, observed = observed, alternative = alternative) | |
2408 | |
2409 out$statistic = c(ratioObsSim = observed / mean(simulated)) | |
2410 out$method = methodName | |
2411 out$alternative = alternative | |
2412 out$p.value = p | |
2413 | |
2414 | |
2415 class(out) = "htest" | |
2416 | |
2417 if(plot == T) { | |
2418 plotTitle = gsub('(.{1,50})(\\s|$)', '\\1\n', methodName) | |
2419 xLabel = paste("Simulated values, red line = fitted model. p-value (",out$alternative, ") = ", out$p.value, sep ="") | |
2420 hist(simulated, xlim = range(simulated, observed, na.rm=T ), col = "lightgrey", main = plotTitle, xlab = xLabel, breaks = max(round(simulationOutput$nSim / 5), 20), cex.main = 0.8) | |
2421 abline(v = observed, lwd= 2, col = "red") | |
2422 } | |
2423 return(out) | |
2424 } | |
2425 | |
2426 | |
2427 #' Test for temporal autocorrelation | |
2428 #' | |
2429 #' This function performs a standard test for temporal autocorrelation on the simulated residuals | |
2430 #' | |
2431 #' @param simulationOutput an object with simulated residuals created by \code{\link{simulateResiduals}} | |
2432 #' @param time the time, in the same order as the data points. If not provided, random values will be created | |
2433 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" compared to the simulated null hypothesis | |
2434 #' @param plot whether to plot output | |
2435 #' @details The function performs a Durbin-Watson test on the uniformly scaled residuals, and plots the residuals against time. The DB test was originally be designed for normal residuals. In simulations, I didn't see a problem with this setting though. The alternative is to transform the uniform residuals to normal residuals and perform the DB test on those. | |
2436 #' | |
2437 #' If no time values are provided, random values will be created. The sense of being able to run the test with time = NULL (random values) is to test the rate of false positives under the current residual structure (random time corresponds to H0: no spatial autocorrelation), e.g. to check if the test has noninal error rates for particular residual structures (note that Durbin-Watson originally assumes normal residuals, error rates seem correct for uniform residuals, but may not be correct if there are still other residual problems). | |
2438 #' | |
2439 #' Testing for temporal autocorrelation requires unique time values - if you have several observations per time value, either use the recalculateResiduals function to aggregate residuals per time step, or extract the residuals from the fitted object, and plot / test each of them independently for temporally repeated subgroups (typical choices would be location / subject etc.). Note that the latter must be done by hand, outside testSpatialAutocorrelation. | |
2440 #' | |
2441 #' @note Important to note for all autocorrelation tests (spatial / temporal): the autocorrelation tests are valid to check for residual autocorrelation in models that don't assume such a correlation (in this case, you can use conditional or unconditional simulations), or if there is remaining residual autocorrelation after accounting for it in a spatial/temporal model (in that case, you have to use conditional simulations), but if checking unconditional simulations from a model with an autocorrelation structure on data that corresponds to this model, they will be significant, even if the model fully accounts for this structure. | |
2442 #' | |
2443 #' This behavior is not really a bug, but rather originates from the definition of the quantile residuals: quantile residuals are calculated independently per data point, i.e. without consideratin of any correlation structure between data points that may exist in the simulations. As a result, the simulated distributions from a unconditional simulaton will typically not reflect the correlation structure that is present in each single simulation, and the same is true for the subsequently calculated quantile residuals. | |
2444 #' | |
2445 #' The bottomline here is that spatial / temporal / other autoregressive models should either be tested based on conditional simulations, or (ideally) custom tests should be used that are not based on quantile residuals, but rather compare the correlation structure in the simulated data with the correlation structure in the observed data. | |
2446 #' | |
2447 #' @author Florian Hartig | |
2448 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testSpatialAutocorrelation}}, \code{\link{testQuantiles}} | |
2449 #' @example inst/examples/testTemporalAutocorrelationHelp.R | |
2450 #' @export | |
2451 testTemporalAutocorrelation <- function(simulationOutput, time = NULL , alternative = c("two.sided", "greater", "less"), plot = T){ | |
2452 | |
2453 simulationOutput = ensureDHARMa(simulationOutput, convert = T) | |
2454 | |
2455 # actually not sure if this is neccessary for dwtest, but seems better to aggregate | |
2456 if(any(duplicated(time))) stop("testing for temporal autocorrelation requires unique time values - if you have several observations per time value, either use the recalculateResiduals function to aggregate residuals per time step, or extract the residuals from the fitted object, and plot / test each of them independently for temporally repeated subgroups (typical choices would be location / subject etc.). Note that the latter must be done by hand, outside testSpatialAutocorrelation.") | |
2457 | |
2458 alternative <- match.arg(alternative) | |
2459 | |
2460 if(is.null(time)){ | |
2461 time = sample.int(simulationOutput$nObs, simulationOutput$nObs) | |
2462 message("DHARMa::testTemporalAutocorrelation - no time argument provided, using random times for each data point") | |
2463 } | |
2464 | |
2465 out = lmtest::dwtest(simulationOutput$scaledResiduals ~ 1, order.by = time, alternative = alternative) | |
2466 | |
2467 if(plot == T) { | |
2468 oldpar <- par(mfrow = c(1,2)) | |
2469 on.exit(par(oldpar)) | |
2470 | |
2471 plot(simulationOutput$scaledResiduals[order(time)] ~ time[order(time)], | |
2472 type = "l", ylab = "Scaled residuals", xlab = "Time", main = "Residuals vs. time") | |
2473 acf(simulationOutput$scaledResiduals[order(time)], main = "Autocorrelation") | |
2474 legend("topright", | |
2475 c(paste(out$method, " p=", round(out$p.value, digits = 5)), | |
2476 paste("Deviation ", ifelse(out$p.value < 0.05, "significant", "n.s."))), | |
2477 text.col = ifelse(out$p.value < 0.05, "red", "black" ), bty="n") | |
2478 | |
2479 } | |
2480 | |
2481 return(out) | |
2482 } | |
2483 | |
2484 | |
2485 #' Test for spatial autocorrelation | |
2486 #' | |
2487 #' This function performs a standard test for spatial autocorrelation on the simulated residuals | |
2488 #' | |
2489 #' @param simulationOutput an object of class DHARMa with simulated quantile residuals, either created via \code{\link{simulateResiduals}} or by \code{\link{createDHARMa}} for simulations created outside DHARMa | |
2490 #' @param x the x coordinate, in the same order as the data points. If not provided, random values will be created | |
2491 #' @param y the y coordinate, in the same order as the data points. If not provided, random values will be created | |
2492 #' @param distMat optional distance matrix. If not provided, a distance matrix will be calculated based on x and y. See details for explanation | |
2493 #' @param alternative a character string specifying whether the test should test if observations are "greater", "less" or "two.sided" compared to the simulated null hypothesis | |
2494 #' @param plot whether to plot output | |
2495 #' @details The function performs Moran.I test from the package ape, based on the provided distance matrix of the data points. | |
2496 #' | |
2497 #' There are several ways to specify this distance. If a distance matrix (distMat) is provided, calculations will be based on this distance matrix, and x,y coordinates will only used for the plotting (if provided) | |
2498 #' If distMat is not provided, the function will calculate the euclidian distances between x,y coordinates, and test Moran.I based on these distances. | |
2499 #' | |
2500 #' If no x/y values are provided, random values will be created. The sense of being able to run the test with x/y = NULL (random values) is to test the rate of false positives under the current residual structure (random x/y corresponds to H0: no spatial autocorrelation), e.g. to check if the test has nominal error rates for particular residual structures. | |
2501 #' | |
2502 #' Testing for spatial autocorrelation requires unique x,y values - if you have several observations per location, either use the recalculateResiduals function to aggregate residuals per location, or extract the residuals from the fitted object, and plot / test each of them independently for spatially repeated subgroups (a typical scenario would repeated spatial observation, in which case one could plot / test each time step separately for temporal autocorrelation). Note that the latter must be done by hand, outside testSpatialAutocorrelation. | |
2503 #' | |
2504 #' @note Important to note for all autocorrelation tests (spatial / temporal): the autocorrelation tests are valid to check for residual autocorrelation in models that don't assume such a correlation (in this case, you can use conditional or unconditional simulations), or if there is remaining residual autocorrelation after accounting for it in a spatial/temporal model (in that case, you have to use conditional simulations), but if checking unconditional simulations from a model with an autocorrelation structure on data that corresponds to this model, they will be significant, even if the model fully accounts for this structure. | |
2505 #' | |
2506 #' This behavior is not really a bug, but rather originates from the definition of the quantile residuals: quantile residuals are calculated independently per data point, i.e. without consideratin of any correlation structure between data points that may exist in the simulations. As a result, the simulated distributions from a unconditional simulaton will typically not reflect the correlation structure that is present in each single simulation, and the same is true for the subsequently calculated quantile residuals. | |
2507 #' | |
2508 #' The bottomline here is that spatial / temporal / other autoregressive models should either be tested based on conditional simulations, or (ideally) custom tests should be used that are not based on quantile residuals, but rather compare the correlation structure in the simulated data with the correlation structure in the observed data. | |
2509 #' | |
2510 #' @author Florian Hartig | |
2511 #' @seealso \code{\link{testResiduals}}, \code{\link{testUniformity}}, \code{\link{testOutliers}}, \code{\link{testDispersion}}, \code{\link{testZeroInflation}}, \code{\link{testGeneric}}, \code{\link{testTemporalAutocorrelation}}, \code{\link{testQuantiles}} | |
2512 #' @import grDevices | |
2513 #' @example inst/examples/testSpatialAutocorrelationHelp.R | |
2514 #' @export | |
2515 testSpatialAutocorrelation <- function(simulationOutput, x = NULL, y = NULL, distMat = NULL, alternative = c("two.sided", "greater", "less"), plot = T){ | |
2516 | |
2517 alternative <- match.arg(alternative) | |
2518 data.name = deparse(substitute(simulationOutput)) # needs to be before ensureDHARMa | |
2519 simulationOutput = ensureDHARMa(simulationOutput, convert = T) | |
2520 | |
2521 if(any(duplicated(cbind(x,y)))) stop("testing for spatial autocorrelation requires unique x,y values - if you have several observations per location, either use the recalculateResiduals function to aggregate residuals per location, or extract the residuals from the fitted object, and plot / test each of them independently for spatially repeated subgroups (a typical scenario would repeated spatial observation, in which case one could plot / test each time step separately for temporal autocorrelation). Note that the latter must be done by hand, outside testSpatialAutocorrelation.") | |
2522 | |
2523 if( (!is.null(x) | !is.null(y)) & !is.null(distMat) ) message("both coordinates and distMat provided, calculations will be done based on the distance matrix, coordinates will only be used for plotting") | |
2524 # if not provided, fill x and y with random numbers (Null model) | |
2525 if(is.null(x)){ | |
2526 x = runif(simulationOutput$nObs, -1,1) | |
2527 message("DHARMa::testSpatialAutocorrelation - no x coordinates provided, using random values for each data point") | |
2528 } | |
2529 | |
2530 if(is.null(y)){ | |
2531 y = runif(simulationOutput$nObs, -1,1) | |
2532 message("DHARMa::testSpatialAutocorrelation - no x coordinates provided, using random values for each data point") | |
2533 } | |
2534 | |
2535 # if not provided, create distance matrix based on x and y | |
2536 if(is.null(distMat)) distMat <- as.matrix(dist(cbind(x, y))) | |
2537 | |
2538 invDistMat <- 1/distMat | |
2539 diag(invDistMat) <- 0 | |
2540 | |
2541 MI = ape::Moran.I(simulationOutput$scaledResiduals, weight = invDistMat, alternative = alternative) | |
2542 | |
2543 out = list() | |
2544 out$statistic = c(observed = MI$observed, expected = MI$expected, sd = MI$sd) | |
2545 out$method = "DHARMa Moran's I test for spatial autocorrelation" | |
2546 out$alternative = "Spatial autocorrelation" | |
2547 out$p.value = MI$p.value | |
2548 out$data.name = data.name | |
2549 | |
2550 class(out) = "htest" | |
2551 | |
2552 | |
2553 | |
2554 if(plot == T) { | |
2555 opar <- par(mfrow = c(1,1)) | |
2556 on.exit(par(opar)) | |
2557 | |
2558 col = colorRamp(c("red", "white", "blue"))(simulationOutput$scaledResiduals) | |
2559 plot(x,y, col = rgb(col, maxColorValue = 255), main = out$method, cex.main = 0.8 ) | |
2560 | |
2561 # TODO implement correlogram | |
2562 } | |
2563 | |
2564 if(plot == T) { | |
2565 | |
2566 | |
2567 } | |
2568 return(out) | |
2569 } | |
2570 | |
2571 | |
2572 getP <- function(simulated, observed, alternative){ | |
2573 | |
2574 if(alternative == "greater") p = mean(simulated >= observed) | |
2575 if(alternative == "less") p = mean(simulated <= observed) | |
2576 if(alternative == "two.sided") p = min(min(mean(simulated <= observed), mean(simulated >= observed) ) * 2,1) | |
2577 | |
2578 return(p) | |
2579 } | |
2580 | |
2581 | |
2582 | |
2583 ####################### tests.R | |
2584 | |
2585 ####################### compatibility.R | |
2586 | |
2587 | |
2588 # New S3 methods | |
2589 | |
2590 #' Get model response | |
2591 #' | |
2592 #' Extract the response of a fitted model | |
2593 #' | |
2594 #' The purpose of this function is to savely extract the response (dependent variable) of the fitted model classes | |
2595 #' | |
2596 #' @param object a fitted model | |
2597 #' @param ... additional parameters | |
2598 #' | |
2599 #' @example inst/examples/wrappersHelp.R | |
2600 #' | |
2601 #' @seealso \code{\link{getRefit}}, \code{\link{getSimulations}}, \code{\link{getFixedEffects}}, \code{\link{getFitted}} | |
2602 #' @author Florian Hartig | |
2603 #' @export | |
2604 getObservedResponse <- function (object, ...) { | |
2605 UseMethod("getObservedResponse", object) | |
2606 } | |
2607 | |
2608 #' @rdname getObservedResponse | |
2609 #' @export | |
2610 getObservedResponse.default <- function (object, ...){ | |
2611 out = model.frame(object)[,1] | |
2612 | |
2613 # check for weights in k/n case | |
2614 if(family(object)$family %in% c("binomial", "betabinomial") & "(weights)" %in% colnames(model.frame(object))){ | |
2615 x = model.frame(object) | |
2616 out = out * x$`(weights)` | |
2617 } | |
2618 | |
2619 # check for k/n binomial | |
2620 if(is.matrix(out)){ | |
2621 if(!(ncol(out) == 2)) securityAssertion("nKcase - wrong dimensions of response") | |
2622 if(!(family(object)$family %in% c("binomial", "betabinomial"))) securityAssertion("nKcase - wrong family") | |
2623 | |
2624 out = out[,1] | |
2625 } | |
2626 | |
2627 # observation is factor - unlike lme4 and older, glmmTMB simulates nevertheless as numeric | |
2628 if(is.factor(out)) out = as.numeric(out) - 1 | |
2629 | |
2630 return(out) | |
2631 } | |
2632 | |
2633 weightsWarning = "Model was fit with prior weights. These will be ignored in the simulation. See ?getSimulations for details" | |
2634 | |
2635 #' Get model simulations | |
2636 #' | |
2637 #' Wrapper to simulate from a fitted model | |
2638 #' | |
2639 #' The purpose of this wrapper for for the simulate function is to return the simulations from a model in a standardized way | |
2640 #' | |
2641 #' @param object a fitted model | |
2642 #' @param nsim number of simulations | |
2643 #' @param type if simulations should be prepared for getQuantile or for refit | |
2644 #' @param ... additional parameters to be passed on, usually to the simulate function of the respective model class | |
2645 #' | |
2646 #' @return a matrix with simulations | |
2647 #' @example inst/examples/wrappersHelp.R | |
2648 #' | |
2649 #' @seealso \code{\link{getObservedResponse}}, \code{\link{getRefit}}, \code{\link{getFixedEffects}}, \code{\link{getFitted}} | |
2650 #' | |
2651 #' @details The function is a wrapper for for the simulate function is to return the simulations from a model in a standardized way. | |
2652 #' | |
2653 #' Note: if the model was fit with weights, the function will throw a warning if used with a model class whose simulate function does not include the weightsi in the simulations. Note that the results may or may not be appropriate in this case, depending on how you use the weights. | |
2654 #' | |
2655 #' | |
2656 #' @author Florian Hartig | |
2657 #' @export | |
2658 getSimulations <- function (object, nsim = 1 , type = c("normal", "refit"), ...) { | |
2659 UseMethod("getSimulations", object) | |
2660 } | |
2661 | |
2662 #' @rdname getSimulations | |
2663 #' @export | |
2664 getSimulations.default <- function (object, nsim = 1, type = c("normal", "refit"), ...){ | |
2665 | |
2666 type <- match.arg(type) | |
2667 | |
2668 out = simulate(object, nsim = nsim , ...) | |
2669 | |
2670 if (type == "normal"){ | |
2671 if(family(object)$family %in% c("binomial", "betabinomial")){ | |
2672 if("(weights)" %in% colnames(model.frame(object))){ | |
2673 x = model.frame(object) | |
2674 out = out * x$`(weights)` | |
2675 } else if (is.matrix(out[[1]])){ | |
2676 # this is for the k/n binomial case | |
2677 out = as.matrix(out)[,seq(1, (2*nsim), by = 2)] | |
2678 } else if(is.factor(out[[1]])){ | |
2679 if(nlevels(out[[1]]) != 2){ | |
2680 warning("The fitted model has a factorial response with number of levels not equal to 2 - there is currently no sensible application in DHARMa that would lead to this situation. Likely, you are trying something that doesn't work.") | |
2681 } | |
2682 else{ | |
2683 out = data.matrix(out) - 1 | |
2684 } | |
2685 } | |
2686 } | |
2687 | |
2688 if(!is.matrix(out)) out = data.matrix(out) | |
2689 } else{ | |
2690 if(family(object)$family %in% c("binomial", "betabinomial")){ | |
2691 if (!is.matrix(out[[1]]) & !is.numeric(out)) data.frame(data.matrix(out)-1) | |
2692 } | |
2693 } | |
2694 | |
2695 return(out) | |
2696 } | |
2697 | |
2698 | |
2699 #' Extract fixed effects of a supported model | |
2700 #' | |
2701 #' A wrapper to extract fixed effects of a supported model | |
2702 #' | |
2703 #' @param fittedModel a fitted model | |
2704 #' | |
2705 #' @example inst/examples/wrappersHelp.R | |
2706 #' | |
2707 #' @importFrom lme4 fixef | |
2708 #' | |
2709 #' @seealso \code{\link{getObservedResponse}}, \code{\link{getSimulations}}, \code{\link{getRefit}}, \code{\link{getFitted}} | |
2710 #' @export | |
2711 getFixedEffects <- function(fittedModel){ | |
2712 | |
2713 if(class(fittedModel)[1] %in% c("glm", "lm", "gam", "bam", "negbin") ){ | |
2714 out = coef(fittedModel) | |
2715 } else if(class(fittedModel)[1] %in% c("glmerMod", "lmerMod", "HLfit")){ | |
2716 out = fixef(fittedModel) | |
2717 } else if(class(fittedModel)[1] %in% c("glmmTMB")){ | |
2718 out = glmmTMB::fixef(fittedModel) | |
2719 out = out$cond | |
2720 } else { | |
2721 out = coef(fittedModel) | |
2722 if(is.null(out)) out = fixef(fittedModel) | |
2723 } | |
2724 return(out) | |
2725 } | |
2726 | |
2727 #' Get model refit | |
2728 #' | |
2729 #' Wrapper to refit a fitted model | |
2730 #' | |
2731 #' @param object a fitted model | |
2732 #' @param newresp the new response that should be used to refit the model | |
2733 #' @param ... additional parameters to be passed on to the refit or update class that is used to refit the model | |
2734 #' | |
2735 #' @details The purpose of this wrapper is to standardize the refit of a model. The behavior of this function depends on the supplied model. When available, it uses the refit method, otherwise it will use update. For glmmTMB: since version 1.0, glmmTMB has a refit function, but this didn't work, so I switched back to this implementation, which is a hack based on the update function. | |
2736 #' | |
2737 #' @example inst/examples/wrappersHelp.R | |
2738 #' | |
2739 #' @seealso \code{\link{getObservedResponse}}, \code{\link{getSimulations}}, \code{\link{getFixedEffects}} | |
2740 #' @author Florian Hartig | |
2741 #' @export | |
2742 getRefit <- function (object, newresp, ...) { | |
2743 UseMethod("getRefit", object) | |
2744 } | |
2745 | |
2746 #' @rdname getRefit | |
2747 #' | |
2748 #' @importFrom lme4 refit | |
2749 #' | |
2750 #' @export | |
2751 getRefit.default <- function (object, newresp, ...){ | |
2752 refit(object, newresp, ...) | |
2753 } | |
2754 | |
2755 #' Get model fitted | |
2756 #' | |
2757 #' Wrapper to get the fitted value a fitted model | |
2758 #' | |
2759 #' The purpose of this wrapper is to standardize extract the fitted values | |
2760 #' | |
2761 #' @param object a fitted model | |
2762 #' @param ... additional parameters to be passed on, usually to the simulate function of the respective model class | |
2763 #' | |
2764 #' @example inst/examples/wrappersHelp.R | |
2765 #' | |
2766 #' @seealso \code{\link{getObservedResponse}}, \code{\link{getSimulations}}, \code{\link{getRefit}}, \code{\link{getFixedEffects}} | |
2767 #' | |
2768 #' @author Florian Hartig | |
2769 #' @export | |
2770 getFitted <- function (object, ...) { | |
2771 UseMethod("getFitted", object) | |
2772 } | |
2773 | |
2774 #' @rdname getFitted | |
2775 #' @export | |
2776 getFitted.default <- function (object,...){ | |
2777 fitted(object, ...) | |
2778 } | |
2779 | |
2780 #' has NA | |
2781 #' | |
2782 #' checks if the fitted model excluded NA values | |
2783 #' | |
2784 #' @param object a fitted model | |
2785 #' | |
2786 #' @details Checks if the fitted model excluded NA values | |
2787 #' | |
2788 #' @export | |
2789 | |
2790 | |
2791 # hasNA <- function(object){ | |
2792 # x = rownames(model.frame(object)) | |
2793 # if(length(x) < as.numeric(x[length(x) ])) return(TRUE) | |
2794 # else return(FALSE) | |
2795 # } | |
2796 | |
2797 ######### LM ############# | |
2798 | |
2799 #' @rdname getRefit | |
2800 #' @export | |
2801 getRefit.lm <- function(object, newresp, ...){ | |
2802 | |
2803 newData <-model.frame(object) | |
2804 | |
2805 if(is.vector(newresp)){ | |
2806 newData[,1] = newresp | |
2807 } else if (is.factor(newresp)){ | |
2808 # Hack to make the factor binomial case work | |
2809 newData[,1] = as.numeric(newresp) - 1 | |
2810 } else { | |
2811 # Hack to make the binomial n/k case work | |
2812 newData[[1]] = NULL | |
2813 newData = cbind(newresp, newData) | |
2814 } | |
2815 | |
2816 refittedModel = update(object, data = newData) | |
2817 return(refittedModel) | |
2818 } | |
2819 | |
2820 | |
2821 hasWeigths.lm <- function(object, ...){ | |
2822 if(length(unique(object$prior.weights)) != 1) return(TRUE) | |
2823 else return(FALSE) | |
2824 } | |
2825 | |
2826 | |
2827 ######### GLM ############# | |
2828 | |
2829 #' @rdname getSimulations | |
2830 #' @export | |
2831 getSimulations.negbin<- function (object, nsim = 1, type = c("normal", "refit"), ...){ | |
2832 if("(weights)" %in% colnames(model.frame(object))) warning(weightsWarning) | |
2833 getSimulations.default(object = object, nsim = nsim, type = type, ...) | |
2834 } | |
2835 | |
2836 | |
2837 ######## MGCV ############ | |
2838 | |
2839 # This function overwrites the standard fitted function for GAM | |
2840 | |
2841 #' @rdname getFitted | |
2842 #' @export | |
2843 getFitted.gam <- function(object, ...){ | |
2844 class(object) = "glm" | |
2845 out = stats::fitted(object, ...) | |
2846 names(out) = as.character(1:length(out)) | |
2847 out | |
2848 } | |
2849 | |
2850 # Check that this works | |
2851 # plot(fitted(fittedModelGAM), predict(fittedModelGAM, type = "response")) | |
2852 | |
2853 | |
2854 ######## lme4 ############ | |
2855 | |
2856 | |
2857 #' @rdname getSimulations | |
2858 #' @export | |
2859 getSimulations.lmerMod <- function (object, nsim = 1, type = c("normal", "refit"), ...){ | |
2860 | |
2861 if("(weights)" %in% colnames(model.frame(object))) warning(weightsWarning) | |
2862 getSimulations.default(object = object, nsim = nsim, type = type, ...) | |
2863 } | |
2864 | |
2865 | |
2866 ######## glmmTMB ###### | |
2867 | |
2868 #' @rdname getRefit | |
2869 #' @export | |
2870 getRefit.glmmTMB <- function(object, newresp, ...){ | |
2871 newData <-model.frame(object) | |
2872 | |
2873 # hack to make update work - for some reason, glmmTMB wants the matrix embedded in the df for update to work ... should be solved ideally, see https://github.com/glmmTMB/glmmTMB/issues/549 | |
2874 if(is.matrix(newresp)){ | |
2875 tmp = colnames(newData[[1]]) | |
2876 newData[[1]] = NULL | |
2877 newData = cbind(newresp, newData) | |
2878 colnames(newData)[1:2] = tmp | |
2879 } else { | |
2880 newData[[1]] = newresp | |
2881 } | |
2882 refittedModel = update(object, data = newData) | |
2883 return(refittedModel) | |
2884 } | |
2885 | |
2886 | |
2887 # glmmTMB simulates normal counts (and not proportions in any case, so the check for the other models is not needed), see #158 | |
2888 # note that if observation is factor - unlike lme4 and older, glmmTMB simulates nevertheless as numeric | |
2889 | |
2890 #' @rdname getSimulations | |
2891 #' @export | |
2892 getSimulations.glmmTMB <- function (object, nsim = 1, type = c("normal", "refit"), ...){ | |
2893 | |
2894 if("(weights)" %in% colnames(model.frame(object)) & ! family(object)$family %in% c("binomial", "betabinomial")) warning(weightsWarning) | |
2895 | |
2896 type <- match.arg(type) | |
2897 | |
2898 out = simulate(object, nsim = nsim, ...) | |
2899 | |
2900 if (type == "normal"){ | |
2901 if (is.matrix(out[[1]])){ | |
2902 # this is for the k/n binomial case | |
2903 out = as.matrix(out)[,seq(1, (2*nsim), by = 2)] | |
2904 } | |
2905 if(!is.matrix(out)) out = data.matrix(out) | |
2906 }else{ | |
2907 | |
2908 # check for weights in k/n case | |
2909 if(family(object)$family %in% c("binomial", "betabinomial")){ | |
2910 if("(weights)" %in% colnames(model.frame(object))){ | |
2911 w = model.frame(object) | |
2912 w = w$`(weights)` | |
2913 tmp <- function(x)x/w | |
2914 out = apply(out, 2, tmp) | |
2915 out = as.data.frame(out) | |
2916 } | |
2917 else if(is.matrix(out[[1]]) & !is.matrix(model.frame(object)[[1]])){ | |
2918 out = as.data.frame(as.matrix(out)[,seq(1, (2*nsim), by = 2)]) | |
2919 } | |
2920 } | |
2921 | |
2922 | |
2923 | |
2924 | |
2925 | |
2926 | |
2927 # matrixResp = | |
2928 # | |
2929 # if(matrixResp & !is.null(ncol(newresp))){ | |
2930 # # Hack to make the factor binomial case work | |
2931 # tmp = colnames(newData[[1]]) | |
2932 # newData[[1]] = NULL | |
2933 # newData = cbind(newresp, newData) | |
2934 # colnames(newData)[1:2] = tmp | |
2935 # } else if(!is.null(ncol(newresp))){ | |
2936 # newData[[1]] = newresp[,1] | |
2937 # } else { | |
2938 # newData[[1]] = newresp | |
2939 # } | |
2940 | |
2941 | |
2942 # if (out$modelClass == "glmmTMB" & ncol(simulations) == 2*n) simObserved = simulations[,(1+(2*(i-1))):(2+(2*(i-1)))] | |
2943 } | |
2944 | |
2945 # else securityAssertion("Simulation results produced unsupported data structure", stop = TRUE) | |
2946 | |
2947 return(out) | |
2948 } | |
2949 | |
2950 ####### spaMM ######### | |
2951 | |
2952 #' @rdname getObservedResponse | |
2953 #' @export | |
2954 getObservedResponse.HLfit <- function(object, ...){ | |
2955 out = spaMM::response(object, ...) | |
2956 | |
2957 nKcase = is.matrix(out) | |
2958 if(nKcase){ | |
2959 if(! (family(object) %in% c("binomial", "betabinomial"))) securityAssertion("nKcase - wrong family") | |
2960 if(! (ncol(out)==2)) securityAssertion("nKcase - wrong dimensions of response") | |
2961 out = out[,1] | |
2962 } | |
2963 | |
2964 if(!is.numeric(out)) out = as.numeric(out) | |
2965 | |
2966 return(out) | |
2967 | |
2968 } | |
2969 | |
2970 #' @rdname getSimulations | |
2971 #' @export | |
2972 getSimulations.HLfit <- function(object, nsim = 1, type = c("normal", "refit"), ...){ | |
2973 | |
2974 type <- match.arg(type) | |
2975 | |
2976 capture.output({out = simulate(object, nsim = nsim, ...)}) | |
2977 | |
2978 if(type == "normal"){ | |
2979 if(!is.matrix(out)) out = data.matrix(out) | |
2980 }else{ | |
2981 out = as.data.frame(out) | |
2982 } | |
2983 return(out) | |
2984 } | |
2985 | |
2986 #' @rdname getRefit | |
2987 #' @export | |
2988 getRefit.HLfit <- function(object, newresp, ...) { | |
2989 spaMM::update_resp(object, newresp, evaluate = TRUE) | |
2990 } | |
2991 | |
2992 ####################### compatibility.R | |
2993 | |
2994 ####################### helper.R | |
2995 | |
2996 #' Modified ECDF function | |
2997 #' | |
2998 #' @details ensures symmetric ECDF (standard ECDF is <), and that 0 / 1 values are only produced if the data is strictly < > than the observed data | |
2999 #' | |
3000 #' @keywords internal | |
3001 DHARMa.ecdf <- function (x) | |
3002 { | |
3003 x <- sort(x) | |
3004 n <- length(x) | |
3005 if (n < 1) | |
3006 stop(paste("DHARMa.ecdf - length vector < 1", x)) | |
3007 vals <- unique(x) | |
3008 rval <- approxfun(vals, cumsum(tabulate(match(x, vals)))/ (n +1), | |
3009 method = "linear", yleft = 0, yright = 1, ties = "ordered") | |
3010 class(rval) <- c("ecdf", "stepfun", class(rval)) | |
3011 assign("nobs", n, envir = environment(rval)) | |
3012 attr(rval, "call") <- sys.call() | |
3013 rval | |
3014 } | |
3015 | |
3016 | |
3017 | |
3018 #' calculate quantiles | |
3019 #' | |
3020 #' calculates residual quantiles from a given simulation | |
3021 #' | |
3022 #' @param simulations a matrix with simulations from a fitted model. Rows = observations, columns = replicate simulations | |
3023 #' @param observed a vector with the observed data | |
3024 #' @param integerResponse is the response integer-valued. Only has an effect for method = "traditional" | |
3025 #' @param method the quantile randomization method used. See details | |
3026 #' | |
3027 #' @details The function calculates residual quantiles from the simulated data. For continous distributions, this will simply the the value of the ecdf. | |
3028 #' | |
3029 #' For discrete data, there are two options implemented. | |
3030 #' | |
3031 #' The current default (available since DHARMa 0.3.1) are probability integral transform (PIT-) residuals (Smith, 1985; Dunn & Smyth, 1996; see also see also Warton, et al., 2017). | |
3032 #' | |
3033 #' Before DHARMa 0.3.1, a different randomization procedure was used, in which the a U(-0.5, 0.5) distribution was added on observations and simualtions for discrete distributions. For a completely discrete distribution, the two procedures should deliver equivalent results, but the second method has the disadvantage that a) one has to know if the distribution is discrete (DHARMa tries to recognize this automatically), and b) that it leads to inefficiencies for some distributions such as the the Tweedie, which are partly continous, partly discrte (see e.g. https://github.com/florianhartig/DHARMa/issues/168). | |
3034 #' | |
3035 #' @references | |
3036 #' | |
3037 #' Smith, J. Q. "Diagnostic checks of non-standard time series models." Journal of Forecasting 4.3 (1985): 283-291. | |
3038 #' | |
3039 #' Dunn, P.K., & Smyth, G.K. (1996). Randomized quantile residuals. Journal of Computational and Graphical Statistics 5, 236-244. | |
3040 #' | |
3041 #' Warton, David I., Loïc Thibaut, and Yi Alice Wang. "The PIT-trap—A “model-free” bootstrap procedure for inference about regression models with discrete, multivariate responses." PloS one 12.7 (2017) | |
3042 #' | |
3043 #' @export | |
3044 getQuantile <- function(simulations, observed, integerResponse, method = c("PIT", "traditional")){ | |
3045 | |
3046 method = match.arg(method) | |
3047 | |
3048 n = length(observed) | |
3049 if (nrow(simulations) != n) stop("DHARMa::getquantile: wrong dimension of simulations") | |
3050 nSim = ncol(simulations) | |
3051 | |
3052 | |
3053 if(method == "traditional"){ | |
3054 | |
3055 if(integerResponse == F){ | |
3056 | |
3057 if(any(duplicated(observed))) message("Model family was recognized or set as continuous, but duplicate values were detected in the response. Consider if you are fitting an appropriate model.") | |
3058 | |
3059 values = as.vector(simulations)[duplicated(as.vector(simulations))] | |
3060 if(length(values) > 0){ | |
3061 if (all(values%%1==0)){ | |
3062 integerResponse = T | |
3063 message("Model family was recognized or set as continuous, but duplicate values were detected in the simulation - changing to integer residuals (see ?simulateResiduals for details)") | |
3064 } else { | |
3065 message("Duplicate non-integer values found in the simulation. If this is because you are fitting a non-inter valued discrete response model, note that DHARMa does not perform appropriate randomization for such cases.") | |
3066 } | |
3067 | |
3068 } | |
3069 } | |
3070 | |
3071 scaledResiduals = rep(NA, n) | |
3072 for (i in 1:n){ | |
3073 if(integerResponse == T){ | |
3074 scaledResiduals[i] <- DHARMa.ecdf(simulations[i,] + runif(nSim, -0.5, 0.5))(observed[i] + runif(1, -0.5, 0.5)) | |
3075 }else{ | |
3076 scaledResiduals[i] <- DHARMa.ecdf(simulations[i,])(observed[i]) | |
3077 } | |
3078 } | |
3079 | |
3080 } else { | |
3081 | |
3082 | |
3083 scaledResiduals = rep(NA, n) | |
3084 for (i in 1:n){ | |
3085 min <- sum(simulations[i,] < observed[i]) / length(simulations[i,]) | |
3086 max <- sum(simulations[i,] <= observed[i]) / length(simulations[i,]) | |
3087 if (min == max) scaledResiduals[i] = DHARMa.ecdf(simulations[i,])(observed[i]) | |
3088 else{ | |
3089 scaledResiduals[i] = runif(1, min, max) | |
3090 } | |
3091 } | |
3092 } | |
3093 | |
3094 return(scaledResiduals) | |
3095 } | |
3096 | |
3097 # | |
3098 # | |
3099 # testData = createData(sampleSize = 200, family = gaussian(), | |
3100 # randomEffectVariance = 0, numGroups = 5) | |
3101 # fittedModel <- glmmTMB(observedResponse ~ Environment1, | |
3102 # data = testData) | |
3103 # simulationOutput <- simulateResiduals(fittedModel = fittedModel) | |
3104 # | |
3105 # sims = simulationOutput$simulatedResponse | |
3106 # sims[1, c(1,6,8)] = 0 | |
3107 # any(apply(sims, 1, anyDuplicated)) | |
3108 # getQuantile(simulations = sims, observed = testData$observedResponse, n = 200, integerResponse = F, nSim = 250) | |
3109 # | |
3110 # | |
3111 # | |
3112 | |
3113 | |
3114 | |
3115 #' Check dot operator | |
3116 #' | |
3117 #' @param name variable name | |
3118 #' @param default variable default | |
3119 #' | |
3120 #' @details modified from https://github.com/lcolladotor/dots | |
3121 #' | |
3122 #' @keywords internal | |
3123 checkDots <- function(name, default, ...) { | |
3124 args <- list(...) | |
3125 if(!name %in% names(args)) { | |
3126 ## Default value | |
3127 return(default) | |
3128 } else { | |
3129 ## If the argument was defined in the ... part, return it | |
3130 return(args[[name]]) | |
3131 } | |
3132 } | |
3133 | |
3134 | |
3135 securityAssertion <- function(context = "Not provided", stop = F){ | |
3136 generalMessage = "Message from DHARMa: During the execution of a DHARMa function, some unexpected conditions occurred. Even if you didn't get an error, your results may not be reliable. Please check with the help if you use the functions as intended. If you think that the error is not on your side, I would be grateful if you could report the problem at https://github.com/florianhartig/DHARMa/issues \n\n Context:" | |
3137 if (stop == F) warning(paste(generalMessage, context)) | |
3138 else stop(paste(generalMessage, context)) | |
3139 } | |
3140 | |
3141 ####################### helper.R | |
3142 | |
3143 ####################### plot.R | |
3144 | |
3145 #' DHARMa standard residual plots | |
3146 #' | |
3147 #' This function creates standard plots for the simulated residuals | |
3148 #' @param x an object with simulated residuals created by \code{\link{simulateResiduals}} | |
3149 #' @param rank if T (default), the values of pred will be rank transformed. This will usually make patterns easier to spot visually, especially if the distribution of the predictor is skewed. | |
3150 #' @param ... further options for \code{\link{plotResiduals}}. Consider in particular parameters quantreg, rank and asFactor. xlab, ylab and main cannot be changed when using plotSimulatedResiduals, but can be changed when using plotResiduals. | |
3151 #' @details The function creates two plots. To the left, a qq-uniform plot to detect deviations from overall uniformity of the residuals (calling \code{\link{plotQQunif}}), and to the right, a plot of residuals against predicted values (calling \code{\link{plotResiduals}}). Outliers are highlighted in red (for more on outliers, see \code{\link{testOutliers}}). For a correctly specified model, we would expect | |
3152 #' | |
3153 #' a) a straight 1-1 line in the uniform qq-plot -> evidence for an overall uniform (flat) distribution of the residuals | |
3154 #' | |
3155 #' b) uniformity of residuals in the vertical direction in the res against predictor plot | |
3156 #' | |
3157 #' Deviations of this can be interpreted as for a linear regression. See the vignette for detailed examples. | |
3158 #' | |
3159 #' To provide a visual aid in detecting deviations from uniformity in y-direction, the plot of the residuals against the predicted values also performs an (optional) quantile regression, which provides 0.25, 0.5 and 0.75 quantile lines across the plots. These lines should be straight, horizontal, and at y-values of 0.25, 0.5 and 0.75. Note, however, that some deviations from this are to be expected by chance, even for a perfect model, especially if the sample size is small. See further comments on this plot, its interpretation and options, in \code{\link{plotResiduals}} | |
3160 #' | |
3161 #' The quantile regression can take some time to calculate, especially for larger datasets. For that reason, quantreg = F can be set to produce a smooth spline instead. This is default for n > 2000. | |
3162 #' | |
3163 #' @seealso \code{\link{plotResiduals}}, \code{\link{plotQQunif}} | |
3164 #' @example inst/examples/plotsHelp.R | |
3165 #' @import graphics | |
3166 #' @import utils | |
3167 #' @export | |
3168 plot.DHARMa <- function(x, rank = TRUE, ...){ | |
3169 | |
3170 oldpar <- par(mfrow = c(1,2), oma = c(0,1,2,1)) | |
3171 on.exit(par(oldpar)) | |
3172 | |
3173 plotQQunif(x) | |
3174 plotResiduals(x, rank = rank, ...) | |
3175 | |
3176 mtext("DHARMa residual diagnostics", outer = T) | |
3177 } | |
3178 | |
3179 | |
3180 #' Histogram of DHARMa residuals | |
3181 #' | |
3182 #' The function produces a histogram from a DHARMa output | |
3183 #' | |
3184 #' @param x a DHARMa simulation output (class DHARMa) | |
3185 #' @param breaks breaks for hist() function | |
3186 #' @param col col for hist bars | |
3187 #' @param main plot main | |
3188 #' @param xlab plot xlab | |
3189 #' @param cex.main plot cex.main | |
3190 #' @param ... other arguments to be passed on to hist | |
3191 #' @seealso \code{\link{plotSimulatedResiduals}}, \code{\link{plotResiduals}} | |
3192 #' @example inst/examples/plotsHelp.R | |
3193 #' @export | |
3194 hist.DHARMa <- function(x, | |
3195 breaks = seq(-0.02, 1.02, len = 53), | |
3196 col = c("red",rep("lightgrey",50), "red"), | |
3197 main = "Hist of DHARMa residuals", | |
3198 xlab = "Residuals (outliers are marked red)", | |
3199 cex.main = 1, | |
3200 ...){ | |
3201 | |
3202 x = ensureDHARMa(x, convert = T) | |
3203 | |
3204 val = x$scaledResiduals | |
3205 val[val == 0] = -0.01 | |
3206 val[val == 1] = 1.01 | |
3207 | |
3208 hist(val, breaks = breaks, col = col, main = main, xlab = xlab, cex.main = cex.main, ...) | |
3209 } | |
3210 | |
3211 | |
3212 #' DHARMa standard residual plots | |
3213 #' | |
3214 #' DEPRECATED, use plot() instead | |
3215 #' | |
3216 #' @param simulationOutput an object with simulated residuals created by \code{\link{simulateResiduals}} | |
3217 #' @param ... further options for \code{\link{plotResiduals}}. Consider in particular parameters quantreg, rank and asFactor. xlab, ylab and main cannot be changed when using plotSimulatedResiduals, but can be changed when using plotResiduals. | |
3218 #' @note This function is deprecated. Use \code{\link{plot.DHARMa}} | |
3219 #' | |
3220 #' @seealso \code{\link{plotResiduals}}, \code{\link{plotQQunif}} | |
3221 #' @export | |
3222 plotSimulatedResiduals <- function(simulationOutput, ...){ | |
3223 message("plotSimulatedResiduals is deprecated, please switch your code to simply using the plot() function") | |
3224 plot(simulationOutput, ...) | |
3225 } | |
3226 | |
3227 | |
3228 #' Quantile-quantile plot for a uniform distribution | |
3229 #' | |
3230 #' The function produces a uniform quantile-quantile plot from a DHARMa output | |
3231 #' | |
3232 #' @param simulationOutput a DHARMa simulation output (class DHARMa) | |
3233 #' @param testUniformity if T, the function \code{\link{testUniformity}} will be called and the result will be added to the plot | |
3234 #' @param testOutliers if T, the function \code{\link{testOutliers}} will be called and the result will be added to the plot | |
3235 #' @param testDispersion if T, the function \code{\link{testDispersion}} will be called and the result will be added to the plot | |
3236 #' @param ... arguments to be passed on to \code{\link[gap]{qqunif}} | |
3237 #' | |
3238 #' @details the function calls qqunif from the R package gap to create a quantile-quantile plot for a uniform distribution. | |
3239 #' @seealso \code{\link{plotSimulatedResiduals}}, \code{\link{plotResiduals}} | |
3240 #' @example inst/examples/plotsHelp.R | |
3241 #' @export | |
3242 plotQQunif <- function(simulationOutput, testUniformity = T, testOutliers = T, testDispersion = T, ...){ | |
3243 | |
3244 simulationOutput = ensureDHARMa(simulationOutput, convert = "Model") | |
3245 | |
3246 gap::qqunif(simulationOutput$scaledResiduals,pch=2,bty="n", logscale = F, col = "black", cex = 0.6, main = "QQ plot residuals", cex.main = 1, ...) | |
3247 | |
3248 if(testUniformity == TRUE){ | |
3249 temp = testUniformity(simulationOutput, plot = F) | |
3250 legend("topleft", c(paste("KS test: p=", round(temp$p.value, digits = 5)), paste("Deviation ", ifelse(temp$p.value < 0.05, "significant", "n.s."))), text.col = ifelse(temp$p.value < 0.05, "red", "black" ), bty="n") | |
3251 } | |
3252 | |
3253 if(testOutliers == TRUE){ | |
3254 temp = testOutliers(simulationOutput, plot = F) | |
3255 legend("bottomright", c(paste("Outlier test: p=", round(temp$p.value, digits = 5)), paste("Deviation ", ifelse(temp$p.value < 0.05, "significant", "n.s."))), text.col = ifelse(temp$p.value < 0.05, "red", "black" ), bty="n") | |
3256 } | |
3257 | |
3258 if(testDispersion == TRUE){ | |
3259 temp = testDispersion(simulationOutput, plot = F) | |
3260 legend("center", c(paste("Dispersion test: p=", round(temp$p.value, digits = 5)), paste("Deviation ", ifelse(temp$p.value < 0.05, "significant", "n.s."))), text.col = ifelse(temp$p.value < 0.05, "red", "black" ), bty="n") | |
3261 } | |
3262 | |
3263 } | |
3264 | |
3265 | |
3266 #' Generic res ~ pred scatter plot with spline or quantile regression on top | |
3267 #' | |
3268 #' The function creates a generic residual plot with either spline or quantile regression to highlight patterns in the residuals. Outliers are highlighted in red. | |
3269 #' | |
3270 #' @param simulationOutput on object, usually a DHARMa object, from which residual values can be extracted. Alternatively, a vector with residuals or a fitted model can be provided, which will then be transformed into a DHARMa object. | |
3271 #' @param form optional predictor against which the residuals should be plotted. Default is to used the predicted(simulationOutput) | |
3272 #' @param quantreg whether to perform a quantile regression on 0.25, 0.5, 0.75 on the residuals. If F, a spline will be created instead. Default NULL chooses T for nObs < 2000, and F otherwise. | |
3273 #' @param rank if T, the values provided in form will be rank transformed. This will usually make patterns easier to spot visually, especially if the distribution of the predictor is skewed. If form is a factor, this has no effect. | |
3274 #' @param asFactor should a numeric predictor provided in form be treated as a factor. Default is to choose this for < 10 unique values, as long as enough predictions are available to draw a boxplot. | |
3275 #' @param smoothScatter if T, a smooth scatter plot will plotted instead of a normal scatter plot. This makes sense when the number of residuals is very large. Default NULL chooses T for nObs < 10000, and F otherwise. | |
3276 #' @param quantiles for a quantile regression, which quantiles should be plotted | |
3277 #' @param ... additional arguments to plot / boxplot. | |
3278 #' @details The function plots residuals against a predictor (by default against the fitted value, extracted from the DHARMa object, or any other predictor). | |
3279 #' | |
3280 #' Outliers are highlighted in red (for information on definition and interpretation of outliers, see \code{\link{testOutliers}}). | |
3281 #' | |
3282 #' To provide a visual aid in detecting deviations from uniformity in y-direction, the plot function calculates an (optional) quantile regression, which compares the empirical 0.25, 0.5 and 0.75 quantiles (default) in y direction (red solid lines) with the theoretical 0.25, 0.5 and 0.75 quantiles (dashed black line). | |
3283 #' | |
3284 #' Asymptotically (i.e. for lots of data / residuals), if the model is correct, theoretical and the empirical quantiles should be identical (i.e. dashed and solid lines should match). A p-value for the deviation is calculated for each quantile line. Significant deviations are highlighted by red color. | |
3285 #' | |
3286 #' If form is a factor, a boxplot will be plotted instead of a scatter plot. The distribution for each factor level should be uniformly distributed, so the box should go from 0.25 to 0.75, with the median line at 0.5. Again, chance deviations from this will increases when the sample size is smaller. You can run null simulations to test if the deviations you see exceed what you would expect from random variation. If you want to create box plots for categorical predictors (e.g. because you only have a small number of unique numeric predictor values), you can convert your predictor with as.factor(pred) | |
3287 #' | |
3288 #' @return if quantile tests are performed, the function returns them invisibly. | |
3289 #' | |
3290 #' @note The quantile regression can take some time to calculate, especially for larger datasets. For that reason, quantreg = F can be set to produce a smooth spline instead. | |
3291 #' | |
3292 #' @seealso \code{\link{plotQQunif}} | |
3293 #' @example inst/examples/plotsHelp.R | |
3294 #' @export | |
3295 plotResiduals <- function(simulationOutput, form = NULL, quantreg = NULL, rank = F, asFactor = NULL, smoothScatter = NULL, quantiles = c(0.25, 0.5, 0.75), ...){ | |
3296 | |
3297 | |
3298 ##### Checks ##### | |
3299 | |
3300 | |
3301 a <- list(...) | |
3302 a$ylab = checkDots("ylab", "Standardized residual", ...) | |
3303 if(is.null(form)){ | |
3304 a$xlab = checkDots("xlab", ifelse(rank, "Model predictions (rank transformed)", "Model predictions"), ...) | |
3305 } | |
3306 | |
3307 simulationOutput = ensureDHARMa(simulationOutput, convert = T) | |
3308 res = simulationOutput$scaledResiduals | |
3309 if(inherits(form, "DHARMa"))stop("DHARMa::plotResiduals > argument form cannot be of class DHARMa. Note that the syntax of plotResiduals has changed since DHARMa 0.3.0. See ?plotResiduals.") | |
3310 | |
3311 pred = ensurePredictor(simulationOutput, form) | |
3312 | |
3313 ##### Rank transform and factor conversion##### | |
3314 | |
3315 if(!is.factor(pred)){ | |
3316 | |
3317 if (rank == T){ | |
3318 pred = rank(pred, ties.method = "average") | |
3319 pred = pred / max(pred) | |
3320 } | |
3321 | |
3322 nuniq = length(unique(pred)) | |
3323 ndata = length(pred) | |
3324 if(is.null(asFactor)) asFactor = (nuniq == 1) | (nuniq < 10 & ndata / nuniq > 10) | |
3325 if (asFactor) pred = factor(pred) | |
3326 } | |
3327 | |
3328 ##### Residual scatter plots ##### | |
3329 | |
3330 if(is.null(quantreg)) if (length(res) > 2000) quantreg = FALSE else quantreg = TRUE | |
3331 | |
3332 switchScatter = 10000 | |
3333 if(is.null(smoothScatter)) if (length(res) > switchScatter) smoothScatter = TRUE else smoothScatter = FALSE | |
3334 | |
3335 blackcol = rgb(0,0,0, alpha = max(0.1, 1 - 3 * length(res) / switchScatter)) | |
3336 | |
3337 | |
3338 # categorical plot | |
3339 if(is.factor(pred)){ | |
3340 do.call(plot, append(list(res ~ pred, ylim = c(0,1), axes = FALSE), a)) | |
3341 } | |
3342 # smooth scatter | |
3343 else if (smoothScatter == TRUE) { | |
3344 defaultCol = ifelse(res == 0 | res == 1, 2,blackcol) | |
3345 do.call(graphics::smoothScatter, append(list(x = pred, y = res , ylim = c(0,1), axes = FALSE, colramp = colorRampPalette(c("white", "darkgrey"))),a)) | |
3346 points(pred[defaultCol == 2], res[defaultCol == 2], col = "red", cex = 0.5) | |
3347 } | |
3348 # normal plot | |
3349 else{ | |
3350 defaultCol = ifelse(res == 0 | res == 1, 2,blackcol) | |
3351 defaultPch = ifelse(res == 0 | res == 1, 8,1) | |
3352 a$col = checkDots("col", defaultCol, ...) | |
3353 a$pch = checkDots("pch", defaultPch, ...) | |
3354 do.call(plot, append(list(res ~ pred, ylim = c(0,1), axes = FALSE), a)) | |
3355 } | |
3356 | |
3357 axis(1) | |
3358 axis(2, at=c(0, 0.25, 0.5, 0.75, 1)) | |
3359 | |
3360 ##### Quantile regressions ##### | |
3361 | |
3362 main = checkDots("main", "Residual vs. predicted", ...) | |
3363 out = NULL | |
3364 | |
3365 if(is.numeric(pred)){ | |
3366 if(quantreg == F){ | |
3367 title(main = main, cex.main = 1) | |
3368 abline(h = c(0.25, 0.5, 0.75), col = "black", lwd = 0.5, lty = 2) | |
3369 try({ | |
3370 lines(smooth.spline(pred, res, df = 10), lty = 2, lwd = 2, col = "red") | |
3371 abline(h = 0.5, col = "red", lwd = 2) | |
3372 }, silent = T) | |
3373 }else{ | |
3374 | |
3375 out = testQuantiles(simulationOutput, pred, quantiles = quantiles, plot = F) | |
3376 | |
3377 | |
3378 if(any(out$pvals < 0.05, na.rm = TRUE)){ | |
3379 main = paste(main, "Quantile deviations detected (red curves)", sep ="\n") | |
3380 if(out$p.value <= 0.05){ | |
3381 main = paste(main, "Combined adjusted quantile test significant", sep ="\n") | |
3382 } else { | |
3383 main = paste(main, "Combined adjusted quantile test n.s.", sep ="\n") | |
3384 } | |
3385 maincol = "red" | |
3386 } else { | |
3387 main = paste(main, "No significant problems detected", sep ="\n") | |
3388 maincol = "black" | |
3389 } | |
3390 | |
3391 | |
3392 title(main = main, cex.main = 0.8, | |
3393 col.main = maincol) | |
3394 | |
3395 for(i in 1:length(quantiles)){ | |
3396 | |
3397 lineCol = ifelse(out$pvals[i] <= 0.05 & !(is.na(out$pvals[i])), "red", "black") | |
3398 filCol = ifelse(out$pvals[i] <= 0.05 & !(is.na(out$pvals[i])), "#FF000040", "#00000020") | |
3399 | |
3400 abline(h = quantiles[i], col = lineCol, lwd = 0.5, lty = 2) | |
3401 polygon(c(out$predictions$pred, rev(out$predictions$pred)), | |
3402 c(out$predictions[,2*i] - out$predictions[,2*i+1], rev(out$predictions[,2*i] + out$predictions[,2*i+1])), | |
3403 col = "#00000020", border = F) | |
3404 lines(out$predictions$pred, out$predictions[,2*i], col = lineCol, lwd = 2) | |
3405 } | |
3406 | |
3407 # legend("bottomright", c(paste("Quantile test: p=", round(out$p.value, digits = 5)), paste("Deviation ", ifelse(out$p.value < 0.05, "significant", "n.s."))), text.col = ifelse(out$p.value < 0.05, "red", "black" ), bty="n") | |
3408 | |
3409 } | |
3410 } | |
3411 invisible(out) | |
3412 } | |
3413 | |
3414 x = 0.01 | |
3415 x <= 0.05 & !(is.na(x)) | |
3416 | |
3417 | |
3418 #' Ensures the existence of a valid predictor to plot residuals against | |
3419 #' | |
3420 #' @param simulationOutput a DHARMa simulation output or an object that can be converted into a DHARMa simulation output | |
3421 #' @param predictor an optional predictor. If no predictor is provided, will try to extract the fitted value | |
3422 #' @keywords internal | |
3423 ensurePredictor <- function(simulationOutput, | |
3424 predictor = NULL){ | |
3425 if(!is.null(predictor)){ | |
3426 | |
3427 if(length(predictor) != length(simulationOutput$scaledResiduals)) stop("DHARMa: residuals and predictor do not have the same length. The issue is possibly that you have NAs in your predictor that were removed during the model fit. Remove the NA values from your predictor.") | |
3428 } else { | |
3429 | |
3430 predictor = simulationOutput$fittedPredictedResponse | |
3431 if(is.null(predictor)) stop("DHARMa: can't extract predictor from simulationOutput, and no predictor provided") | |
3432 } | |
3433 return(predictor) | |
3434 } | |
3435 | |
3436 | |
3437 | |
3438 | |
3439 #plot(simulationOutput) | |
3440 | |
3441 #plot(simulationOutput$observedResponse, simulationOutput$scaledResiduals, xlab = "predicted", ylab = "Residual", main = "Residual vs. predicted") | |
3442 | |
3443 #plot(simulationOutput$observedResponse, simulationOutput$fittedPredictedResponse - simulationOutput$observedResponse) | |
3444 | |
3445 #plot(cumsum(sort(simulationOutput$scaledResiduals))) | |
3446 | |
3447 | |
3448 #plotConventionalResiduals(fittedModel) | |
3449 | |
3450 | |
3451 #' Conventional residual plot | |
3452 #' | |
3453 #' Convenience function to draw conventional residual plots | |
3454 #' | |
3455 #' @param fittedModel a fitted model object | |
3456 #' @export | |
3457 plotConventionalResiduals <- function(fittedModel){ | |
3458 opar <- par(mfrow = c(1,3), oma = c(0,1,2,1)) | |
3459 on.exit(par(opar)) | |
3460 plot(predict(fittedModel), resid(fittedModel, type = "deviance"), main = "Deviance" , ylab = "Residual", xlab = "Predicted") | |
3461 plot(predict(fittedModel), resid(fittedModel, type = "pearson") , main = "Pearson", ylab = "Residual", xlab = "Predicted") | |
3462 plot(predict(fittedModel), resid(fittedModel, type = "response") , main = "Raw residuals" , ylab = "Residual", xlab = "Predicted") | |
3463 mtext("Conventional residual plots", outer = T) | |
3464 } | |
3465 | |
3466 | |
3467 | |
3468 | |
3469 # | |
3470 # | |
3471 # if(quantreg == F){ | |
3472 # | |
3473 # lines(smooth.spline(simulationOutput$fittedPredictedResponse, simulationOutput$scaledResiduals, df = 10), lty = 2, lwd = 2, col = "red") | |
3474 # | |
3475 # abline(h = 0.5, col = "red", lwd = 2) | |
3476 # | |
3477 # }else{ | |
3478 # | |
3479 # #library(gamlss) | |
3480 # | |
3481 # # qrnn | |
3482 # | |
3483 # # http://r.789695.n4.nabble.com/Quantile-GAM-td894280.html | |
3484 # | |
3485 # #require(quantreg) | |
3486 # #dat <- plyr::arrange(dat,pred) | |
3487 # #fit<-quantreg::rqss(resid~qss(pred,constraint="N"),tau=0.5,data = dat) | |
3488 # | |
3489 # probs = c(0.25, 0.50, 0.75) | |
3490 # | |
3491 # w <- p <- list() | |
3492 # for(i in seq_along(probs)){ | |
3493 # capture.output(w[[i]] <- qrnn::qrnn.fit(x = as.matrix(simulationOutput$fittedPredictedResponse), y = as.matrix(simulationOutput$scaledResiduals), n.hidden = 4, tau = probs[i], iter.max = 1000, n.trials = 1, penalty = 1)) | |
3494 # p[[i]] <- qrnn::qrnn.predict(as.matrix(sort(simulationOutput$fittedPredictedResponse)), w[[i]]) | |
3495 # } | |
3496 # | |
3497 # | |
3498 # | |
3499 # #plot(simulationOutput$fittedPredictedResponse, simulationOutput$scaledResiduals, xlab = "Predicted", ylab = "Residual", main = "Residual vs. predicted\n lines should match", cex.main = 1) | |
3500 # | |
3501 # #lines(sort(simulationOutput$fittedPredictedResponse), as.vector(p[[1]]), col = "red") | |
3502 # | |
3503 # matlines(sort(simulationOutput$fittedPredictedResponse), matrix(unlist(p), nrow = length(simulationOutput$fittedPredictedResponse), ncol = length(p)), col = "red", lty = 1) | |
3504 # | |
3505 # # as.vector(p[[1]]) | |
3506 # # | |
3507 # # | |
3508 # # lines(simulationOutput$fittedPredictedResponse,p[[1]], col = "red", lwd = 2) | |
3509 # # abline(h = 0.5, col = "red", lwd = 2) | |
3510 # # | |
3511 # # fit<-quantreg::rqss(resid~qss(pred,constraint="N"),tau=0.25,data = dat) | |
3512 # # lines(unique(dat$pred)[-1],fit$coef[1] + fit$coef[-1], col = "green", lwd = 2, lty =2) | |
3513 # # abline(h = 0.25, col = "green", lwd = 2, lty =2) | |
3514 # # | |
3515 # # fit<-quantreg::rqss(resid~qss(pred,constraint="N"),tau=0.75,data = dat) | |
3516 # # lines(unique(dat$pred)[-1],fit$coef[1] + fit$coef[-1], col = "blue", lwd = 2, lty = 2) | |
3517 # # abline(h = 0.75, col = "blue", lwd = 2, lty =2) | |
3518 # } | |
3519 | |
3520 ####################### plot.R | |
3521 | |
3522 ####################### random.R | |
3523 | |
3524 #' Record and restore a random state | |
3525 #' | |
3526 #' The aim of this function is to record, manipualate and restor a random state | |
3527 #' | |
3528 #' @details This function is intended for two (not mutually exclusive tasks) | |
3529 #' | |
3530 #' a) record the current random state | |
3531 #' | |
3532 #' b) change the current random state in a way that the previous state can be restored | |
3533 #' | |
3534 #' @return a list with various infos about the random state that after function execution, as well as a function to restore the previous state before the function execution | |
3535 #' | |
3536 #' @param seed seed argument to set.seed(). NULL = no seed, but random state will be restored. F = random state will not be restored | |
3537 #' @export | |
3538 #' @example inst/examples/getRandomStateHelp.R | |
3539 #' @author Florian Hartig | |
3540 #' | |
3541 getRandomState <- function(seed = NULL){ | |
3542 | |
3543 # better to explicitly access the global RS? | |
3544 # current = get(".Random.seed", .GlobalEnv, ifnotfound = NULL) | |
3545 | |
3546 current = mget(".Random.seed", envir = .GlobalEnv, ifnotfound = list(NULL))[[1]] | |
3547 | |
3548 if(is.logical(seed) & seed == F){ | |
3549 restoreCurrent <- function(){} | |
3550 }else{ | |
3551 restoreCurrent <- function(){ | |
3552 if(is.null(current)) rm(".Random.seed", envir = .GlobalEnv) else assign(".Random.seed", current , envir = .GlobalEnv) | |
3553 } | |
3554 } | |
3555 | |
3556 # setting seed | |
3557 if(is.numeric(seed)) set.seed(seed) | |
3558 | |
3559 # ensuring that RNG has been initialized | |
3560 if (is.null(current))runif(1) | |
3561 | |
3562 randomState = list(seed, state = get(".Random.seed", globalenv()), kind = RNGkind(), restoreCurrent = restoreCurrent) | |
3563 return(randomState) | |
3564 } | |
3565 | |
3566 ####################### random.R | |
3567 | |
3568 ######################################### Package DHARMa |