0
|
1 ### R-Script Adapted from script provided by the CEH, UK BY: Reto Schmucki [ reto.schmucki@mail.mcgill.ca]
|
|
2 ### DATE: 14 July 2014 function to run two stage model in DENNIS et al. 2013
|
|
3
|
|
4
|
|
5 .onAttach <- function(libname, pkgname) {
|
|
6 packageStartupMessage(" The regionalGAM package that is no longer maintained, \n use the new rbms package instead. \n
|
|
7 devtools::install_github(\"RetoSchmucki/rbms\", force=TRUE)")
|
|
8 }
|
|
9
|
|
10
|
|
11 #' year_day_func Function
|
|
12 #' This function generate the full sequence of days, months and include the observation to that file.
|
|
13 #' @param sp_data A data.frame with your observation.
|
|
14 #' @keywords year days
|
|
15 #' @export
|
|
16 #' @author Reto Schmucki
|
|
17 #' @examples
|
|
18 #' year_day_func()
|
|
19
|
|
20
|
|
21 # FUNCTIONS
|
|
22
|
|
23 year_day_func = function(sp_data) {
|
|
24
|
|
25 year <- unique(sp_data$YEAR)
|
|
26
|
|
27 origin.d <- paste(year, "01-01", sep = "-")
|
|
28 if ((year%%4 == 0) & ((year%%100 != 0) | (year%%400 == 0))) {
|
|
29 nday <- 366
|
|
30 } else {
|
|
31 nday <- 365
|
|
32 }
|
|
33
|
|
34 date.serie <- as.POSIXlt(seq(as.Date(origin.d), length = nday, by = "day"), format = "%Y-%m-%d")
|
|
35
|
|
36 dayno <- as.numeric(julian(date.serie, origin = as.Date(origin.d)) + 1)
|
|
37 month <- as.numeric(strftime(date.serie, format = "%m"))
|
|
38 week <- as.numeric(strftime(date.serie, format = "%W"))
|
|
39 week_day <- as.numeric(strftime(date.serie, format = "%u"))
|
|
40 day <- as.numeric(strftime(date.serie, format = "%d"))
|
|
41
|
|
42 site_list <- sp_data[!duplicated(sp_data$SITE), c("SITE")]
|
|
43
|
|
44 all_day_site <- data.frame(SPECIES = sp_data$SPECIES[1], SITE = rep(site_list, rep(nday, length(site_list))),
|
|
45 YEAR = sp_data$YEAR[1], MONTH = month, WEEK = week, DAY = day, DAY_WEEK = week_day, DAYNO = dayno,
|
|
46 COUNT = NA)
|
|
47
|
|
48 count_index <- match(paste(sp_data$SITE, sp_data$DAYNO, sep = "_"), paste(all_day_site$SITE, all_day_site$DAYNO,
|
|
49 sep = "_"))
|
|
50 all_day_site$COUNT[count_index] <- sp_data$COUNT
|
|
51 site_count_length <- aggregate(sp_data$COUNT, by = list(sp_data$SITE), function(x) list(1:length(x)))
|
|
52 names(site_count_length$x) <- as.character(site_count_length$Group.1)
|
|
53 site_countno <- utils::stack(site_count_length$x)
|
|
54 all_day_site$COUNTNO <- NA
|
|
55 all_day_site$COUNTNO[count_index] <- site_countno$values # add count number to ease extraction of single count
|
|
56
|
|
57 # Add zero to close observation season two weeks before and after the first and last
|
|
58 first_obs <- min(all_day_site$DAYNO[!is.na(all_day_site$COUNT)])
|
|
59 last_obs <- max(all_day_site$DAYNO[!is.na(all_day_site$COUNT)])
|
|
60
|
|
61 closing_season <- c((first_obs - 11):(first_obs - 7), (last_obs + 7):(last_obs + 11))
|
|
62
|
|
63 # If closing season is before day 1 or day 365, simply set the first and last 5 days to 0
|
|
64 if (min(closing_season) < 1)
|
|
65 closing_season[1:5] <- c(1:5)
|
|
66 if (max(closing_season) > nday)
|
|
67 closing_season[6:10] <- c((nday - 4):nday)
|
|
68
|
|
69 all_day_site$COUNT[all_day_site$DAYNO %in% closing_season] <- 0
|
|
70 all_day_site$ANCHOR <- 0
|
|
71 all_day_site$ANCHOR[all_day_site$DAYNO %in% closing_season] <- 1
|
|
72
|
|
73 all_day_site <- all_day_site[order(all_day_site$SITE, all_day_site$DAYNO), ]
|
|
74
|
|
75 return(all_day_site)
|
|
76 }
|
|
77
|
|
78
|
|
79 #' trap_area Function
|
|
80 #'
|
|
81 #' This function compute the area under the curve using the trapezoid method.
|
|
82 #' @param x A vector or a two columns matrix.
|
|
83 #' @param y A vector, Default is NULL
|
|
84 #' @keywords trapezoid
|
|
85 #' @export
|
|
86 #' @examples
|
|
87 #' trap_area()
|
|
88
|
|
89
|
|
90 trap_area = function(x, y = NULL) {
|
|
91 # If y is null and x has multiple columns then set y to x[,2] and x to x[,1]
|
|
92 if (is.null(y)) {
|
|
93 if (length(dim(x)) == 2) {
|
|
94 y = x[, 2]
|
|
95 x = x[, 1]
|
|
96 } else {
|
|
97 stop("ERROR: need to either specifiy both x and y or supply a two column data.frame/matrix to x")
|
|
98 }
|
|
99 }
|
|
100
|
|
101 # Check x and y are same length
|
|
102 if (length(x) != length(y)) {
|
|
103 stop("ERROR: x and y need to be the same length")
|
|
104 }
|
|
105
|
|
106 # Need to exclude any pairs that are NA for either x or y
|
|
107 rm_inds = which(is.na(x) | is.na(y))
|
|
108 if (length(rm_inds) > 0) {
|
|
109 x = x[-rm_inds]
|
|
110 y = y[-rm_inds]
|
|
111 }
|
|
112
|
|
113 # Determine values of trapezoids under curve Get inds
|
|
114 inds = 1:(length(x) - 1)
|
|
115 # Determine area using trapezoidal rule Area = ( (b1 + b2)/2 ) * h where b1 and b2 are lengths of bases
|
|
116 # (the parallel sides) and h is the height (the perpendicular distance between two bases)
|
|
117 areas = ((y[inds] + y[inds + 1])/2) * diff(x)
|
|
118
|
|
119 # total area is sum of all trapezoid areas
|
|
120 tot_area = sum(areas)
|
|
121
|
|
122 # Return total area
|
|
123 return(tot_area)
|
|
124 }
|
|
125
|
|
126
|
|
127 #' trap_index Function
|
|
128 #'
|
|
129 #' This function compute the area under the curve (Abundance Index) across species, sites and years
|
|
130 #' @param sp_data A data.frame containing species count data generated from the year_day_func()
|
|
131 #' @param y A vector, Default is NULL
|
|
132 #' @keywords Abundance index
|
|
133 #' @export
|
|
134 #' @examples
|
|
135 #' trap_index()
|
|
136
|
|
137
|
|
138
|
|
139 trap_index = function(sp_data, data_col = "IMP", time_col = "DAYNO", by_col = c("SPECIES", "SITE", "YEAR")) {
|
|
140
|
|
141 # Build output data.frame
|
|
142 out_obj = unique(sp_data[, by_col])
|
|
143 # Set row.names to be equal to collapsing of output rows (will be unique, you need them to make uploading
|
|
144 # values back to data.frame will be easier)
|
|
145 row.names(out_obj) = apply(out_obj, 1, paste, collapse = "_")
|
|
146
|
|
147 # Using this row.names from out_obj above as index in by function to loop through values all unique combs
|
|
148 # of by_cols and fit trap_area to data
|
|
149 ind_dat = by(sp_data[, c(time_col, data_col)], apply(sp_data[, by_col], 1, paste, collapse = "_"), trap_area)
|
|
150
|
|
151 # Add this data to output object
|
|
152 out_obj[names(ind_dat), "SINDEX"] = round(ind_dat/7, 1)
|
|
153
|
|
154 # Set row.names to defaults
|
|
155 row.names(out_obj) = NULL
|
|
156
|
|
157 # Return output object
|
|
158 return(out_obj)
|
|
159 }
|
|
160
|
|
161
|
|
162 #' flight_curve Function
|
|
163 #' This function compute the flight curve across and years
|
|
164 #' @param your_dataset A data.frame containing original species count data. The data format is a csv (comma "," separated) with 6 columns with headers, namely "species","transect_id","visit_year","visit_month","visit_day","sp_count"
|
|
165 #' @keywords standardize flight curve
|
|
166 #' @export
|
|
167 #' @examples
|
|
168 #' flight_curve()
|
|
169
|
|
170
|
|
171 flight_curve <- function(your_dataset) {
|
|
172
|
|
173 if("mgcv" %in% installed.packages() == "FALSE") {
|
|
174 print("mgcv package is not installed.")
|
|
175 x <- readline("Do you want to install it? Y/N")
|
|
176 if (x == 'Y') {
|
|
177 install.packages("mgcv")
|
|
178 }
|
|
179 if (x == 'N') {
|
|
180 stop("flight curve can not be computed without the mgcv package, sorry")
|
|
181 }
|
|
182 }
|
|
183 your_dataset$DAYNO <- strptime(paste(your_dataset$DAY, your_dataset$MONTH,
|
|
184 your_dataset$YEAR, sep = "/"), "%d/%m/%Y")$yday + 1
|
|
185 dataset <- your_dataset[, c("SPECIES", "SITE", "YEAR", "MONTH",
|
|
186 "DAY", "DAYNO", "COUNT")]
|
|
187 sample_year <- unique(dataset$YEAR)
|
|
188 sample_year <- sample_year[order(sample_year)]
|
|
189 if (length(sample_year) >1 ) {
|
|
190 for (y in sample_year) {
|
|
191 dataset_y <- dataset[dataset$YEAR == y, ]
|
|
192 nsite <- length(unique(dataset_y$SITE))
|
|
193 # Determine missing days and add to dataset
|
|
194 sp_data_all <- year_day_func(dataset_y)
|
|
195 if (nsite > 200) {
|
|
196 sp_data_all <- sp_data_all[as.character(sp_data_all$SITE) %in% as.character(unique(dataset_y$SITE)[sample(1:nsite,
|
|
197 200, replace = F)]), ]
|
|
198 sp_data_all <- sp_data_all
|
|
199 }
|
|
200 sp_data_all$trimDAYNO <- sp_data_all$DAYNO - min(sp_data_all$DAYNO) + 1
|
|
201 print(paste("Fitting the GAM for",as.character(sp_data_all$SPECIES[1]),"and year",y,"with",length(unique(sp_data_all$SITE)),"sites :",Sys.time()))
|
|
202 if(length(unique(sp_data_all$SITE))>1){
|
|
203 gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") + as.factor(SITE) -1,
|
|
204 data = sp_data_all, family = poisson(link = "log")), silent = TRUE)
|
|
205 }
|
|
206 else {
|
|
207 gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") -1,
|
|
208 data = sp_data_all, family = poisson(link = "log")), silent = TRUE)
|
|
209 }
|
|
210 # Give a second try if the GAM does not converge the first time
|
|
211 if (class(gam_obj_site)[1] == "try-error") {
|
|
212 # Determine missing days and add to dataset
|
|
213 sp_data_all <- year_day_func(dataset_y)
|
|
214 if (nsite > 200) {
|
|
215 sp_data_all <- sp_data_all[as.character(sp_data_all$SITE) %in% as.character(unique(dataset_y$SITE)[sample(1:nsite,
|
|
216 200, replace = F)]), ]
|
|
217 }
|
|
218 else {
|
|
219 sp_data_all <- sp_data_all
|
|
220 }
|
|
221 sp_data_all$trimDAYNO <- sp_data_all$DAYNO - min(sp_data_all$DAYNO) + 1
|
|
222 print(paste("Fitting the GAM for",sp_data_all$SPECIES[1],"at year", y,"with",length(unique(sp_data_all$SITE)),"sites :",Sys.time(),"second try"))
|
|
223 if(length(unique(sp_data_all$SITE))>1){
|
|
224 gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") + as.factor(SITE) -1,
|
|
225 data = sp_data_all, family = poisson(link = "log")), silent = TRUE)
|
|
226 }
|
|
227 else {
|
|
228 gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") -1,
|
|
229 data = sp_data_all, family = poisson(link = "log")), silent = TRUE)
|
|
230 }
|
|
231 if (class(gam_obj_site)[1] == "try-error") {
|
|
232 print(paste("Error in fitting the flight period for",sp_data_all$SPECIES[1],"at year", y,"no convergence after two trial"))
|
|
233 sp_data_all[, "FITTED"] <- NA
|
|
234 sp_data_all[, "COUNT_IMPUTED"] <- NA
|
|
235 sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- NA
|
|
236 sp_data_all[, "NM"] <- NA
|
|
237 }
|
|
238 else {
|
|
239 # Generate a list of values for all days from the additive model and use
|
|
240 # these value to fill the missing observations
|
|
241 sp_data_all[, "FITTED"] <- mgcv::predict.gam(gam_obj_site, newdata = sp_data_all[,
|
|
242 c("trimDAYNO", "SITE")], type = "response")
|
|
243 # force zeros at the beginning end end of the year
|
|
244 sp_data_all[sp_data_all$trimDAYNO < 60, "FITTED"] <- 0
|
|
245 sp_data_all[sp_data_all$trimDAYNO > 305, "FITTED"] <- 0
|
|
246 # if infinite number in predict replace with NA.
|
|
247 if(sum(is.infinite(sp_data_all[, "FITTED"]))>0){
|
|
248 print(paste("Error in the flight period for",sp_data_all$SPECIES[1],"at year", y,"weird predicted values"))
|
|
249 sp_data_all[, "FITTED"] <- NA
|
|
250 sp_data_all[, "COUNT_IMPUTED"] <- NA
|
|
251 sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- NA
|
|
252 sp_data_all[, "NM"] <- NA
|
|
253 }
|
|
254 else {
|
|
255 sp_data_all[, "COUNT_IMPUTED"] <- sp_data_all$COUNT
|
|
256 sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- sp_data_all$FITTED[is.na(sp_data_all$COUNT)]
|
|
257 # Define the flight curve from the fitted values and append them over
|
|
258 # years (this is one flight curve per year for all site)
|
|
259 site_sums <- aggregate(sp_data_all$FITTED, by = list(SITE = sp_data_all$SITE),
|
|
260 FUN = sum)
|
|
261 # Rename sum column
|
|
262 names(site_sums)[names(site_sums) == "x"] <- "SITE_YR_FSUM"
|
|
263 # Add data to sp_data data.frame (ensure merge does not sort the data!)
|
|
264 sp_data_all = merge(sp_data_all, site_sums, by <- c("SITE"),
|
|
265 all = TRUE, sort = FALSE)
|
|
266 # Calculate normalized values
|
|
267 sp_data_all[, "NM"] <- sp_data_all$FITTED/sp_data_all$SITE_YR_FSUM
|
|
268 }
|
|
269 }
|
|
270 }
|
|
271 else {
|
|
272 # Generate a list of values for all days from the additive model and use
|
|
273 # these value to fill the missing observations
|
|
274 sp_data_all[, "FITTED"] <- mgcv::predict.gam(gam_obj_site, newdata = sp_data_all[,
|
|
275 c("trimDAYNO", "SITE")], type = "response")
|
|
276 # force zeros at the beginning end end of the year
|
|
277 sp_data_all[sp_data_all$trimDAYNO < 60, "FITTED"] <- 0
|
|
278 sp_data_all[sp_data_all$trimDAYNO > 305, "FITTED"] <- 0
|
|
279 # if infinite number in predict replace with NA.
|
|
280 if(sum(is.infinite(sp_data_all[, "FITTED"]))>0){
|
|
281 print(paste("Error in the flight period for",sp_data_all$SPECIES[1],"at year", y,"weird predicted values"))
|
|
282 sp_data_all[, "FITTED"] <- NA
|
|
283 sp_data_all[, "COUNT_IMPUTED"] <- NA
|
|
284 sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- NA
|
|
285 sp_data_all[, "NM"] <- NA
|
|
286 }
|
|
287 else {
|
|
288 sp_data_all[, "COUNT_IMPUTED"] <- sp_data_all$COUNT
|
|
289 sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- sp_data_all$FITTED[is.na(sp_data_all$COUNT)]
|
|
290 # Define the flight curve from the fitted values and append them over
|
|
291 # years (this is one flight curve per year for all site)
|
|
292 site_sums = aggregate(sp_data_all$FITTED, by = list(SITE = sp_data_all$SITE),
|
|
293 FUN = sum)
|
|
294 # Rename sum column
|
|
295 names(site_sums)[names(site_sums) == "x"] = "SITE_YR_FSUM"
|
|
296 # Add data to sp_data data.frame (ensure merge does not sort the data!)
|
|
297 sp_data_all = merge(sp_data_all, site_sums, by = c("SITE"), all = TRUE,
|
|
298 sort = FALSE)
|
|
299 # Calculate normalized values
|
|
300 sp_data_all[, "NM"] = sp_data_all$FITTED/sp_data_all$SITE_YR_FSUM
|
|
301 }
|
|
302 }
|
|
303 sp_data_filled <- sp_data_all
|
|
304 flight_curve <- data.frame(species = sp_data_filled$SPECIES, year = sp_data_filled$YEAR,
|
|
305 week = sp_data_filled$WEEK, DAYNO = sp_data_filled$DAYNO, DAYNO_adj = sp_data_filled$trimDAYNO,
|
|
306 nm = sp_data_filled$NM)[!duplicated(paste(sp_data_filled$YEAR,
|
|
307 sp_data_filled$DAYNO, sep = "_")), ]
|
|
308 flight_curve <- flight_curve[order(flight_curve$DAYNO), ]
|
|
309 # bind if exist else create
|
|
310 if (is.na(flight_curve$nm[1])) next()
|
|
311 if ("flight_pheno" %in% ls()) {
|
|
312 flight_pheno <- rbind(flight_pheno, flight_curve)
|
|
313 }
|
|
314 else {
|
|
315 flight_pheno <- flight_curve
|
|
316 }
|
|
317 } # end of year loop
|
|
318 }
|
|
319 else {
|
|
320 y <- unique(dataset$YEAR)
|
|
321 dataset_y <- dataset[dataset$YEAR == y, ]
|
|
322 nsite <- length(unique(dataset_y$SITE))
|
|
323 # Determine missing days and add to dataset
|
|
324 sp_data_all <- year_day_func(dataset_y)
|
|
325 if (nsite > 200) {
|
|
326 sp_data_all <- sp_data_all[as.character(sp_data_all$SITE) %in% as.character(unique(dataset_y$SITE)[sample(1:nsite,
|
|
327 200, replace = F)]), ]
|
|
328 }
|
|
329 else {
|
|
330 sp_data_all <- sp_data_all
|
|
331 }
|
|
332 sp_data_all$trimDAYNO <- sp_data_all$DAYNO - min(sp_data_all$DAYNO) + 1
|
|
333 print(paste("Fitting the GAM for",sp_data_all$SPECIES[1],"at year", y,":",Sys.time()))
|
|
334 if(length(unique(sp_data_all$SITE))>1){
|
|
335 gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") + as.factor(SITE) -1,
|
|
336 data = sp_data_all, family = poisson(link = "log")), silent = TRUE)
|
|
337 }
|
|
338 else {
|
|
339 gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") -1,
|
|
340 data = sp_data_all, family = poisson(link = "log")), silent = TRUE)
|
|
341 }
|
|
342 # Give a second try if the GAM does not converge the first time
|
|
343 if (class(gam_obj_site)[1] == "try-error") {
|
|
344 # Determine missing days and add to dataset
|
|
345 sp_data_all <- year_day_func(dataset_y)
|
|
346 if (nsite > 200) {
|
|
347 sp_data_all <- sp_data_all[as.character(sp_data_all$SITE) %in% as.character(unique(dataset_y$SITE)[sample(1:nsite,
|
|
348 200, replace = F)]), ]
|
|
349 }
|
|
350 else {
|
|
351 sp_data_all <- sp_data_all
|
|
352 }
|
|
353 sp_data_all$trimDAYNO <- sp_data_all$DAYNO - min(sp_data_all$DAYNO) + 1
|
|
354 print(paste("Fitting the GAM for",sp_data_all$SPECIES[1],"at year", y,"with",length(unique(sp_data_all$SITE)),"sites :",Sys.time(),"second try"))
|
|
355 if(length(unique(sp_data_all$SITE))>1){
|
|
356 gam_obj_site <- try(mgcv::bam(COUNT ~ s(trimDAYNO, bs = "cr") + as.factor(SITE) - 1,
|
|
357 data = sp_data_all, family = poisson(link = "log")), silent = TRUE)
|
|
358 }
|
|
359 else {
|
|
360 gam_obj_site <- try(mgcv::gam(COUNT ~ s(trimDAYNO, bs = "cr") -1,
|
|
361 data = sp_data_all, family = poisson(link = "log")), silent = TRUE)
|
|
362 }
|
|
363 if (class(gam_obj_site)[1] == "try-error") {
|
|
364 print(paste("Error in fitting the flight period for",sp_data_all$SPECIES[1],"at year", y,"no convergence after two trial"))
|
|
365 sp_data_all[, "FITTED"] <- NA
|
|
366 sp_data_all[, "COUNT_IMPUTED"] <- NA
|
|
367 sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- NA
|
|
368 sp_data_all[, "NM"] <- NA
|
|
369 }
|
|
370 else {
|
|
371 # Generate a list of values for all days from the additive model and use
|
|
372 # these value to fill the missing observations
|
|
373 sp_data_all[, "FITTED"] <- mgcv::predict.gam(gam_obj_site, newdata = sp_data_all[,
|
|
374 c("trimDAYNO", "SITE")], type = "response")
|
|
375 # force zeros at the beginning end end of the year
|
|
376 sp_data_all[sp_data_all$trimDAYNO < 60, "FITTED"] <- 0
|
|
377 sp_data_all[sp_data_all$trimDAYNO > 305, "FITTED"] <- 0
|
|
378 # if infinite number in predict replace with NA.
|
|
379 if(sum(is.infinite(sp_data_all[, "FITTED"]))>0){
|
|
380 print(paste("Error in the flight period for",sp_data_all$SPECIES[1],"at year", y,"weird predicted values"))
|
|
381 sp_data_all[, "FITTED"] <- NA
|
|
382 sp_data_all[, "COUNT_IMPUTED"] <- NA
|
|
383 sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- NA
|
|
384 sp_data_all[, "NM"] <- NA
|
|
385 }
|
|
386 else {
|
|
387 sp_data_all[, "COUNT_IMPUTED"] <- sp_data_all$COUNT
|
|
388 sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- sp_data_all$FITTED[is.na(sp_data_all$COUNT)]
|
|
389 # Define the flight curve from the fitted values and append them over
|
|
390 # years (this is one flight curve per year for all site)
|
|
391 site_sums <- aggregate(sp_data_all$FITTED, by = list(SITE = sp_data_all$SITE),
|
|
392 FUN = sum)
|
|
393 # Rename sum column
|
|
394 names(site_sums)[names(site_sums) == "x"] <- "SITE_YR_FSUM"
|
|
395 # Add data to sp_data data.frame (ensure merge does not sort the data!)
|
|
396 sp_data_all = merge(sp_data_all, site_sums, by <- c("SITE"),
|
|
397 all = TRUE, sort = FALSE)
|
|
398 # Calculate normalized values
|
|
399 sp_data_all[, "NM"] <- sp_data_all$FITTED/sp_data_all$SITE_YR_FSUM
|
|
400 }
|
|
401 }
|
|
402 }
|
|
403 else {
|
|
404 # Generate a list of values for all days from the additive model and use
|
|
405 # these value to fill the missing observations
|
|
406 sp_data_all[, "FITTED"] <- mgcv::predict.gam(gam_obj_site, newdata = sp_data_all[,
|
|
407 c("trimDAYNO", "SITE")], type = "response")
|
|
408 # force zeros at the beginning end end of the year
|
|
409 sp_data_all[sp_data_all$trimDAYNO < 60, "FITTED"] <- 0
|
|
410 sp_data_all[sp_data_all$trimDAYNO > 305, "FITTED"] <- 0
|
|
411 # if infinite number in predict replace with NA.
|
|
412 if(sum(is.infinite(sp_data_all[, "FITTED"]))>0){
|
|
413 print(paste("Error in the flight period for",sp_data_all$SPECIES[1],"at year", y,"weird predicted values"))
|
|
414 sp_data_all[, "FITTED"] <- NA
|
|
415 sp_data_all[, "COUNT_IMPUTED"] <- NA
|
|
416 sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- NA
|
|
417 sp_data_all[, "NM"] <- NA
|
|
418 }
|
|
419 else {
|
|
420 sp_data_all[, "COUNT_IMPUTED"] <- sp_data_all$COUNT
|
|
421 sp_data_all[is.na(sp_data_all$COUNT), "COUNT_IMPUTED"] <- sp_data_all$FITTED[is.na(sp_data_all$COUNT)]
|
|
422 # Define the flight curve from the fitted values and append them over
|
|
423 # years (this is one flight curve per year for all site)
|
|
424 site_sums = aggregate(sp_data_all$FITTED, by = list(SITE = sp_data_all$SITE),
|
|
425 FUN = sum)
|
|
426 # Rename sum column
|
|
427 names(site_sums)[names(site_sums) == "x"] = "SITE_YR_FSUM"
|
|
428 # Add data to sp_data data.frame (ensure merge does not sort the data!)
|
|
429 sp_data_all = merge(sp_data_all, site_sums, by = c("SITE"), all = TRUE,
|
|
430 sort = FALSE)
|
|
431 # Calculate normalized values
|
|
432 sp_data_all[, "NM"] = sp_data_all$FITTED/sp_data_all$SITE_YR_FSUM
|
|
433 }
|
|
434 }
|
|
435 sp_data_filled <- sp_data_all
|
|
436 flight_curve <- data.frame(species = sp_data_filled$SPECIES, year = sp_data_filled$YEAR,
|
|
437 week = sp_data_filled$WEEK, DAYNO = sp_data_filled$DAYNO, DAYNO_adj = sp_data_filled$trimDAYNO,
|
|
438 nm = sp_data_filled$NM)[!duplicated(paste(sp_data_filled$YEAR,
|
|
439 sp_data_filled$DAYNO, sep = "_")), ]
|
|
440 flight_curve <- flight_curve[order(flight_curve$DAYNO), ]
|
|
441 if (is.na(flight_curve$nm[1])){
|
|
442 flight_pheno <- data.frame()
|
|
443 }
|
|
444 else {
|
|
445 # bind if exist else create
|
|
446 if ("flight_pheno" %in% ls()) {
|
|
447 flight_pheno <- rbind(flight_pheno, flight_curve)
|
|
448 }
|
|
449 else {
|
|
450 flight_pheno <- flight_curve
|
|
451 }
|
|
452 }
|
|
453 }
|
|
454 return(flight_pheno)
|
|
455 }
|
|
456
|
|
457
|
|
458 #' abundance_index Function
|
|
459 #'
|
|
460 #' This function compute the Abundance Index across sites and years from your dataset and the regional flight curve
|
|
461 #' @param your_dataset A data.frame containing original species count data. The data format is a csv (comma "," separated) with 6 columns with headers, namely "species","transect_id","visit_year","visit_month","visit_day","sp_count"
|
|
462 #' @param flight_pheno A data.frame for the regional flight curve computed with the function flight_curve
|
|
463 #' @keywords standardize flight curve
|
|
464 #' @export
|
|
465 #' @examples
|
|
466 #' abundance_index()
|
|
467
|
|
468 abundance_index <- function(your_dataset,flight_pheno) {
|
|
469
|
|
470 your_dataset$DAYNO <- strptime(paste(your_dataset$DAY, your_dataset$MONTH,
|
|
471 your_dataset$YEAR, sep = "/"), "%d/%m/%Y")$yday + 1
|
|
472
|
|
473 dataset <- your_dataset[, c("SPECIES", "SITE", "YEAR", "MONTH",
|
|
474 "DAY", "DAYNO", "COUNT")]
|
|
475
|
|
476 sample_year <- unique(dataset$YEAR)
|
|
477 sample_year <- sample_year[order(sample_year)]
|
|
478
|
|
479
|
|
480 if (length(sample_year)>1){
|
|
481
|
|
482 for (y in sample_year) {
|
|
483
|
|
484 year_pheno <- flight_pheno[flight_pheno$year == y, ]
|
|
485
|
|
486 dataset_y <- dataset[dataset$YEAR == y, ]
|
|
487
|
|
488 sp_data_site <- year_day_func(dataset_y)
|
|
489 sp_data_site$trimDAYNO <- sp_data_site$DAYNO - min(sp_data_site$DAYNO) + 1
|
|
490
|
|
491 sp_data_site <- merge(sp_data_site, year_pheno[, c("DAYNO", "nm")],
|
|
492 by = c("DAYNO"), all.x = TRUE, sort = FALSE)
|
|
493
|
|
494 # compute proportion of the flight curve sampled due to missing visits
|
|
495 pro_missing_count <- data.frame(SITE = sp_data_site$SITE, WEEK = sp_data_site$WEEK,
|
|
496 NM = sp_data_site$nm, COUNT = sp_data_site$COUNT, ANCHOR = sp_data_site$ANCHOR)
|
|
497 pro_missing_count$site_week <- paste(pro_missing_count$SITE, pro_missing_count$WEEK,
|
|
498 sep = "_")
|
|
499 siteweeknocount <- aggregate(pro_missing_count$COUNT, by = list(pro_missing_count$site_week),
|
|
500 function(x) sum(!is.na(x)) == 0)
|
|
501 pro_missing_count <- pro_missing_count[pro_missing_count$site_week %in%
|
|
502 siteweeknocount$Group.1[siteweeknocount$x == TRUE], ]
|
|
503 pro_count_agg <- aggregate(pro_missing_count$NM, by = list(pro_missing_count$SITE),
|
|
504 function(x) 1 - sum(x, na.rm = T))
|
|
505 names(pro_count_agg) <- c("SITE", "PROP_PHENO_SAMPLED")
|
|
506
|
|
507 # remove samples outside the monitoring window
|
|
508 sp_data_site$COUNT[sp_data_site$nm==0] <- NA
|
|
509
|
|
510 # Compute the regional GAM index
|
|
511
|
|
512 if(length(unique(sp_data_site$SITE))>1){
|
|
513 glm_obj_site <- glm(COUNT ~ factor(SITE) + offset(log(nm)) - 1, data = sp_data_site,
|
|
514 family = quasipoisson(link = "log"), control = list(maxit = 100))
|
|
515 } else {
|
|
516 glm_obj_site <- glm(COUNT ~ offset(log(nm)) - 1, data = sp_data_site,
|
|
517 family = quasipoisson(link = "log"), control = list(maxit = 100))
|
|
518 }
|
|
519
|
|
520 sp_data_site[, "FITTED"] <- predict.glm(glm_obj_site, newdata = sp_data_site,
|
|
521 type = "response")
|
|
522 sp_data_site[, "COUNT_IMPUTED"] <- sp_data_site$COUNT
|
|
523 sp_data_site[is.na(sp_data_site$COUNT), "COUNT_IMPUTED"] <- sp_data_site$FITTED[is.na(sp_data_site$COUNT)]
|
|
524
|
|
525 ## add fitted value for missing mid-week data
|
|
526 sp_data_site <- sp_data_site[!paste(sp_data_site$DAY_WEEK, sp_data_site$COUNT) %in%
|
|
527 c("1 NA", "2 NA", "3 NA", "5 NA", "6 NA", "7 NA"), ]
|
|
528
|
|
529 ## remove all added mid-week values for weeks with real count
|
|
530 ## (observation)
|
|
531 sp_data_site$site_week <- paste(sp_data_site$SITE, sp_data_site$WEEK,
|
|
532 sep = "_")
|
|
533 siteweekcount <- aggregate(sp_data_site$COUNT, by = list(sp_data_site$site_week),
|
|
534 function(x) sum(!is.na(x)) > 0)
|
|
535 sp_data_site <- sp_data_site[!(is.na(sp_data_site$COUNT) & (sp_data_site$site_week %in%
|
|
536 siteweekcount$Group.1[siteweekcount$x == TRUE])), names(sp_data_site) !=
|
|
537 "site_week"]
|
|
538
|
|
539 ## Compute the regional GAM index
|
|
540 print(paste("Compute index for",sp_data_site$SPECIES[1],"at year", y,"for",length(unique(sp_data_site$SITE)),"sites:",Sys.time()))
|
|
541 regional_gam_index <- trap_index(sp_data_site, data_col = "COUNT_IMPUTED",
|
|
542 time_col = "DAYNO", by_col = c("SPECIES", "SITE", "YEAR"))
|
|
543
|
|
544 cumu_index <- merge(regional_gam_index, pro_count_agg, by = c("SITE"),
|
|
545 all.x = TRUE, sort = FALSE)
|
|
546 names(cumu_index) <- c("SITE", "SPECIES", "YEAR", "regional_gam", "prop_pheno_sampled")
|
|
547
|
|
548 cumu_index <- cumu_index[order(cumu_index$SITE), ]
|
|
549
|
|
550 # bind if exist else create
|
|
551 if ("cumullated_indices" %in% ls()) {
|
|
552 cumullated_indices <- rbind(cumullated_indices, cumu_index)
|
|
553 } else {
|
|
554 cumullated_indices <- cumu_index
|
|
555 }
|
|
556
|
|
557 } # end of year loop
|
|
558
|
|
559 } else {
|
|
560
|
|
561 y <- unique(dataset$YEAR)
|
|
562 year_pheno <- flight_pheno[flight_pheno$year == y, ]
|
|
563
|
|
564 dataset_y <- dataset[dataset$YEAR == y, ]
|
|
565
|
|
566 sp_data_site <- year_day_func(dataset_y)
|
|
567 sp_data_site$trimDAYNO <- sp_data_site$DAYNO - min(sp_data_site$DAYNO) + 1
|
|
568
|
|
569 sp_data_site <- merge(sp_data_site, year_pheno[, c("DAYNO", "nm")],
|
|
570 by = c("DAYNO"), all.x = TRUE, sort = FALSE)
|
|
571
|
|
572 # compute proportion of the flight curve sampled due to missing visits
|
|
573 pro_missing_count <- data.frame(SITE = sp_data_site$SITE, WEEK = sp_data_site$WEEK,
|
|
574 NM = sp_data_site$nm, COUNT = sp_data_site$COUNT, ANCHOR = sp_data_site$ANCHOR)
|
|
575 pro_missing_count$site_week <- paste(pro_missing_count$SITE, pro_missing_count$WEEK,
|
|
576 sep = "_")
|
|
577 siteweeknocount <- aggregate(pro_missing_count$COUNT, by = list(pro_missing_count$site_week),
|
|
578 function(x) sum(!is.na(x)) == 0)
|
|
579 pro_missing_count <- pro_missing_count[pro_missing_count$site_week %in%
|
|
580 siteweeknocount$Group.1[siteweeknocount$x == TRUE], ]
|
|
581 pro_count_agg <- aggregate(pro_missing_count$NM, by = list(pro_missing_count$SITE),
|
|
582 function(x) 1 - sum(x, na.rm = T))
|
|
583 names(pro_count_agg) <- c("SITE", "PROP_PHENO_SAMPLED")
|
|
584
|
|
585 # remove samples outside the monitoring window
|
|
586 sp_data_site$COUNT[sp_data_site$nm==0] <- NA
|
|
587
|
|
588 # Compute the regional GAM index
|
|
589 if(length(unique(sp_data_site$SITE))>1){
|
|
590 glm_obj_site <- glm(COUNT ~ factor(SITE) + offset(log(nm)) - 1, data = sp_data_site,
|
|
591 family = quasipoisson(link = "log"), control = list(maxit = 100))
|
|
592 } else {
|
|
593 glm_obj_site <- glm(COUNT ~ offset(log(nm)) - 1, data = sp_data_site,
|
|
594 family = quasipoisson(link = "log"), control = list(maxit = 100))
|
|
595 }
|
|
596
|
|
597 sp_data_site[, "FITTED"] <- predict.glm(glm_obj_site, newdata = sp_data_site,
|
|
598 type = "response")
|
|
599 sp_data_site[, "COUNT_IMPUTED"] <- sp_data_site$COUNT
|
|
600 sp_data_site[is.na(sp_data_site$COUNT), "COUNT_IMPUTED"] <- sp_data_site$FITTED[is.na(sp_data_site$COUNT)]
|
|
601
|
|
602 # add fitted value for missing mid-week data
|
|
603 sp_data_site <- sp_data_site[!paste(sp_data_site$DAY_WEEK, sp_data_site$COUNT) %in%
|
|
604 c("1 NA", "2 NA", "3 NA", "5 NA", "6 NA", "7 NA"), ]
|
|
605
|
|
606 # remove all added mid-week values for weeks with real count
|
|
607 # (observation)
|
|
608 sp_data_site$site_week <- paste(sp_data_site$SITE, sp_data_site$WEEK,
|
|
609 sep = "_")
|
|
610 siteweekcount <- aggregate(sp_data_site$COUNT, by = list(sp_data_site$site_week),
|
|
611 function(x) sum(!is.na(x)) > 0)
|
|
612 sp_data_site <- sp_data_site[!(is.na(sp_data_site$COUNT) & (sp_data_site$site_week %in%
|
|
613 siteweekcount$Group.1[siteweekcount$x == TRUE])), names(sp_data_site) !=
|
|
614 "site_week"]
|
|
615
|
|
616 # Compute the regional gam index
|
|
617 print(paste("Compute index for",sp_data_site$SPECIES[1],"at year", y,"for",length(unique(sp_data_site$SITE)),"sites:",Sys.time()))
|
|
618 regional_gam_index <- trap_index(sp_data_site, data_col = "COUNT_IMPUTED",
|
|
619 time_col = "DAYNO", by_col = c("SPECIES", "SITE", "YEAR"))
|
|
620
|
|
621 cumu_index <- merge(regional_gam_index, pro_count_agg, by = c("SITE"),
|
|
622 all.x = TRUE, sort = FALSE)
|
|
623 names(cumu_index) <- c("SITE", "SPECIES", "YEAR", "regional_gam", "prop_pheno_sampled")
|
|
624
|
|
625 cumu_index <- cumu_index[order(cumu_index$SITE), ]
|
|
626
|
|
627 # bind if exist else create
|
|
628 if ("cumullated_indices" %in% ls()) {
|
|
629 cumullated_indices <- rbind(cumullated_indices, cumu_index)
|
|
630 } else {
|
|
631 cumullated_indices <- cumu_index
|
|
632 }
|
|
633
|
|
634 }
|
|
635
|
|
636 return(cumullated_indices)
|
|
637
|
|
638 }
|