Mercurial > repos > ecology > cb_ivr
view cb_ivr.r @ 1:b67730406f1b draft
planemo upload for repository https://github.com/Marie59/champ_blocs commit 0d86db7d42b608c386a54500064f5f9c9d7019a4
author | ecology |
---|---|
date | Wed, 04 Jan 2023 13:21:30 +0000 |
parents | 8c6142630659 |
children | bcbad4f83dec |
line wrap: on
line source
# author: "Jonathan Richir"
# date: "19 April 2021"
# Rscript

###############################
##                           ##
###############################

##### Packages : dplyr
#               tidyr
#               readr
#               writexl
#               stringr
#               readxl
#               tibble
#               lubridate
#               cowplot
#               magrittr
#               rmarkdown

library(magrittr)

##### Load arguments

args <- commandArgs(trailingOnly = TRUE)

### Import data
# args[1] is read below as the "fiche terrain" metadata csv,
# args[2] as the ivr data csv. Both are required by this section.
if (length(args) < 2) {
  stop("This tool needs at least 2 arguments")
}
fiche_val <- args[1]
input_data <- args[2]

#############################################################
#                                                           #
#               Load and clean the data                     #
#                                                           #
#############################################################

### load ivr data
# The file is read without header; the first row holds the column names,
# in which spaces are replaced by dots.
ivr <- read.csv2(input_data, header = FALSE, fileEncoding = "Latin1")
names_ <- as.vector(unlist(ivr[1, ]))
names_ <- gsub(" ", ".", names_)
colnames(ivr) <- names_
ivr <- ivr[-1, ]
ivr <- ivr[, -17]

# NB inversion between id and ID.Fiche variable names: swap them.
ivr <- dplyr::rename(ivr, XX = ID.Fiche)
ivr <- dplyr::rename(ivr, ID.Fiche = id)
ivr <- dplyr::rename(ivr, id = XX)

### load "Fiche terrain" file, the metadata
# fileEncoding = "Latin1" cfr accented characters in variable names
fiche <- read.csv2(fiche_val, fileEncoding = "Latin1")

# The first 10 characters of date.sortie are parsed as the date.
date_fiche <- as.Date(stringr::str_sub(fiche$date.sortie, end = 10), origin = "1970-01-01")
fiche <- tibble::add_column(fiche, date_fiche, .after = "date.sortie")
rm(date_fiche)

## ivr vs fiche terrain
ivr$id <- as.numeric(ivr[, c("id")])

fiche_red <- dplyr::filter(fiche, fiche$ID.Fiche %in% unique(ivr[, c("id")]))

# Number of ivr records per fiche. The resulting `n` column is kept in the
# merged table because later code selects columns by position.
id_count <- ivr %>%
  dplyr::group_by(id) %>%
  dplyr::count()
id_count <- dplyr::rename(id_count, "ID.Fiche" = "id")
id_count <- dplyr::ungroup(id_count)
id_count <- as.data.frame(id_count)

fiche_red <- dplyr::left_join(fiche_red, id_count, by = "ID.Fiche")

# Attach the fiche terrain information to every ivr record.
# Rows are looked up by id so that each ivr record receives its own fiche
# row whatever the row order of the two files (replicating fiche rows n
# times and binding by position is only correct when both files are sorted
# identically).
fiche_rows <- match(ivr$id, fiche_red$ID.Fiche)
if (anyNA(fiche_rows)) {
  stop("Some ivr records have no matching 'fiche terrain' (ID.Fiche) entry")
}
fiche_expanded <- fiche_red[fiche_rows, seq_len(ncol(fiche_red))]
fiche_expanded <- dplyr::rename(fiche_expanded, "id" = "ID.Fiche")

## merge ivr data and fiche terrain information
# bind_cols() repairs the duplicated `id` name into id...1 / id...17.
ivr <- dplyr::bind_cols(ivr, fiche_expanded)
ivr <- dplyr::rename(ivr, "id.ivr" = "id...1")
ivr <- dplyr::rename(ivr, "id.fiche" = "id...17")

rm(fiche_expanded, fiche_red, fiche_rows, id_count)

ivr <- ivr %>%
  tidyr::separate(date_fiche, c("Year", "Month", "Day"), sep = "-", remove = FALSE)

## Two new variables for Site names, one for data analysis and one for data
## reporting. Only works for the sites listed below!

# Pattern searched in the cleaned zone.habitat value -> site code used for
# data analysis. Order matters: the first matching pattern wins
# (e.g. "Plateau du Four" must be tested before "Plate").
site_patterns <- c(
  "Locmariaquer" = "GDMO_Locmariaquer",
  "Beg Lann" = "GDMO_BegLann",
  "Plateau du Four" = "FOUR_PlateauFour",
  "Grouin" = "EGMP_GroinCou",
  "Ensembert" = "EGMP_PasEmsembert",
  "Brée-les-Bains" = "EGMP_BreeBains",
  "Antiochat" = "EGMP_PerreAntiochat",
  "Chassiron" = "EGMP_Chassiron",
  "zone p" = "BASQ_FlotsBleusZP",
  "zone f" = "BASQ_FlotsBleusZF",
  "Saint-Michel" = "GONB_IlotStMichel",
  "Quéménès" = "FINS_Quemenes",
  "Goulenez" = "FINS_SeinGoulenez",
  "Kilaourou" = "FINS_SeinKilaourou",
  "Verdelet" = "ARMO_Verdelet",
  "Piégu" = "ARMO_Piegu",
  "Bilfot" = "ARMO_Bilfot",
  "Plate" = "ARMO_IlePlate",
  "Perharidy" = "PDMO_Perharidy",
  "Keraliou" = "BRES_Keraliou",
  "Mousterlin" = "FINS_Mousterlin",
  "Nicolas" = "FINS_StNicolasGlenan"
)

# Site code -> name used in reports/plots.
site_labels <- c(
  "GDMO_Locmariaquer" = "Locmariaquer",
  "GDMO_BegLann" = "Beg Lann",
  "FOUR_PlateauFour" = "Plateau du Four",
  "EGMP_GroinCou" = "Grouin du Cou",
  "EGMP_PasEmsembert" = "Le Pas d'Emsembert",
  "EGMP_BreeBains" = "La Brée-les-Bains",
  "EGMP_PerreAntiochat" = "Le Perré d'Antiochat",
  "EGMP_Chassiron" = "Chassiron",
  "BASQ_FlotsBleusZP" = "Les Flots Bleus / zone pêcheurs",
  "BASQ_FlotsBleusZF" = "Les Flots Bleus / zone familles",
  "GONB_IlotStMichel" = "Îlot Saint-Michel",
  "FINS_Quemenes" = "Quéménès",
  "FINS_SeinGoulenez" = "Île de Sein - Goulenez",
  "FINS_SeinKilaourou" = "Île de Sein - Kilaourou",
  "ARMO_Verdelet" = "Îlot du Verdelet",
  "ARMO_Piegu" = "Piégu",
  "ARMO_Bilfot" = "Pointe de Bilfot",
  "ARMO_IlePlate" = "Île Plate",
  "PDMO_Perharidy" = "Perharidy",
  "BRES_Keraliou" = "Keraliou",
  "FINS_Mousterlin" = "Pointe de Mousterlin",
  "FINS_StNicolasGlenan" = "Saint-Nicolas des Glénan",
  "FINS_AnseRoz" = "Pointe de l'Anse du Roz"
)

# Name for data analysis
ivr <- tibble::add_column(ivr, Site = ivr$zone.habitat, .after = "ID.Fiche")
ivr$Site <- gsub(pattern = " \\(champ de blocs\\)", replacement = "", ivr$Site)
ivr$Site <- gsub(pattern = " \\(champ blocs\\)", replacement = "", ivr$Site)

# Vectorised first-match-wins recoding: patterns are applied from last to
# first against the un-recoded values, so an earlier pattern overwrites a
# later one.
raw_site <- ivr$Site
for (k in rev(seq_along(site_patterns))) {
  hit <- grepl(pattern = names(site_patterns)[k], raw_site)
  ivr$Site[hit] <- site_patterns[[k]]
}

# "Roz" is tested separately, after the chain, and overrides it.
# NOTE(review): the test reads `ivr$site` (lower case), i.e. whatever column
# `$` resolves for "site", not the `Site` column built above - confirm this
# is intended. If no such column exists, fall back to `Site` instead of
# failing with "argument is of length zero".
roz_source <- ivr$site
if (is.null(roz_source)) {
  roz_source <- ivr$Site
}
ivr$Site[grepl(pattern = "Roz", roz_source)] <- "FINS_AnseRoz"
rm(raw_site, roz_source, hit, k)

# Name for report/plot: known codes get their label, anything else keeps
# the Site value.
ivr <- tibble::add_column(ivr, Site_bis = ivr$Site, .after = "Site")
label_idx <- match(ivr$Site, names(site_labels))
ivr$Site_bis <- ifelse(is.na(label_idx), ivr$Site_bis, unname(site_labels)[label_idx])
rm(label_idx, site_patterns, site_labels)

## change some variable format to integer
for (int_col in c("Nb.Blocs.Non.Retournes", "Nb.Blocs.Retournes",
                  "Year", "Month", "Day", "Numero.Quadrat")) {
  ivr[[int_col]] <- as.integer(ivr[[int_col]])
}
rm(int_col)

## save the final, complete ivr df.
# Put the fiche terrain columns first, then the ivr record columns
# (selection is positional).
ivr <- ivr[, c(19:54, 1:18)]

## percentage of unturned vs overturned boulders and IVR previous 0-5
## discrete scale values calculation

# create two new identifier variables first
site_year_month_day <- paste0(ivr$Site, ".", gsub("-", ".", as.character(ivr$date_fiche)))
ivr <- tibble::add_column(ivr, site_year_month_day, .after = "Site_bis")
rm(site_year_month_day)

# NOTE(review): the quadrat identifier reads `ivr$Date` whereas the variable
# above uses `date_fiche`. `ivr$Date` is kept when it resolves to a column;
# otherwise `date_fiche` is used so the date part is not silently empty.
quadrat_date <- ivr$Date
if (is.null(quadrat_date)) {
  quadrat_date <- ivr$date_fiche
}
site_year_month_day_qdnb <- paste0(ivr$Site, ".", gsub("-", ".", as.character(quadrat_date)), ".", ivr$Numero.Quadrat)
ivr <- tibble::add_column(ivr, site_year_month_day_qdnb, .after = "site_year_month_day")
rm(site_year_month_day_qdnb, quadrat_date)

ivr <- dplyr::arrange(ivr, Site, Year, Month, Numero.Quadrat)

# remove data with NA value for Nb.Blocs.Retournes & Nb.Blocs.Non.Retournes
ivr_naomit <- dplyr::filter(
  ivr,
  !is.na(ivr$Nb.Blocs.Retournes) & !is.na(ivr$Nb.Blocs.Non.Retournes)
)
ivr_naomit <- as.data.frame(ivr_naomit)

# also remove data with Nb.Blocs.Retournes = 0 & Nb.Blocs.Non.Retournes = 0,
# cfr equivalent to quadrat with no boulders ... makes no sense to consider
# quadrat without boulder for ivr determination.
# The test must be element-wise (`&`): the scalar `&&` only looks at the
# first row (and is an error on vectors since R 4.3).
ivr_naomit <- dplyr::filter(
  ivr_naomit,
  !(ivr_naomit$Nb.Blocs.Retournes == 0 & ivr_naomit$Nb.Blocs.Non.Retournes == 0)
)

ivr_val_qu_ <- ivr_naomit

#############################################################
#                                                           #
#                   Calcul of the IVR                       #
#                                                           #
#############################################################

### Percentage of turned / unturned boulders per quadrat (vectorised)
bm <- ivr_naomit$Nb.Blocs.Non.Retournes + ivr_naomit$Nb.Blocs.Retournes
ivr_val_qu_$blocs.retournes.fr. <- (ivr_naomit$Nb.Blocs.Retournes / bm) * 100
ivr_val_qu_$blocs.non.retournes.fr. <- (ivr_naomit$Nb.Blocs.Non.Retournes / bm) * 100
rm(bm)

# 0 / 0 yields NaN; store it as NA.
ivr_val_qu_$blocs.non.retournes.fr. <- ifelse(is.nan(ivr_val_qu_$blocs.non.retournes.fr.), NA, ivr_val_qu_$blocs.non.retournes.fr.)
ivr_val_qu_$blocs.retournes.fr. <- ifelse(is.nan(ivr_val_qu_$blocs.retournes.fr.), NA, ivr_val_qu_$blocs.retournes.fr.)

# ivr by quadrat, from the percentage of overturned boulders:
#   [0, 5) -> 0, [5, 25) -> 1, [25, 45) -> 2, [45, 65) -> 3,
#   [65, 85) -> 4, >= 85 -> 5; NA percentage -> NA.
# Computed for every row at once: the former `for (i in 1:seq_len(n))` loop
# only ever ran for i = 1 and recycled that single score to all quadrats.
ivr_val_qu_$valeur.ivr_quadrat <- as.numeric(
  findInterval(ivr_val_qu_$blocs.retournes.fr., c(5, 25, 45, 65, 85))
)

# reorder variables for logical purpose
# (non-overturned percentage before overturned percentage)
ivr_val_qu_ <- ivr_val_qu_[, c(1:56, 58, 57, 59)]
indic_full <- ivr_val_qu_
saveRDS(ivr_val_qu_, "ivr_val_qu.RDS")
rm(ivr_naomit)

## Calculate ivr statistics now, one row per survey (id.ivr / site / date)
ivr_val_qu_stat_ <- ivr_val_qu_ %>%
  dplyr::group_by(id.ivr, Site, Site_bis, Year, Month, Day) %>%
  dplyr::summarize(
    ivr_moy = mean(valeur.ivr_quadrat),
    ivr_et = sd(valeur.ivr_quadrat),
    ivr_med = median(valeur.ivr_quadrat),
    ivr_min = min(valeur.ivr_quadrat),
    ivr_max = max(valeur.ivr_quadrat),
    fr.r.moy = mean(blocs.retournes.fr.),
    fr.r.et = sd(blocs.retournes.fr.),
    fr.r.med = median(blocs.retournes.fr.),
    fr.r.min = min(blocs.retournes.fr.),
    fr.r.max = max(blocs.retournes.fr.),
    fr.nr.moy = mean(blocs.non.retournes.fr.),
    fr.nr.et = sd(blocs.non.retournes.fr.),
    fr.nr.med = median(blocs.non.retournes.fr.),
    fr.nr.min = min(blocs.non.retournes.fr.),
    fr.nr.max = max(blocs.non.retournes.fr.),
    nb. = dplyr::n()
  )

Date <- as.Date(paste0(ivr_val_qu_stat_$Year, "-", ivr_val_qu_stat_$Month, "-", ivr_val_qu_stat_$Day), origin = "1970-01-01")
ivr_val_qu_stat_ <- tibble::add_column(ivr_val_qu_stat_, Date, .after = "Site_bis")
rm(Date)

ivr_val_qu_stat_ <- as.data.frame(ivr_val_qu_stat_)
indic <- ivr_val_qu_stat_

#############################################################
#                                                           #
#                 Plot the IVR per site                     #
#                                                           #
#############################################################

## plot ivr (NB: Year, Month, Day variable names are replaced by Annee, Mois,
## Jour, cfr previous label use in the script)
ivr_val_qu_stat_ <- dplyr::rename(ivr_val_qu_stat_, Annee = Year)
ivr_val_qu_stat_ <- dplyr::rename(ivr_val_qu_stat_, Mois = Month)
ivr_val_qu_stat_ <- dplyr::rename(ivr_val_qu_stat_, Jour = Day)

# new IVR scale with continuous 0 to 5 environmental status levels based on
# % of overturned boulders / 20, plus other site data.
# One png per site: all surveys in grey, the site's own surveys in black
# (mean +/- sd), over six coloured status bands.
# NOTE: the `fill = "#......"` strings inside aes() are category labels, not
# colours. The displayed colours come from scale_fill_manual(), whose
# unnamed values are matched in order to the (sorted) labels - keep both
# lists exactly as they are unless the rendered colours are re-checked.
site_codes <- unique(ivr_val_qu_stat_$Site)

for (i in seq_along(site_codes)) {

  ivr_val_eg <- dplyr::filter(ivr_val_qu_stat_, ivr_val_qu_stat_$Site == site_codes[i])

  ivr_plot <- ggplot2::ggplot() +
    ggplot2::geom_point(ggplot2::aes(x = ivr_val_qu_stat_$Date, y = ivr_val_qu_stat_$fr.r.moy / 20), col = "grey") +
    ggplot2::geom_rect(ggplot2::aes(xmin = min(ivr_val_qu_stat_$Date), xmax = max(ivr_val_qu_stat_$Date), ymin = - 0.5, ymax = 5 / 20, fill = "#FF0000"), alpha = 0.3) +
    ggplot2::geom_rect(ggplot2::aes(xmin = min(ivr_val_qu_stat_$Date), xmax = max(ivr_val_qu_stat_$Date), ymin = 5 / 20, ymax = 25 / 20, fill = "#F59404"), alpha = 0.3) +
    ggplot2::geom_rect(ggplot2::aes(xmin = min(ivr_val_qu_stat_$Date), xmax = max(ivr_val_qu_stat_$Date), ymin = 25 / 20, ymax = 45 / 20, fill = "#FAFA15"), alpha = 0.3) +
    ggplot2::geom_rect(ggplot2::aes(xmin = min(ivr_val_qu_stat_$Date), xmax = max(ivr_val_qu_stat_$Date), ymin = 45 / 20, ymax = 65 / 20, fill = "#18E125"), alpha = 0.3) +
    ggplot2::geom_rect(ggplot2::aes(xmin = min(ivr_val_qu_stat_$Date), xmax = max(ivr_val_qu_stat_$Date), ymin = 65 / 20, ymax = 85 / 20, fill = "#04F5F5"), alpha = 0.3) +
    ggplot2::geom_rect(ggplot2::aes(xmin = min(ivr_val_qu_stat_$Date), xmax = max(ivr_val_qu_stat_$Date), ymin = 85 / 20, ymax = 5.5, fill = "#1A1AE8"), alpha = 0.3) +
    ggplot2::scale_fill_manual(values = c("#F59404", "#FAFA15", "#FF0000", "#04F5F5", "#18E125", "#1A1AE8")) +
    ggplot2::geom_pointrange(ggplot2::aes(x = ivr_val_eg$Date, y = ivr_val_eg$fr.r.moy / 20, ymin = ivr_val_eg$fr.r.moy / 20 - ivr_val_eg$fr.r.et / 20, ymax = ivr_val_eg$fr.r.moy / 20 + ivr_val_eg$fr.r.et / 20), col = "black") +
    ggplot2::xlab("Date") +
    ggplot2::ylab("IVR") +
    ggplot2::ggtitle(unique(ivr_val_eg$Site_bis)) +
    ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1), legend.position = "none")

  ggplot2::ggsave(paste0("ivr_", unique(ivr_val_eg$Site), ".png"), ivr_plot, height = 3, width = 3.5)
}
rm(site_codes)

# args[3] is stored as `report`; args[4] is an R file that is sourced here.
report <- args[3]
loop_file <- source(args[4])