comparison cb_dissimilarity.r @ 0:8c6142630659 draft

planemo upload for repository https://github.com/Marie59/champ_blocs commit 8b6fcddd239979c11977472de6cbb349690758c8
author ecology
date Fri, 02 Dec 2022 16:13:07 +0000
parents
children bcbad4f83dec
comparison
equal deleted inserted replaced
-1:000000000000 0:8c6142630659
1 # author: "Jonathan Richir"
2 # date: "01 October 2022"
3
4
5 #Rscript
6
7 ###############################
8 ## ##
9 ###############################
10
11 #####Packages : dplyr
12 # tidyr
13 # readr
14 # writexl
15 # stringr
16 # readxl
17 # tibble
18 # lubridate
19 # cowplot
20 # magrittr
21 # rmarkdown
22 library(magrittr)
23 #####Load arguments
24
25 args <- commandArgs(trailingOnly = TRUE)
26
27 #####Import data
28
29 if (length(args) < 1) {
30 stop("This tool needs at least 1 argument")
31 }else {
32 fiche_val <- args[1]
33 input_data <- args[2]
34 choice <- args[3]
35 choice_date <- as.numeric(args[4])
36 }
37
38 #############################################################
39 # #
40 # Loading and cleaning data #
41 # #
42 #############################################################
43 # load qecb data
44
45 qecb <- read.csv2(input_data, header = TRUE, fileEncoding = "Latin1") # fileEncoding = "Latin1", cfr é in variable names
46
47 # import csv files ficheterrain
48
49 fiche <- read.csv2(fiche_val, header = TRUE, fileEncoding = "Latin1") # fileEncoding = "Latin1", cfr é in variable names
50
51 ## work on "Fiche terrain"
52
53 date_fiche <- as.Date(stringr::str_sub(fiche$date.sortie, end = 10), origin = "1970-01-01")
54 fiche <- tibble::add_column(fiche, date_fiche, .after = "date.sortie")
55 rm(date_fiche)
56
57 ## qecb vs fiche terrain
58
59 fiche_red <- dplyr::filter(fiche, fiche$ID.Fiche %in% unique(qecb[, c("id")]))
60
61 id_count <- qecb %>% dplyr::group_by(id) %>% dplyr::count()
62 id_count <- dplyr::rename(id_count, "ID.Fiche" = "id")
63 id_count <- data.frame(id_count)
64 fiche_red <- dplyr::left_join(fiche_red, id_count)
65
66 # rep fiche terrain information
67 fiche_expanded <- fiche_red[rep(row.names(fiche_red), fiche_red$n), 1:ncol(fiche_red)]
68 fiche_expanded <- dplyr::rename(fiche_expanded, "id" = "ID.Fiche")
69
70 ## merge qecb data and ficheterrain information
71
72 qecb <- dplyr::bind_cols(qecb, fiche_expanded)
73 qecb <- dplyr::rename(qecb, "id_qecb" = "id...1")
74 qecb <- dplyr::rename(qecb, "id_fiche" = "id...68")
75
76 rm(fiche_expanded, fiche_red, id_count)
77
78 qecb <- qecb %>% tidyr::separate(date_fiche, c("Year", "Month", "Day"), sep = "-", remove = FALSE)
79
80
81 ## quadrat nb : in contrast to ivr df, quadrat number is missing for many observations in qecb df ; but from the Numero.Photo variable (boulder photo associated to field data collection), I could get back most missing quadrat numbers
82
83 quadrat <- stringr::str_extract(qecb$Numero.Photo, "Q[12345]")
84 qecb <- tibble::add_column(qecb, quadrat, .after = "Numero.Photo")
85 rm(quadrat)
86
87 # check
88 quadrat_bis <- rep(NA, length = nrow(qecb))
89 qecb <- tibble::add_column(qecb, quadrat_bis, .after = "quadrat")
90 rm(quadrat_bis)
91
92 qecb$quadrat_bis <- ifelse(qecb$Numéro.Bloc.échantillon %in% c(1, 2) & qecb$Type.Bloc == "Bloc mobile", "Q1", qecb$quadrat_bis)
93 qecb$quadrat_bis <- ifelse(qecb$Numéro.Bloc.échantillon %in% c(3, 4) & qecb$Type.Bloc == "Bloc mobile", "Q2", qecb$quadrat_bis)
94 qecb$quadrat_bis <- ifelse(qecb$Numéro.Bloc.échantillon %in% c(5, 6) & qecb$Type.Bloc == "Bloc mobile", "Q3", qecb$quadrat_bis)
95 qecb$quadrat_bis <- ifelse(qecb$Numéro.Bloc.échantillon %in% c(7, 8) & qecb$Type.Bloc == "Bloc mobile", "Q4", qecb$quadrat_bis)
96 qecb$quadrat_bis <- ifelse(qecb$Numéro.Bloc.échantillon %in% c(9, 10) & qecb$Type.Bloc == "Bloc mobile", "Q5", qecb$quadrat_bis)
97
98
99 qecb$quadrat_bis <- ifelse(qecb$Numéro.Bloc.échantillon == 1 & qecb$Type.Bloc %in% c("Bloc fixé", "Roche en place"), "Q1", qecb$quadrat_bis)
100 qecb$quadrat_bis <- ifelse(qecb$Numéro.Bloc.échantillon == 2 & qecb$Type.Bloc %in% c("Bloc fixé", "Roche en place"), "Q2", qecb$quadrat_bis)
101 qecb$quadrat_bis <- ifelse(qecb$Numéro.Bloc.échantillon == 3 & qecb$Type.Bloc %in% c("Bloc fixé", "Roche en place"), "Q3", qecb$quadrat_bis)
102 qecb$quadrat_bis <- ifelse(qecb$Numéro.Bloc.échantillon == 4 & qecb$Type.Bloc %in% c("Bloc fixé", "Roche en place"), "Q4", qecb$quadrat_bis)
103 qecb$quadrat_bis <- ifelse(qecb$Numéro.Bloc.échantillon == 5 & qecb$Type.Bloc %in% c("Bloc fixé", "Roche en place"), "Q5", qecb$quadrat_bis)
104
105 ## I create two new variables for Site names, one for data analysis and one for data reporting. Only works for actual ivr df with 22 sites !
106
107 # Name for data analysis
108
109 qecb <- tibble::add_column(qecb, Site = qecb$zone.habitat, .after = "ID.Fiche")
110
111 for (x in seq_along(qecb$Site)) {
112 if (grepl(pattern = "Locmariaquer", qecb$Site[x]) == TRUE) {
113 qecb$Site[x] <- "GDMO_Locmariaquer"
114 } else if (grepl(pattern = "Beg Lann", qecb$Site[x]) == TRUE) {
115 qecb$Site[x] <- "GDMO_BegLann"
116 } else if (grepl(pattern = "Plateau du Four", qecb$Site[x]) == TRUE) {
117 qecb$Site[x] <- "FOUR_PlateauFour"
118 } else if (grepl(pattern = "Grouin", qecb$Site[x]) == TRUE) {
119 qecb$Site[x] <- "EGMP_GroinCou"
120 } else if (grepl(pattern = "Ensembert", qecb$Site[x]) == TRUE) {
121 qecb$Site[x] <- "EGMP_PasEmsembert"
122 } else if (grepl(pattern = "Brée-les-Bains", qecb$Site[x]) == TRUE) {
123 qecb$Site[x] <- "EGMP_BreeBains"
124 } else if (grepl(pattern = "Antiochat", qecb$Site[x]) == TRUE) {
125 qecb$Site[x] <- "EGMP_PerreAntiochat"
126 } else if (grepl(pattern = "Chassiron", qecb$Site[x]) == TRUE) {
127 qecb$Site[x] <- "EGMP_Chassiron"
128 } else if (grepl(pattern = "zone p", qecb$Site[x]) == TRUE) {
129 qecb$Site[x] <- "BASQ_FlotsBleusZP"
130 } else if (grepl(pattern = "zone f", qecb$Site[x]) == TRUE) {
131 qecb$Site[x] <- "BASQ_FlotsBleusZF"
132 } else if (grepl(pattern = "Saint-Michel", qecb$Site[x]) == TRUE) {
133 qecb$Site[x] <- "GONB_IlotStMichel"
134 } else if (grepl(pattern = "Quéménès", qecb$Site[x]) == TRUE) {
135 qecb$Site[x] <- "FINS_Quemenes"
136 } else if (grepl(pattern = "Goulenez", qecb$Site[x]) == TRUE) {
137 qecb$Site[x] <- "FINS_SeinGoulenez"
138 } else if (grepl(pattern = "Kilaourou", qecb$Site[x]) == TRUE) {
139 qecb$Site[x] <- "FINS_SeinKilaourou"
140 } else if (grepl(pattern = "Verdelet", qecb$Site[x]) == TRUE) {
141 qecb$Site[x] <- "ARMO_Verdelet"
142 } else if (grepl(pattern = "Piégu", qecb$Site[x]) == TRUE) {
143 qecb$Site[x] <- "ARMO_Piegu"
144 } else if (grepl(pattern = "Bilfot", qecb$Site[x]) == TRUE) {
145 qecb$Site[x] <- "ARMO_Bilfot"
146 } else if (grepl(pattern = "Plate", qecb$Site[x]) == TRUE) {
147 qecb$Site[x] <- "ARMO_IlePlate"
148 } else if (grepl(pattern = "Perharidy", qecb$Site[x]) == TRUE) {
149 qecb$Site[x] <- "PDMO_Perharidy"
150 } else if (grepl(pattern = "Keraliou", qecb$Site[x]) == TRUE) {
151 qecb$Site[x] <- "BRES_Keraliou"
152 } else if (grepl(pattern = "Mousterlin", qecb$Site[x]) == TRUE) {
153 qecb$Site[x] <- "FINS_Mousterlin"
154 } else if (grepl(pattern = "Nicolas", qecb$Site[x]) == TRUE) {
155 qecb$Site[x] <- "FINS_StNicolasGlenan"
156 }
157 if (grepl(pattern = "Roz", qecb$site[x]) == TRUE) {
158 qecb$Site[x] <- "FINS_AnseRoz"
159 }
160 }
161
162 # Name for report/plot
163
164 qecb <- tibble::add_column(qecb, Site_bis = qecb$Site, .after = "Site")
165
166 qecb$Site_bis <- ifelse(qecb$Site == "GDMO_Locmariaquer", "Locmariaquer", qecb$Site_bis)
167 qecb$Site_bis <- ifelse(qecb$Site == "GDMO_BegLann", "Beg Lann", qecb$Site_bis)
168 qecb$Site_bis <- ifelse(qecb$Site == "FOUR_PlateauFour", "Plateau du Four", qecb$Site_bis)
169 qecb$Site_bis <- ifelse(qecb$Site == "EGMP_GroinCou", "Groin du Cou", qecb$Site_bis)
170 qecb$Site_bis <- ifelse(qecb$Site == "EGMP_PasEmsembert", "Le Pas d'Emsembert", qecb$Site_bis)
171 qecb$Site_bis <- ifelse(qecb$Site == "EGMP_BreeBains", "La Brée-les-Bains", qecb$Site_bis)
172 qecb$Site_bis <- ifelse(qecb$Site == "EGMP_PerreAntiochat", "Le Perré d'Antiochat", qecb$Site_bis)
173 qecb$Site_bis <- ifelse(qecb$Site == "EGMP_Chassiron", "Chassiron", qecb$Site_bis)
174 qecb$Site_bis <- ifelse(qecb$Site == "BASQ_FlotsBleusZP", "Les Flots Bleus / zone pêcheurs", qecb$Site_bis)
175 qecb$Site_bis <- ifelse(qecb$Site == "BASQ_FlotsBleusZF", "Les Flots Bleus / zone familles", qecb$Site_bis)
176 qecb$Site_bis <- ifelse(qecb$Site == "GONB_IlotStMichel", "Îlot Saint-Michel", qecb$Site_bis)
177 qecb$Site_bis <- ifelse(qecb$Site == "FINS_Quemenes", "Quéménès", qecb$Site_bis)
178 qecb$Site_bis <- ifelse(qecb$Site == "FINS_SeinGoulenez", "Île de Sein - Goulenez", qecb$Site_bis)
179 qecb$Site_bis <- ifelse(qecb$Site == "FINS_SeinKilaourou", "Île de Sein - Kilaourou", qecb$Site_bis)
180 qecb$Site_bis <- ifelse(qecb$Site == "ARMO_Verdelet", "Îlot du Verdelet", qecb$Site_bis)
181 qecb$Site_bis <- ifelse(qecb$Site == "ARMO_Piegu", "Piégu", qecb$Site_bis)
182 qecb$Site_bis <- ifelse(qecb$Site == "ARMO_Bilfot", "Pointe de Bilfot", qecb$Site_bis)
183 qecb$Site_bis <- ifelse(qecb$Site == "ARMO_IlePlate", "Île Plate", qecb$Site_bis)
184 qecb$Site_bis <- ifelse(qecb$Site == "PDMO_Perharidy", "Perharidy", qecb$Site_bis)
185 qecb$Site_bis <- ifelse(qecb$Site == "BRES_Keraliou", "Keraliou", qecb$Site_bis)
186 qecb$Site_bis <- ifelse(qecb$Site == "FINS_Mousterlin", "Pointe de Mousterlin", qecb$Site_bis)
187 qecb$Site_bis <- ifelse(qecb$Site == "FINS_StNicolasGlenan", "Saint-Nicolas des Glénan", qecb$Site_bis)
188 qecb$Site_bis <- ifelse(qecb$Site == "FINS_AnseRoz", "Pointe de l'Anse du Roz", qecb$Site_bis)
189
190 ## change some variables to factor
191
192 # change 'X..' variables that are indeed % to numeric; https://stackoverflow.com/questions/59410939/apply-function-to-all-variables-with-string-in-name
193 ix <- grep("^X..", names(qecb))
194 qecb[ix] <- lapply(qecb[ix], as.numeric)
195 rm(ix)
196
197
198 ## save the final, complete qecb df_
199
200 qecb <- qecb[, c(72:107, 1:71)]
201
202 ## qecb df preparation prior qecb calculation
203
204 # Several issues to solve in the df first
205
206
207 qecb$Type.Bloc <- factor(qecb$Type.Bloc, levels = c("Bloc mobile", "Bloc fixé", "Roche en place"))
208
209 qecb$Face <- factor(qecb$Face, levels = c("face supérieure", "face inférieure"))
210
211 qecb <- dplyr::arrange(qecb, Type.Bloc, Face, Numéro.Bloc.échantillon)
212
213 qecb <- tibble::add_column(qecb, site_year_month_day = paste0(qecb$Site, ".", qecb$Year, ".", qecb$Month, ".", qecb$Day), .after = "Site_bis")
214
215 # save qecb as a new df_ for analysis purpose => several changes to operate to run the code and functions
216
217 qecbnew <- qecb
218
219 # df with list object nb and corresponding site_year_month_day value to solve for loop issues
220
221 df_list_loop <- data.frame("site_year_month_day" = unique(qecbnew$site_year_month_day),
222 "loop nb" = c(1:seq_along(unique(qecbnew$site_year_month_day))))
223
224 # dplyr::filter for df that makes problem, then eventually correct in the dataframe for wrong coding; brackets (xx) for nb because will change when qecb df_ enlarged.
225 # these listed boulder field survey error when highlighted when running the loop, that ran into an error ; it was a step by step procedure with solving one listed observation after another when issues appeared. Surely not the best way to proceed, maybe better just to skip these surveys (site + date), but in the present case I wanted to keep most of the observations, therefore I corrected them manually whenever needed.
226
227 # list nb (28) - EGMP_BreeBains.2016.04.06
228 qecbnew$Face <- as.character(qecbnew$Face)
229 qecbnew$Face <- ifelse(qecbnew$ID.Fiche == "BDD_IVR&QECB_La Bree_20160406_VImport.xlsx" & qecbnew$Référence.bloc == "avr16-LaBreeB9sup", "face supérieure", qecbnew$Face)
230 qecbnew$Face <- ifelse(qecbnew$ID.Fiche == "BDD_IVR&QECB_La Bree_20160406_VImport.xlsx" & qecbnew$Référence.bloc == "avr16-LaBreeB10sup", "face supérieure", qecbnew$Face)
231 qecbnew$Face <- as.factor(qecbnew$Face)
232
233 # list nb 33 - EGMP_PerreAntiochat.2016.04.07
234 qecbnew$Face <- as.character(qecbnew$Face)
235 qecbnew$Face <- ifelse(qecbnew$ID.Fiche == "BDD_IVR&QECB_PerAnt_20160407_VImport.xlsx" & qecbnew$Référence.bloc == "avr16-PerAntB9sup", "face supérieure", qecbnew$Face)
236 qecbnew$Face <- ifelse(qecbnew$ID.Fiche == "BDD_IVR&QECB_PerAnt_20160407_VImport.xlsx" & qecbnew$Référence.bloc == "avr16-PerAntB10sup", "face supérieure", qecbnew$Face)
237 qecbnew$Face <- as.factor(qecbnew$Face)
238
239 # list nb 37 - EGMP_Chassiron.2016.03.09
240 qecbnew$Face <- as.character(qecbnew$Face)
241 qecbnew$Face <- ifelse(qecbnew$ID.Fiche == "BDD_IVR&QECB_Chassiron_20160309&10_VImport.xlsx" & qecbnew$Référence.bloc == "mars16-ChassB9sup", "face supérieure", qecbnew$Face)
242 qecbnew$Face <- ifelse(qecbnew$ID.Fiche == "BDD_IVR&QECB_Chassiron_20160309&10_VImport.xlsx" & qecbnew$Référence.bloc == "mars16-ChasB10sup", "face supérieure", qecbnew$Face)
243 qecbnew$Face <- as.factor(qecbnew$Face)
244
245 # list nb 76 - ARMO_Verdelet.2015.03.23
246 qecbnew$Face <- as.character(qecbnew$Face)
247 qecbnew$Face <- ifelse(qecbnew$ID.Fiche == "BDD_IVR&QECB_Verdelet_20150323_VImport.xlsx" & qecbnew$Référence.bloc == "mar15-VerB10inf", "face inférieure", qecbnew$Face)
248 qecbnew$Face <- as.factor(qecbnew$Face)
249
250 # list nb 116 - "GDMO_Locmariaquer.2018.09.10"
251 qecbnew$Type.Bloc <- as.character(qecbnew$Type.Bloc)
252 qecbnew$Type.Bloc <- ifelse(qecbnew$ID.Fiche == "2018-09-10-GDMO-CDB-001" & qecbnew$Numero.Photo == "2018-09-10_GDMO_01_CDB-5_sup_392578.jpg", "Roche en place", qecbnew$Type.Bloc)
253 qecbnew$Type.Bloc <- as.factor(qecbnew$Type.Bloc)
254 qecbnew$quadrat_bis <- ifelse(qecbnew$ID.Fiche == "2018-09-10-GDMO-CDB-001" & qecbnew$Numero.Photo == "2018-09-10_GDMO_01_CDB-5_sup_392578.jpg", "Q5", qecbnew$quadrat_bis)
255 qecbnew <- qecbnew %>% dplyr::filter(!(ID.Fiche == "2018-09-10-GDMO-CDB-001" & Numero.Photo == ""))
256
257 # Few sites to remove prior running the for loop because it was not just a encoding mistake for one data, but a globally wroing coding for the site + date survey.
258
259 qecb_i <- qecbnew %>% dplyr::filter(site_year_month_day == "FINS_StNicolasGlenan.2016.04.08") # no bloc fixe !
260 qecbnew <- qecbnew %>% dplyr::filter(site_year_month_day != "FINS_StNicolasGlenan.2016.04.08")
261 qecb_i <- qecbnew %>% dplyr::filter(site_year_month_day == "GDMO_Locmariaquer.2019.09.30") # most faces of blocs mobiles do not correspond to each other; only 3 over 10 boulder have data for both face supérieure and face inférieure
262 qecbnew <- qecbnew %>% dplyr::filter(site_year_month_day != "GDMO_Locmariaquer.2019.09.30")
263 rm(df_list_loop, qecb_i)
264
265
266 # check for species with count within sub-0.1m^2-quadrat (i.e. reduced size quadrat compare to most organisms on boulder to count them, because abundant ; then some extrapolation)
267
268 # first for Spirobranchus
269
270 qecbnew$Nb.Spirobranchus.lamarckii.total.ini <- qecbnew$Nb.Spirobranchus.lamarckii.total
271 qecbnew$Nb.Spirobranchus.lamarckii.total <- as.character(qecbnew$Nb.Spirobranchus.lamarckii.total)
272
273
274 qecbnew_spirobranchus <- (dplyr::filter(qecbnew, Nb.Spirobranchus.lamarckii.total %in% c(NA, "NaN", "Inf", "-Inf")))
275 qecbnew_spirobranchus[, c("Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B")] <- sapply(qecbnew_spirobranchus[, c("Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B")], as.character)
276 (spirobranchus_data <- subset(qecbnew_spirobranchus, !is.na(qecbnew_spirobranchus$Nb.Spirobranchus.lamarckii.1B) || !is.na(qecbnew_spirobranchus$Nb.Spirobranchus.lamarckii.2B) || !is.na(qecbnew_spirobranchus$Nb.Spirobranchus.lamarckii.3B) || !is.na(qecbnew_spirobranchus$Nb.Spirobranchus.lamarckii.4B) || !is.na(qecbnew_spirobranchus$Nb.Spirobranchus.lamarckii.5B))[, c("site_year_month_day", "Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B", "Nb.Spirobranchus.lamarckii.total")])
277
278 quemenes <- dplyr::filter(qecbnew, Site == "FINS_Quemenes")
279 quemenes <- dplyr::arrange(quemenes, date_fiche)
280 # for Quemenes, issue because for sampling date "FINS_Quemenes.2015.09.30" the 5 counts of Spirobranchus were encoded in 1B instead of total !!! I noticed this issue when mining data (see below), therefore I corrected before running below script for Spirobranchus.
281 qecbnew$Nb.Spirobranchus.lamarckii.total <- ifelse(qecbnew$site_year_month_day == "FINS_Quemenes.2015.09.30" & is.na(qecbnew$Nb.Spirobranchus.lamarckii.total), qecbnew$Nb.Spirobranchus.lamarckii.1B, qecbnew$Nb.Spirobranchus.lamarckii.total)
282
283 (quemenes <- dplyr::filter(qecbnew, site_year_month_day == "FINS_Quemenes.2015.09.30")[, c("site_year_month_day", "Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B", "Nb.Spirobranchus.lamarckii.total")])
284 rm(quemenes)
285
286 seinkilaourou <- dplyr::filter(qecbnew, Site == "FINS_SeinKilaourou")
287 seinkilaourou <- dplyr::arrange(seinkilaourou, date_fiche)
288 # same issue with SeinKilaourou
289 qecbnew$Nb.Spirobranchus.lamarckii.total <- ifelse(qecbnew$site_year_month_day == "FINS_SeinKilaourou.2015.04.21" & is.na(qecbnew$Nb.Spirobranchus.lamarckii.total), qecbnew$Nb.Spirobranchus.lamarckii.1B, qecbnew$Nb.Spirobranchus.lamarckii.total)
290 (seinkilaourou <- dplyr::filter(qecbnew, site_year_month_day == "FINS_SeinKilaourou.2015.04.21")[, c("site_year_month_day", "Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B", "Nb.Spirobranchus.lamarckii.total")])
291 rm(seinkilaourou)
292
293 # some more issues however with "x100"count data
294 spirobranchus <- subset(qecbnew, !is.na(qecbnew$Nb.Spirobranchus.lamarckii.1B) & !is.na(qecbnew$Nb.Spirobranchus.lamarckii.2B) & !is.na(qecbnew$Nb.Spirobranchus.lamarckii.3B) & !is.na(qecbnew$Nb.Spirobranchus.lamarckii.4B) & !is.na(qecbnew$Nb.Spirobranchus.lamarckii.5B) & !is.na(qecbnew$Nb.Spirobranchus.lamarckii.total))[, c("site_year_month_day", "Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B", "Nb.Spirobranchus.lamarckii.total")]
295
296 for (i in c(1:nrow(spirobranchus))) {
297 spirobranchus$mean.x.100[[i]] <- sum(spirobranchus[i, c(2:6)], na.rm = TRUE) / sum(!is.na(spirobranchus[i, c(2:6)])) * 100
298 }
299
300 spirobranchus$mean.x.100 <- unlist(spirobranchus$mean.x.100)
301 spirobranchus$Nb.Spirobranchus.lamarckii.total <- as.numeric(spirobranchus$Nb.Spirobranchus.lamarckii.total)
302 for (i in c(1:nrow(spirobranchus))) {
303 spirobranchus$diff[[i]] <- spirobranchus[i, "Nb.Spirobranchus.lamarckii.total"] - spirobranchus[i, "mean.x.100"]
304 }
305
306 spirobranchus$diff <- abs(as.integer(spirobranchus$diff))
307 spirobranchus <- dplyr::arrange(spirobranchus, desc(diff), mean.x.100)
308 spirobranchus <- dplyr::arrange(dplyr::filter(spirobranchus, diff != 0 & mean.x.100 != 0), desc(diff))
309
310 # check it all in the qecbnew df
311
312 for (i in c(1:nrow(qecbnew))) {
313 qecbnew$mean.x.100[[i]] <-
314 sum(qecbnew[i, c("Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B")], na.rm = TRUE) / sum(!is.na(qecbnew[i, c("Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B")])) * 100
315 } # sum of only NAs/0 = NaN; so replace NaN by Na
316 qecbnew$mean.x.100 <- as.character(qecbnew$mean.x.100)
317
318
319 for (i in c(1:nrow(qecbnew))) {
320 qecbnew$mean.x.100[[i]] <- ifelse(qecbnew$mean.x.100[[i]] == "NaN", NA, qecbnew$mean.x.100[[i]])
321 }
322 qecbnew$mean.x.100 <- as.integer(qecbnew$mean.x.100)
323
324 qecbnew$Nb.Spirobranchus.lamarckii.total <- as.integer(qecbnew$Nb.Spirobranchus.lamarckii.total)
325 qecbnew$Nb.Spirobranchus.lamarckii.total.diff <- abs((qecbnew$Nb.Spirobranchus.lamarckii.total - qecbnew$mean.x.100))
326 spirobranchus_diff <- qecbnew[, c("Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B", "Nb.Spirobranchus.lamarckii.total", "Nb.Spirobranchus.lamarckii.total.ini", "mean.x.100", "Nb.Spirobranchus.lamarckii.total.diff")]
327 spirobranchus_diff <- dplyr::arrange(spirobranchus_diff, desc(Nb.Spirobranchus.lamarckii.total.diff), mean.x.100)
328 spirobranchus_diff <- dplyr::arrange(dplyr::filter(spirobranchus_diff, Nb.Spirobranchus.lamarckii.total.diff != 0 & mean.x.100 != 0), desc(Nb.Spirobranchus.lamarckii.total.diff))
329
330 qecbnew$Nb.Spirobranchus.lamarckii.total <- ifelse(qecbnew$Nb.Spirobranchus.lamarckii.total.diff != 0 & qecbnew$mean.x.100 != 0, qecbnew$mean.x.100, qecbnew$Nb.Spirobranchus.lamarckii.total)
331 spirobranchus_diff <- qecbnew[, c("Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B", "Nb.Spirobranchus.lamarckii.total", "Nb.Spirobranchus.lamarckii.total.ini", "mean.x.100", "Nb.Spirobranchus.lamarckii.total.diff")]
332 spirobranchus_diff$Nb.Spirobranchus.lamarckii.total.diff <- abs(as.integer(spirobranchus_diff$Nb.Spirobranchus.lamarckii.total.diff))
333 spirobranchus_diff <- dplyr::arrange(spirobranchus_diff, desc(Nb.Spirobranchus.lamarckii.total.diff), mean.x.100)
334 spirobranchus_diff <- dplyr::arrange(dplyr::filter(spirobranchus_diff, Nb.Spirobranchus.lamarckii.total.diff != 0 & mean.x.100 != 0), desc(Nb.Spirobranchus.lamarckii.total.diff))
335 # ok, change made when data x 100 was not correct.
336
337 # finally, change NA by mean.x100 for Spirobranchus total
338 qecbnew$Nb.Spirobranchus.lamarckii.total <- as.character(qecbnew$Nb.Spirobranchus.lamarckii.total)
339 for (i in c(1:nrow(qecbnew))) {
340 qecbnew$Nb.Spirobranchus.lamarckii.total[[i]] <- ifelse(qecbnew$Nb.Spirobranchus.lamarckii.total[[i]] %in% c(NA, "NaN", "Inf", "-Inf"), sum(qecbnew[i, c("Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B")], na.rm = TRUE) / sum(!is.na(qecbnew[i, c("Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B")])) * 100, qecbnew$Nb.Spirobranchus.lamarckii.total[[i]])
341 } # sum of only NAs/0 = NaN; so replace NaN by Na
342
343
344 for (i in c(1:nrow(qecbnew))) {
345 qecbnew$Nb.Spirobranchus.lamarckii.total[[i]] <- ifelse(qecbnew$Nb.Spirobranchus.lamarckii.total[[i]] == "NaN", NA, qecbnew$Nb.Spirobranchus.lamarckii.total[[i]])
346 }
347
348 qecbnew$Nb.Spirobranchus.lamarckii.total <- as.integer(qecbnew$Nb.Spirobranchus.lamarckii.total)
349
350 qecbnew$Nb.Spirobranchus.lamarckii.total.diff <- abs(qecbnew$Nb.Spirobranchus.lamarckii.total - qecbnew$Nb.Spirobranchus.lamarckii.total.ini)
351 spirobranchus_diff <- qecbnew[, c("Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B", "Nb.Spirobranchus.lamarckii.total", "Nb.Spirobranchus.lamarckii.total.ini", "mean.x.100", "Nb.Spirobranchus.lamarckii.total.diff")]
352 spirobranchus_diff <- dplyr::arrange(spirobranchus_diff, desc(Nb.Spirobranchus.lamarckii.total.diff), mean.x.100)
353 spirobranchus_diff <- dplyr::arrange(dplyr::filter(spirobranchus_diff, Nb.Spirobranchus.lamarckii.total.diff != 0 & mean.x.100 != 0), desc(Nb.Spirobranchus.lamarckii.total.diff))
354
355 qecbnew <- subset(qecbnew, select = -c(Nb.Spirobranchus.lamarckii.total.ini, mean.x.100, Nb.Spirobranchus.lamarckii.total.diff))
356
357 rm(qecbnew_spirobranchus, spirobranchus, spirobranchus_data, spirobranchus_diff)
358
359 # do the same for spirorbis
360
361 qecbnew$Nb.spirorbis.total.ini <- qecbnew$Nb.spirorbis.total
362 qecbnew$Nb.spirorbis.total <- as.character(qecbnew$Nb.spirorbis.total)
363
364 qecbnew_spirorbis <- (dplyr::filter(qecbnew, Nb.spirorbis.total %in% c(NA, "NaN", "Inf", "-Inf")))
365 qecbnew_spirorbis[, c("Nb.spirorbis.1A", "Nb.spirorbis.2A", "Nb.spirorbis.3A", "Nb.spirorbis.4A", "Nb.spirorbis.5A")] <- sapply(qecbnew_spirorbis[, c("Nb.spirorbis.1A", "Nb.spirorbis.2A", "Nb.spirorbis.3A", "Nb.spirorbis.4A", "Nb.spirorbis.5A")], as.character)
366 (spirobranchus_data <- subset(qecbnew_spirorbis, !is.na(qecbnew_spirorbis$Nb.spirorbis.1A) || !is.na(qecbnew_spirorbis$Nb.spirorbis.2A) || !is.na(qecbnew_spirorbis$Nb.spirorbis.3A) || !is.na(qecbnew_spirorbis$Nb.spirorbis.4A) || !is.na(qecbnew_spirorbis$Nb.spirorbis.5A))[, c("site_year_month_day", "Nb.spirorbis.1A", "Nb.spirorbis.2A", "Nb.spirorbis.3A", "Nb.spirorbis.4A", "Nb.spirorbis.5A", "Nb.spirorbis.total")])
367
368 # In contrast to Spirobranchus data, no encoding issues for spirorbis data, cfr when sub-quadrat 1A-5A are ALL encoded, NA for total.
369
370 # some more issues however with "x200"count data
371
372 spirorbis <- subset(qecbnew, !is.na(qecbnew$Nb.spirorbis.1A) & !is.na(qecbnew$Nb.spirorbis.2A) & !is.na(qecbnew$Nb.spirorbis.3A) & !is.na(qecbnew$Nb.spirorbis.4A) & !is.na(qecbnew$Nb.spirorbis.5A) & !is.na(qecbnew$Nb.spirorbis.total))[, c("site_year_month_day", "Nb.spirorbis.1A", "Nb.spirorbis.2A", "Nb.spirorbis.3A", "Nb.spirorbis.4A", "Nb.spirorbis.5A", "Nb.spirorbis.total")]
373 for (i in c(1:nrow(spirorbis))) {
374 spirorbis$mean.x.200[[i]] <- sum(spirorbis[i, c(2:6)], na.rm = TRUE) / sum(!is.na(spirorbis[i, c(2:6)])) * 200
375 }
376 spirorbis$mean.x.200 <- unlist(spirorbis$mean.x.200)
377 spirorbis$Nb.spirorbis.total <- as.numeric(spirorbis$Nb.spirorbis.total)
378 for (i in c(1:nrow(spirorbis))) {
379 spirorbis$diff[[i]] <- spirorbis[i, "Nb.spirorbis.total"] - spirorbis[i, "mean.x.200"]
380 }
381 spirorbis$diff <- abs(as.integer(spirorbis$diff))
382 spirorbis <- dplyr::arrange(spirorbis, desc(diff), mean.x.200)
383 (gonb_ilotstmichel_2015_04_18 <- dplyr::filter(spirorbis, site_year_month_day == "GONB_IlotStMichel.2015.04.18"))
384 rm(gonb_ilotstmichel_2015_04_18)
385 spirorbis <- dplyr::arrange(dplyr::filter(spirorbis, diff != 0 & mean.x.200 != 0), desc(diff))
386
387 # check it all in the qecbnew df
388
389 for (i in c(1:nrow(qecbnew))) {
390 qecbnew$mean.x.200[[i]] <- sum(qecbnew[i, c("Nb.spirorbis.1A", "Nb.spirorbis.2A", "Nb.spirorbis.3A", "Nb.spirorbis.4A", "Nb.spirorbis.5A")], na.rm = TRUE) / sum(!is.na(qecbnew[i, c("Nb.spirorbis.1A", "Nb.spirorbis.2A", "Nb.spirorbis.3A", "Nb.spirorbis.4A", "Nb.spirorbis.5A")])) * 200
391 } # sum of only NAs/0 = NaN; so replace NaN by Na
392 qecbnew$mean.x.200 <- as.character(qecbnew$mean.x.200)
393
394 for (i in c(1:nrow(qecbnew))) {
395 qecbnew$mean.x.200[[i]] <- ifelse(qecbnew$mean.x.200[[i]] == "NaN", NA, qecbnew$mean.x.200[[i]])
396 }
397
398 qecbnew$mean.x.200 <- as.integer(qecbnew$mean.x.200)
399
400 qecbnew$Nb.spirorbis.total <- as.integer(qecbnew$Nb.spirorbis.total)
401 qecbnew$Nb.spirorbis.total.diff <- abs((qecbnew$Nb.spirorbis.total - qecbnew$mean.x.200))
402 spirorbis_diff <- qecbnew[, c("Nb.spirorbis.1A", "Nb.spirorbis.2A", "Nb.spirorbis.3A", "Nb.spirorbis.4A", "Nb.spirorbis.5A", "Nb.spirorbis.total", "Nb.spirorbis.total.ini", "mean.x.200", "Nb.spirorbis.total.diff")]
403 spirorbis_diff <- dplyr::arrange(spirorbis_diff, desc(Nb.spirorbis.total.diff), mean.x.200)
404 spirorbis_diff <- dplyr::arrange(dplyr::filter(spirorbis_diff, Nb.spirorbis.total.diff != 0 & mean.x.200 != 0), desc(Nb.spirorbis.total.diff))
405
406 qecbnew$Nb.spirorbis.total <- ifelse(qecbnew$Nb.spirorbis.total.diff != 0 & qecbnew$mean.x.200 != 0, qecbnew$mean.x.200, qecbnew$Nb.spirorbis.total)
407 spirorbis_diff <- qecbnew[, c("Nb.spirorbis.1A", "Nb.spirorbis.2A", "Nb.spirorbis.3A", "Nb.spirorbis.4A", "Nb.spirorbis.5A", "Nb.spirorbis.total", "Nb.spirorbis.total.ini", "mean.x.200", "Nb.spirorbis.total.diff")]
408 spirorbis_diff$Nb.spirorbis.total.diff <- abs(as.integer(spirorbis_diff$Nb.spirorbis.total.diff))
409 spirorbis_diff <- dplyr::arrange(spirorbis_diff, desc(Nb.spirorbis.total.diff), mean.x.200)
410 spirorbis_diff <- dplyr::arrange(dplyr::filter(spirorbis_diff, Nb.spirorbis.total.diff != 0 & mean.x.200 != 0), desc(Nb.spirorbis.total.diff))
411 # ok, change made when data x 200 was not correct.
412
413 # finally, change NA by mean.x200 for spirorbis total
414 qecbnew$Nb.spirorbis.total <- as.character(qecbnew$Nb.spirorbis.total)
415 for (i in c(1:nrow(qecbnew))) {
416 qecbnew$Nb.spirorbis.total[[i]] <- ifelse(qecbnew$Nb.spirorbis.total[[i]] %in% c(NA, "NaN", "Inf", "-Inf"), sum(qecbnew[i, c("Nb.spirorbis.1A", "Nb.spirorbis.2A", "Nb.spirorbis.3A", "Nb.spirorbis.4A", "Nb.spirorbis.5A")], na.rm = TRUE) / sum(!is.na(qecbnew[i, c("Nb.spirorbis.1A", "Nb.spirorbis.2A", "Nb.spirorbis.3A", "Nb.spirorbis.4A", "Nb.spirorbis.5A")])) * 200, qecbnew$Nb.spirorbis.total[[i]])
417 } # sum of only NAs/0 = NaN; so replace NaN by Na
418
419 for (i in c(1:nrow(qecbnew))) {
420 qecbnew$Nb.spirorbis.total[[i]] <- ifelse(qecbnew$Nb.spirorbis.total[[i]] == "NaN", NA, qecbnew$Nb.spirorbis.total[[i]])
421 }
422
423 qecbnew$Nb.spirorbis.total <- as.integer(qecbnew$Nb.spirorbis.total)
424
425 qecbnew$Nb.spirorbis.total.diff <- abs(qecbnew$Nb.spirorbis.total - qecbnew$Nb.spirorbis.total.ini)
426 spirorbis_diff <- qecbnew[, c("Nb.spirorbis.1A", "Nb.spirorbis.2A", "Nb.spirorbis.3A", "Nb.spirorbis.4A", "Nb.spirorbis.5A", "Nb.spirorbis.total", "Nb.spirorbis.total.ini", "mean.x.200", "Nb.spirorbis.total.diff")]
427 spirorbis_diff <- dplyr::arrange(spirorbis_diff, desc(Nb.spirorbis.total.diff), mean.x.200)
428 spirorbis_diff <- dplyr::arrange(dplyr::filter(spirorbis_diff, Nb.spirorbis.total.diff != 0 & mean.x.200 != 0), desc(Nb.spirorbis.total.diff))
429
430 qecbnew <- subset(qecbnew, select = -c(Nb.spirorbis.total.ini, mean.x.200, Nb.spirorbis.total.diff))
431
432 rm(qecbnew_spirorbis, spirorbis, spirobranchus_data, spirorbis_diff, i)
433
434
435 # dplyr::filter for abnormal data, based on histogram distribution of data
436
437 dplyr::filter(qecbnew, X..algues.brunes > 100)[, c("Site", "date_fiche", "Type.Bloc", "Numéro.Bloc.échantillon", "Face", "X..algues.brunes")]
438 qecbnew$X..algues.brunes <- ifelse(qecbnew$X..algues.brunes > 100, 100, qecbnew$X..algues.brunes)
439 dplyr::filter(qecbnew, X..algues.rouges > 100)[, c("Site", "date_fiche", "Type.Bloc", "Numéro.Bloc.échantillon", "Face", "X..algues.rouges")]
440 qecbnew$X..algues.rouges <- ifelse(qecbnew$X..algues.rouges > 100, 100, qecbnew$X..algues.rouges)
441
442
443 ## SCRIPT I - NAs <- 0 ; cfr previous comment makes no sense to have NA encoded when the presence of an organism is in reality = 0
444
445 # We are facing an issues with NA observations, because either they were not measured/counted, then they are effectively NAs; or these NAs = indeed "0"; but I cannot have NAs for variables that are included in the index determination, cfr if 5+0 = 5, 5+NA = NA; see for example site_year_month_day == "ARMO_Bilfot.2014.04.28", Nb.Spirobranchus.lamarckii.total is NA ...
446 # I theregore change these NAs by 0
447
448 # replace NAs by "0" for variables used in qecb determination
449 qecbnew[, c("X..algues.brunes",
450 "X..algues.rouges",
451 "X..Lithophyllum",
452 "X..Cladophora",
453 "Nb.Littorina.obtusata",
454 "Nb.Gibbula.cineraria",
455 "Nb.Gibbula.pennanti",
456 "Nb.Gibbula.umbilicalis",
457 "X..Eponges",
458 "X..Ascidies.Coloniales",
459 "X..Ascidies.Solitaires",
460 "X..Bryozoaires.Dresses",
461 "X..algues.vertes",
462 "X..Roche.Nue",
463 "Nb.spirorbis.total",
464 "X..Balanes.Vivantes",
465 "Nb.Spirobranchus.lamarckii.total",
466 "X..Surface.Accolement")] <- lapply(qecbnew[,
467 c("X..algues.brunes",
468 "X..algues.rouges",
469 "X..Lithophyllum",
470 "X..Cladophora",
471 "Nb.Littorina.obtusata",
472 "Nb.Gibbula.cineraria",
473 "Nb.Gibbula.pennanti",
474 "Nb.Gibbula.umbilicalis",
475 "X..Eponges",
476 "X..Ascidies.Coloniales",
477 "X..Ascidies.Solitaires",
478 "X..Bryozoaires.Dresses",
479 "X..algues.vertes",
480 "X..Roche.Nue",
481 "Nb.spirorbis.total",
482 "X..Balanes.Vivantes",
483 "Nb.Spirobranchus.lamarckii.total",
484 "X..Surface.Accolement")],
485 function(x) replace(x, is.na(x), 0))
486
487 # and also replace NA for bivalve by 0 for EGMP and BASQ surveys cfr for accollement correction later on.
488
489 qecbnew$X..Mytilus.sp. <- ifelse((substr(qecbnew$Site, 1, 4) %in% c("EGMP", "BASQ")) & is.na(qecbnew$X..Mytilus.sp.), 0, qecbnew$X..Mytilus.sp.)
490 qecbnew$Nb.Crassostrea.gigas <- ifelse((substr(qecbnew$Site, 1, 4) %in% c("EGMP", "BASQ")) & is.na(qecbnew$Nb.Crassostrea.gigas), 0, qecbnew$Nb.Crassostrea.gigas)
491 qecbnew$Nb.Ostrea.edulis <- ifelse((substr(qecbnew$Site, 1, 4) %in% c("EGMP", "BASQ")) & is.na(qecbnew$Nb.Ostrea.edulis), 0, qecbnew$Nb.Ostrea.edulis)
492
493
494 # add a region variable
495 region <- rep(NA, nrow(qecbnew))
496 qecbnew <- tibble::add_column(qecbnew, region, .after = "Site_bis")
497 qecbnew$region <- ifelse(qecbnew$Site %in% c("EGMP_GroinCou", "EGMP_PasEmsembert", "EGMP_BreeBains", "EGMP_PerreAntiochat", "EGMP_Chassiron", "BASQ_FlotsBleusZP", "BASQ_FlotsBleusZF"), "EGMP.BASQ", "Bretagne")
498 rm(region)
499 qecbnew <- dplyr::arrange(qecbnew, region, site_year_month_day, Type.Bloc, Numéro.Bloc.échantillon, Face)
500
501 # accolement function according to recent 'retournement'
502
503 ## before I go further ahead, I have to correct for surface d'accollement for several variable for BM.FI !!
504
505 # not the same file name between script qecb script (qecbNew) and this script (qecbNew); doesn't matter, only appears here in the first dplyr::filter lines.
506
507 qecbnew <- tibble::add_column(qecbnew, terri_ = substr(qecbnew$Site, 1, 4), .after = "Site_bis")
508
509 qecbnew$X..Eponges_ini <- qecbnew$X..Eponges
510 qecbnew$X..Ascidies.Coloniales_ini <- qecbnew$X..Ascidies.Coloniales
511 qecbnew$X..Ascidies.Solitaires_ini <- qecbnew$X..Ascidies.Solitaires
512 qecbnew$X..Bryozoaires.Dresses_ini <- qecbnew$X..Bryozoaires.Dresses
513 qecbnew$X..Lithophyllum_ini <- qecbnew$X..Lithophyllum
514 qecbnew$X..Balanes.Vivantes_ini <- qecbnew$X..Balanes.Vivantes
515
516 df_bm_fs <- qecbnew %>% dplyr::filter(Type.Bloc == "Bloc mobile" & Face == "face supérieure")
517 df_bm_fi <- qecbnew %>% dplyr::filter(Type.Bloc == "Bloc mobile" & Face == "face inférieure")
518 df_bf <- qecbnew %>% dplyr::filter(Type.Bloc != "Bloc mobile")
519
520 `%notin%` <- Negate(`%in%`)
521
522 acco_fct <- function(var_) {
523
524 df_bm_fi$var_cor.acco. <<- NA
525
526 for (i in c(1:nrow(df_bm_fi))) {
527
528 df_bm_fi$var_cor.acco.[[i]] <<- if (df_bm_fi$terri_[[i]] %notin% c("EGMP", "BASQ")) {
529 ifelse(#df_$Couleur.dominante %in% c("Rouge", "Brune", "Brune-Rouge") ||
530 df_bm_fs$Couleur.dominante[[i]] %in% c("Blanche", "Verte", "Blanche-Verte", "Colorée"), df_bm_fi[i, var_] / (100 - df_bm_fi$X..Surface.Accolement[[i]]) * 100, df_bm_fi[i, var_])
531 } else {
532 ifelse(df_bm_fs$Couleur.dominante[[i]] %in% c("Blanche", "Verte", "Blanche-Verte", "Colorée")
533 & df_bm_fi$X..Surface.Accolement[[i]] != 0 # I have to use it in dplyr::filter this time as well for EGMP- BASQ (but not for Bretagne, although could be added, same result); identical/repeated measure for BM.FI and BM.FS
534 & df_bm_fs$X..Mytilus.sp.[[i]] == 0, df_bm_fi[i, var_] / (100 - df_bm_fi$X..Surface.Accolement[[i]]) * 100, df_bm_fi[i, var_])
535 }
536
537 }
538
539 }
540
541 # I would only consider colors in c("Rouge", "Brune", "Brune-Rouge") for BM.FI correction [ and not the series c("Blanche-Brune", "Rouge", "Brune", "Blanche-Rouge", "Brune-Rouge", "Rouge-Verte", "Brune-Verte") ] ; and for BM.FS, the list c("Blanche", "Verte", "Colorée") => we do the correction for BM.FI accollement based on BM.FS color !!!
542
543
544 # apply acco_fct to BM.FI variables
545
546 apply_acco_fct <- function(var_) {
547
548 show(sort(df_bm_fi[, var_], decreasing = TRUE, index.return = FALSE)[1:50])
549 pre_ <- as.vector(df_bm_fi[, var_])
550 acco_fct(var_)
551 df_bm_fi <<- tibble::add_column(df_bm_fi, var_cor. = df_bm_fi$var_cor.acco., .after = var_)
552 show(sort(df_bm_fi$var_cor., decreasing = TRUE, index.return = FALSE)[1:50])
553 df_bm_fi$var_cor. <<- as.numeric(ifelse(as.character(df_bm_fi$var_cor.) %in% c(NA, "NaN", "-Inf", "Inf"), "0", as.character(df_bm_fi$var_cor.)))
554 df_bm_fi$var_cor. <<- ifelse(df_bm_fi$var_cor. > 100, 100, df_bm_fi$var_cor.)
555 show(sort(df_bm_fi$var_cor., decreasing = TRUE, index.return = FALSE)[1:50])
556 show(length(na.omit(which(abs(as.vector(df_bm_fi$var_cor.) - pre_) != 0))) / na.omit(length(df_bm_fi$var_cor.)) * 100)
557 par(mfrow = c(1, 3))
558 hist(pre_, main = var_, xlab = "pre-corection")
559 hist(df_bm_fi$var_cor., main = var_, xlab = "post-corection")
560 hist(df_bm_fi[as.vector(which(abs(as.vector(df_bm_fi$var_cor.) - pre_) != 0)), var_], main = var_, xlab = "diff. post-pre != 0")
561 par(mfrow = c(1, 1))
562 df_bm_fi <<- df_bm_fi[, -which(names(df_bm_fi) %in% c(var_, "var_cor.acco."))]
563 colnames(df_bm_fi)[colnames(df_bm_fi) == "var_cor."] <<- var_
564
565 rm(pre_)
566
567 }
568
569 apply_acco_fct("X..Eponges")
570 apply_acco_fct("X..Ascidies.Coloniales")
571 apply_acco_fct("X..Ascidies.Solitaires")
572 apply_acco_fct("X..Bryozoaires.Dresses")
573 apply_acco_fct("X..Lithophyllum")
574 apply_acco_fct("X..Balanes.Vivantes")
575
576 qecbnew <- dplyr::bind_rows(df_bm_fs, df_bm_fi)
577 qecbnew <- dplyr::bind_rows(qecbnew, df_bf)
578
579 qecbnew <- dplyr::arrange(qecbnew, region, site_year_month_day, Type.Bloc, Numéro.Bloc.échantillon, Face)
580
581 # do remove some more data ...
582 # "FINS_Quemenes.2020.10.16", bad encoding, I let know Anna Capietto to make changes => was corrected normally, so unactivate next time I download ESTAMP data
583 qecbnew <- dplyr::filter(qecbnew, site_year_month_day != "FINS_Quemenes.2020.10.16")
584
585 # save the final qecbnew df_
586
587 qecb <- qecbnew
588
589
590 `%notin%` <- Negate(`%in%`)
591
592 ## reorder and/or create new variables
593
594 # variable site_year_month_day moved for clarity purpose, not needed necessarily
595 qecb <- tibble::add_column(qecb, qecb$site_year_month_day, .after = "Site_bis")
596 qecb <- qecb[, -which(names(qecb) %in% c("site_year_month_day"))]
597 qecb <- dplyr::rename(qecb, site_year_month_day = `qecb$site_year_month_day`)
598
599 # new variable period (nothing to see with the existing périod variable)
600 period <- rep(NA, nrow(qecb))
601 qecb <- tibble::add_column(qecb, period, .after = "Day")
602 qecb$period <- ifelse(as.numeric(qecb$Month) < 7, "p1", "p2")
603 qecb$period <- as.factor(qecb$period)
604 rm(period)
605
606 qecb <- dplyr::arrange(qecb, region, site_year_month_day, Type.Bloc, Numéro.Bloc.échantillon, Face)
607
608 # NB: les infos surface d'accolement sont dupliquées de la face inf vers la face sup de blocs mobiles (même si peu de sens d'avoir cette info pour les face sup ...)
609 # NB: les data "Abondance ressources ciblées par pêcheurs à pied" présentes uniquement pour les blocs mobiles sont dupliquées entre face inf et face sup.
610
611 ## SCRIPT I - NAs <- 0
612
613 # already performed for part in the CB_qecb script ; but here I also consider mobile organisms, logical observation (or not) according to boulders, faces etc ... so more complete. Could be some kind of script fusion to only keep Na to 0 correction in one script, i.e. moving this script to the CB_qecb script ...
614
615 bretagne_bm <- dplyr::filter(qecb, region == "Bretagne" & Type.Bloc == "Bloc mobile")
616 bretagne_bf <- dplyr::filter(qecb, region == "Bretagne" & Type.Bloc %in% c("Bloc fixé", "Roche en place"))
617 egmp_basq_bm <- dplyr::filter(qecb, region == "EGMP.BASQ" & Type.Bloc == "Bloc mobile")
618 egmp_basq_bf <- dplyr::filter(qecb, region == "EGMP.BASQ" & Type.Bloc %in% c("Bloc fixé", "Roche en place"))
619
620 # replace NAs by "0" for variables used in qecb determination
621
622 bretagne_bm[, c(
623 "X..algues.brunes",
624 "Strate.algues.brunes",
625 "X..algues.rouges",
626 "Strate.algues.rouges",
627 "X..algues.vertes",
628 "Strate.algues.vertes",
629 "X..Cladophora",
630 "X..Lithophyllum",
631 "X..Recouvrement.Sediment", # is NA, then replace by 0 as well because no sense to have a NA value for "% recouvrement sédiment" as well.
632 #"Type.Sediment",
633 "X..Roche.Nue", # is NA, then replace by 0 as well because no sense to have a NA value for "% roche nue" as well.
634 "Nb.Littorina.obtusata",
635 "Nb.Gibbula.cineraria",
636 "Nb.Gibbula.pennanti",
637 "Nb.Gibbula.umbilicalis",
638 "Nb.Phallusia.mamillata",
639 "Nb.Tethya.aurantium",
640 #"Nb.Spirobranchus.lamarckii.1B",
641 #"Nb.Spirobranchus.lamarckii.2B",
642 #"Nb.Spirobranchus.lamarckii.3B",
643 #"Nb.Spirobranchus.lamarckii.4B",
644 #"Nb.Spirobranchus.lamarckii.5B",
645 "Nb.Spirobranchus.lamarckii.total",
646 #"Nb.spirorbis.1A",
647 #"Nb.spirorbis.2A",
648 #"Nb.spirorbis.3A",
649 #"Nb.spirorbis.4A",
650 #"Nb.spirorbis.5A",
651 "Nb.spirorbis.total",
652 #.."Nb.Crassostrea.gigas",
653 #.."Nb.Ostrea.edulis",
654 #.."X..Mytilus.sp.",
655 #.."X..Hermelles",
656 #.."X..Hydraires",
657 "X..Eponges",
658 "X..Ascidies.Coloniales",
659 "X..Ascidies.Solitaires",
660 "X..Bryozoaires.Dresses",
661 "X..Balanes.Vivantes",
662 #"Commentaires.Avant",
663 "X..Surface.Accolement", # is NA, then replace by 0 as well because no sense to have a NA value for "% surface accolement" as well.
664 #"Type.sustrat.observé",
665 #"Commentaires",
666 "Nb.Cancer.pagurus..Tourteau.",
667 "Nb.Necora.puber..Etrille.",
668 "Nb.Carcinus.maenas..Crabe.vert.",
669 "Nb.Nucella.lapilus..Pourpre.",
670 #.."Nb.Eriphia.verrucosa..Crabe.verruqueux.",
671 #.."Nb.Octopus.vulgaris..Poulpe.",
672 "Nb.Galathea..Galathées.",
673 #.."Nb.Paracentrotus.lividus..Oursin.",
674 "Nb.Lophozozymus.incisus..ancien.Xantho.incisus.",
675 "Nb.Palaemon.sp..Crevette.bouquet.ou.crevette.rose.",
676 "Nb.Haliotis.tuberculata..Ormeau.",
677 #"Nb.Stramonita.haemastoma..Pourpre.bouche.de.sang.",
678 "Nb.Littorina.littorea..Bigorneau.",
679 "Nb.Xantho.pilipes..Xanthe.poilu.",
680 "Nb.Mimachlamys.varia..Pétoncle.noir."
681 )
682 ] <- lapply(bretagne_bm[, c(
683 "X..algues.brunes",
684 "Strate.algues.brunes",
685 "X..algues.rouges",
686 "Strate.algues.rouges",
687 "X..algues.vertes",
688 "Strate.algues.vertes",
689 "X..Cladophora",
690 "X..Lithophyllum",
691 "X..Recouvrement.Sediment", # is NA, then replace by 0 as well because no sense to have a NA value for "% recouvrement sédiment" as well.
692 #"Type.Sediment",
693 "X..Roche.Nue", # is NA, then replace by 0 as well because no sense to have a NA value for "% roche nue" as well.
694 "Nb.Littorina.obtusata",
695 "Nb.Gibbula.cineraria",
696 "Nb.Gibbula.pennanti",
697 "Nb.Gibbula.umbilicalis",
698 "Nb.Phallusia.mamillata",
699 "Nb.Tethya.aurantium",
700 #"Nb.Spirobranchus.lamarckii.1B",
701 #"Nb.Spirobranchus.lamarckii.2B",
702 #"Nb.Spirobranchus.lamarckii.3B",
703 #"Nb.Spirobranchus.lamarckii.4B",
704 #"Nb.Spirobranchus.lamarckii.5B",
705 "Nb.Spirobranchus.lamarckii.total",
706 #"Nb.spirorbis.1A",
707 #"Nb.spirorbis.2A",
708 #"Nb.spirorbis.3A",
709 #"Nb.spirorbis.4A",
710 #"Nb.spirorbis.5A",
711 "Nb.spirorbis.total",
712 #.."Nb.Crassostrea.gigas",
713 #.."Nb.Ostrea.edulis",
714 #.."X..Mytilus.sp.",
715 #.."X..Hermelles",
716 #.."X..Hydraires",
717 "X..Eponges",
718 "X..Ascidies.Coloniales",
719 "X..Ascidies.Solitaires",
720 "X..Bryozoaires.Dresses",
721 "X..Balanes.Vivantes",
722 #"Commentaires.Avant",
723 "X..Surface.Accolement", # is NA, then replace by 0 as well because no sense to have a NA value for "% surface accolement" as well.
724 #"Type.sustrat.observé",
725 #"Commentaires",
726 "Nb.Cancer.pagurus..Tourteau.",
727 "Nb.Necora.puber..Etrille.",
728 "Nb.Carcinus.maenas..Crabe.vert.",
729 "Nb.Nucella.lapilus..Pourpre.",
730 #.."Nb.Eriphia.verrucosa..Crabe.verruqueux.",
731 #.."Nb.Octopus.vulgaris..Poulpe.",
732 "Nb.Galathea..Galathées.",
733 #.."Nb.Paracentrotus.lividus..Oursin.",
734 "Nb.Lophozozymus.incisus..ancien.Xantho.incisus.",
735 "Nb.Palaemon.sp..Crevette.bouquet.ou.crevette.rose.",
736 "Nb.Haliotis.tuberculata..Ormeau.",
737 #"Nb.Stramonita.haemastoma..Pourpre.bouche.de.sang.",
738 "Nb.Littorina.littorea..Bigorneau.",
739 "Nb.Xantho.pilipes..Xanthe.poilu.",
740 "Nb.Mimachlamys.varia..Pétoncle.noir."
741 )
742 ], function(x) replace(x, is.na(x), 0))
743
744
745 # bretagne_bf
746 bretagne_bf[, c(
747 "X..algues.brunes",
748 "Strate.algues.brunes",
749 "X..algues.rouges",
750 "Strate.algues.rouges",
751 "X..algues.vertes",
752 "Strate.algues.vertes",
753 "X..Cladophora",
754 "X..Lithophyllum",
755 "X..Recouvrement.Sediment", # is NA, then replace by 0 as well because no sense to have a NA value for "% recouvrement sédiment" as well.
756 #"Type.Sediment",
757 "X..Roche.Nue", # is NA, then replace by 0 as well because no sense to have a NA value for "% roche nue" as well.
758 "Nb.Littorina.obtusata",
759 "Nb.Gibbula.cineraria",
760 "Nb.Gibbula.pennanti",
761 "Nb.Gibbula.umbilicalis",
762 "Nb.Phallusia.mamillata",
763 "Nb.Tethya.aurantium",
764 #"Nb.Spirobranchus.lamarckii.1B",
765 #"Nb.Spirobranchus.lamarckii.2B",
766 #"Nb.Spirobranchus.lamarckii.3B",
767 #"Nb.Spirobranchus.lamarckii.4B",
768 #"Nb.Spirobranchus.lamarckii.5B",
769 "Nb.Spirobranchus.lamarckii.total",
770 #"Nb.spirorbis.1A",
771 #"Nb.spirorbis.2A",
772 #"Nb.spirorbis.3A",
773 #"Nb.spirorbis.4A",
774 #"Nb.spirorbis.5A",
775 "Nb.spirorbis.total",
776 #.."Nb.Crassostrea.gigas",
777 #.."Nb.Ostrea.edulis",
778 #.."X..Mytilus.sp.",
779 #.."X..Hermelles",
780 #.."X..Hydraires",
781 "X..Eponges",
782 "X..Ascidies.Coloniales",
783 "X..Ascidies.Solitaires",
784 "X..Bryozoaires.Dresses",
785 "X..Balanes.Vivantes",
786 #"Commentaires.Avant",
787 "X..Surface.Accolement"#, # is NA, then replace by 0 as well because no sense to have a NA value for "% surface accolement" as well.
788 #"Type.sustrat.observé",
789 #"Commentaires",
790 #."Nb.Cancer.pagurus..Tourteau.",
791 #.."Nb.Necora.puber..Etrille.",
792 #."Nb.Carcinus.maenas..Crabe.vert.",
793 #."Nb.Nucella.lapilus..Pourpre.",
794 #.."Nb.Eriphia.verrucosa..Crabe.verruqueux.",
795 #.."Nb.Octopus.vulgaris..Poulpe.",
796 #."Nb.Galathea..Galathées.",
797 #.."Nb.Paracentrotus.lividus..Oursin.",
798 #."Nb.Lophozozymus.incisus..ancien.Xantho.incisus.",
799 #."Nb.Palaemon.sp..Crevette.bouquet.ou.crevette.rose.",
800 #."Nb.Haliotis.tuberculata..Ormeau.",
801 #."Nb.Stramonita.haemastoma..Pourpre.bouche.de.sang.",
802 #."Nb.Littorina.littorea..Bigorneau.",
803 #."Nb.Xantho.pilipes..Xanthe.poilu.",
804 #."Nb.Mimachlamys.varia..Pétoncle.noir."
805 )
806 ] <- lapply(bretagne_bf[, c(
807 "X..algues.brunes",
808 "Strate.algues.brunes",
809 "X..algues.rouges",
810 "Strate.algues.rouges",
811 "X..algues.vertes",
812 "Strate.algues.vertes",
813 "X..Cladophora",
814 "X..Lithophyllum",
815 "X..Recouvrement.Sediment", # is NA, then replace by 0 as well because no sense to have a NA value for "% recouvrement sédiment" as well.
816 #"Type.Sediment",
817 "X..Roche.Nue", # is NA, then replace by 0 as well because no sense to have a NA value for "% roche nue" as well.
818 "Nb.Littorina.obtusata",
819 "Nb.Gibbula.cineraria",
820 "Nb.Gibbula.pennanti",
821 "Nb.Gibbula.umbilicalis",
822 "Nb.Phallusia.mamillata",
823 "Nb.Tethya.aurantium",
824 #"Nb.Spirobranchus.lamarckii.1B",
825 #"Nb.Spirobranchus.lamarckii.2B",
826 #"Nb.Spirobranchus.lamarckii.3B",
827 #"Nb.Spirobranchus.lamarckii.4B",
828 #"Nb.Spirobranchus.lamarckii.5B",
829 "Nb.Spirobranchus.lamarckii.total",
830 #"Nb.spirorbis.1A",
831 #"Nb.spirorbis.2A",
832 #"Nb.spirorbis.3A",
833 #"Nb.spirorbis.4A",
834 #"Nb.spirorbis.5A",
835 "Nb.spirorbis.total",
836 #.."Nb.Crassostrea.gigas",
837 #.."Nb.Ostrea.edulis",
838 #.."X..Mytilus.sp.",
839 #.."X..Hermelles",
840 #.."X..Hydraires",
841 "X..Eponges",
842 "X..Ascidies.Coloniales",
843 "X..Ascidies.Solitaires",
844 "X..Bryozoaires.Dresses",
845 "X..Balanes.Vivantes",
846 #"Commentaires.Avant",
847 "X..Surface.Accolement"#, # is NA, then replace by 0 as well because no sense to have a NA value for "% surface accolement" as well.
848 #"Type.sustrat.observé",
849 #"Commentaires",
850 #."Nb.Cancer.pagurus..Tourteau.",
851 #.."Nb.Necora.puber..Etrille.",
852 #."Nb.Carcinus.maenas..Crabe.vert.",
853 #."Nb.Nucella.lapilus..Pourpre.",
854 #.."Nb.Eriphia.verrucosa..Crabe.verruqueux.",
855 #.."Nb.Octopus.vulgaris..Poulpe.",
856 #."Nb.Galathea..Galathées.",
857 #.."Nb.Paracentrotus.lividus..Oursin.",
858 #."Nb.Lophozozymus.incisus..ancien.Xantho.incisus.",
859 #."Nb.Palaemon.sp..Crevette.bouquet.ou.crevette.rose.",
860 #."Nb.Haliotis.tuberculata..Ormeau.",
861 #."Nb.Stramonita.haemastoma..Pourpre.bouche.de.sang.",
862 #."Nb.Littorina.littorea..Bigorneau.",
863 #."Nb.Xantho.pilipes..Xanthe.poilu.",
864 #."Nb.Mimachlamys.varia..Pétoncle.noir."
865 )
866 ], function(x) replace(x, is.na(x), 0))
867
868
869 # egmp_basq_bm
870 egmp_basq_bm[, c(
871 "X..algues.brunes",
872 "Strate.algues.brunes",
873 "X..algues.rouges",
874 "Strate.algues.rouges",
875 "X..algues.vertes",
876 "Strate.algues.vertes",
877 "X..Cladophora",
878 "X..Lithophyllum",
879 "X..Recouvrement.Sediment", # is NA, then replace by 0 as well because no sense to have a NA value for "% recouvrement sédiment" as well.
880 #"Type.Sediment",
881 "X..Roche.Nue", # is NA, then replace by 0 as well because no sense to have a NA value for "% roche nue" as well.
882 "Nb.Littorina.obtusata",
883 "Nb.Gibbula.cineraria",
884 "Nb.Gibbula.pennanti",
885 "Nb.Gibbula.umbilicalis",
886 "Nb.Phallusia.mamillata",
887 "Nb.Tethya.aurantium",
888 #"Nb.Spirobranchus.lamarckii.1B",
889 #"Nb.Spirobranchus.lamarckii.2B",
890 #"Nb.Spirobranchus.lamarckii.3B",
891 #"Nb.Spirobranchus.lamarckii.4B",
892 #"Nb.Spirobranchus.lamarckii.5B",
893 "Nb.Spirobranchus.lamarckii.total",
894 #"Nb.spirorbis.1A",
895 #"Nb.spirorbis.2A",
896 #"Nb.spirorbis.3A",
897 #"Nb.spirorbis.4A",
898 #"Nb.spirorbis.5A",
899 "Nb.spirorbis.total",
900 "Nb.Crassostrea.gigas",
901 "Nb.Ostrea.edulis",
902 "X..Mytilus.sp.",
903 "X..Hermelles",
904 "X..Hydraires",
905 "X..Eponges",
906 "X..Ascidies.Coloniales",
907 "X..Ascidies.Solitaires",
908 "X..Bryozoaires.Dresses",
909 "X..Balanes.Vivantes",
910 #"Commentaires.Avant",
911 "X..Surface.Accolement", # is NA, then replace by 0 as well because no sense to have a NA value for "% surface accolement" as well.
912 #"Type.sustrat.observé",
913 #"Commentaires",
914 "Nb.Cancer.pagurus..Tourteau.",
915 "Nb.Necora.puber..Etrille.",
916 "Nb.Carcinus.maenas..Crabe.vert.",
917 "Nb.Nucella.lapilus..Pourpre.",
918 "Nb.Eriphia.verrucosa..Crabe.verruqueux.",
919 "Nb.Octopus.vulgaris..Poulpe.",
920 "Nb.Galathea..Galathées.",
921 "Nb.Paracentrotus.lividus..Oursin.",
922 "Nb.Lophozozymus.incisus..ancien.Xantho.incisus.",
923 "Nb.Palaemon.sp..Crevette.bouquet.ou.crevette.rose.",
924 "Nb.Haliotis.tuberculata..Ormeau.",
925 "Nb.Stramonita.haemastoma..Pourpre.bouche.de.sang.",
926 "Nb.Littorina.littorea..Bigorneau.",
927 "Nb.Xantho.pilipes..Xanthe.poilu.",
928 "Nb.Mimachlamys.varia..Pétoncle.noir."
929 )
930 ] <- lapply(egmp_basq_bm[, c(
931 "X..algues.brunes",
932 "Strate.algues.brunes",
933 "X..algues.rouges",
934 "Strate.algues.rouges",
935 "X..algues.vertes",
936 "Strate.algues.vertes",
937 "X..Cladophora",
938 "X..Lithophyllum",
939 "X..Recouvrement.Sediment", # is NA, then replace by 0 as well because no sense to have a NA value for "% recouvrement sédiment" as well.
940 #"Type.Sediment",
941 "X..Roche.Nue", # is NA, then replace by 0 as well because no sense to have a NA value for "% roche nue" as well.
942 "Nb.Littorina.obtusata",
943 "Nb.Gibbula.cineraria",
944 "Nb.Gibbula.pennanti",
945 "Nb.Gibbula.umbilicalis",
946 "Nb.Phallusia.mamillata",
947 "Nb.Tethya.aurantium",
948 #"Nb.Spirobranchus.lamarckii.1B",
949 #"Nb.Spirobranchus.lamarckii.2B",
950 #"Nb.Spirobranchus.lamarckii.3B",
951 #"Nb.Spirobranchus.lamarckii.4B",
952 #"Nb.Spirobranchus.lamarckii.5B",
953 "Nb.Spirobranchus.lamarckii.total",
954 #"Nb.spirorbis.1A",
955 #"Nb.spirorbis.2A",
956 #"Nb.spirorbis.3A",
957 #"Nb.spirorbis.4A",
958 #"Nb.spirorbis.5A",
959 "Nb.spirorbis.total",
960 "Nb.Crassostrea.gigas",
961 "Nb.Ostrea.edulis",
962 "X..Mytilus.sp.",
963 "X..Hermelles",
964 "X..Hydraires",
965 "X..Eponges",
966 "X..Ascidies.Coloniales",
967 "X..Ascidies.Solitaires",
968 "X..Bryozoaires.Dresses",
969 "X..Balanes.Vivantes",
970 #"Commentaires.Avant",
971 "X..Surface.Accolement", # is NA, then replace by 0 as well because no sense to have a NA value for "% surface accolement" as well.
972 #"Type.sustrat.observé",
973 #"Commentaires",
974 "Nb.Cancer.pagurus..Tourteau.",
975 "Nb.Necora.puber..Etrille.",
976 "Nb.Carcinus.maenas..Crabe.vert.",
977 "Nb.Nucella.lapilus..Pourpre.",
978 "Nb.Eriphia.verrucosa..Crabe.verruqueux.",
979 "Nb.Octopus.vulgaris..Poulpe.",
980 "Nb.Galathea..Galathées.",
981 "Nb.Paracentrotus.lividus..Oursin.",
982 "Nb.Lophozozymus.incisus..ancien.Xantho.incisus.",
983 "Nb.Palaemon.sp..Crevette.bouquet.ou.crevette.rose.",
984 "Nb.Haliotis.tuberculata..Ormeau.",
985 "Nb.Stramonita.haemastoma..Pourpre.bouche.de.sang.",
986 "Nb.Littorina.littorea..Bigorneau.",
987 "Nb.Xantho.pilipes..Xanthe.poilu.",
988 "Nb.Mimachlamys.varia..Pétoncle.noir."
989 )
990 ], function(x) replace(x, is.na(x), 0))
991
992
993 # egmp_basq_bf
994 egmp_basq_bf[, c(
995 "X..algues.brunes",
996 "Strate.algues.brunes",
997 "X..algues.rouges",
998 "Strate.algues.rouges",
999 "X..algues.vertes",
1000 "Strate.algues.vertes",
1001 "X..Cladophora",
1002 "X..Lithophyllum",
1003 "X..Recouvrement.Sediment", # is NA, then replace by 0 as well because no sense to have a NA value for "% recouvrement sédiment" as well.
1004 #"Type.Sediment",
1005 "X..Roche.Nue", # is NA, then replace by 0 as well because no sense to have a NA value for "% roche nue" as well.
1006 "Nb.Littorina.obtusata",
1007 "Nb.Gibbula.cineraria",
1008 "Nb.Gibbula.pennanti",
1009 "Nb.Gibbula.umbilicalis",
1010 "Nb.Phallusia.mamillata",
1011 "Nb.Tethya.aurantium",
1012 #"Nb.Spirobranchus.lamarckii.1B",
1013 #"Nb.Spirobranchus.lamarckii.2B",
1014 #"Nb.Spirobranchus.lamarckii.3B",
1015 #"Nb.Spirobranchus.lamarckii.4B",
1016 #"Nb.Spirobranchus.lamarckii.5B",
1017 "Nb.Spirobranchus.lamarckii.total",
1018 #"Nb.spirorbis.1A",
1019 #"Nb.spirorbis.2A",
1020 #"Nb.spirorbis.3A",
1021 #"Nb.spirorbis.4A",
1022 #"Nb.spirorbis.5A",
1023 "Nb.spirorbis.total",
1024 "Nb.Crassostrea.gigas",
1025 "Nb.Ostrea.edulis",
1026 "X..Mytilus.sp.",
1027 "X..Hermelles",
1028 "X..Hydraires",
1029 "X..Eponges",
1030 "X..Ascidies.Coloniales",
1031 "X..Ascidies.Solitaires",
1032 "X..Bryozoaires.Dresses",
1033 "X..Balanes.Vivantes",
1034 #"Commentaires.Avant",
1035 "X..Surface.Accolement"#, # is NA, then replace by 0 as well because no sense to have a NA value for "% surface accolement" as well.
1036 #"Type.sustrat.observé",
1037 #"Commentaires",
1038 #."Nb.Cancer.pagurus..Tourteau.",
1039 #.."Nb.Necora.puber..Etrille.",
1040 #."Nb.Carcinus.maenas..Crabe.vert.",
1041 #."Nb.Nucella.lapilus..Pourpre.",
1042 #.."Nb.Eriphia.verrucosa..Crabe.verruqueux.",
1043 #.."Nb.Octopus.vulgaris..Poulpe.",
1044 #."Nb.Galathea..Galathées.",
1045 #.."Nb.Paracentrotus.lividus..Oursin.",
1046 #."Nb.Lophozozymus.incisus..ancien.Xantho.incisus.",
1047 #."Nb.Palaemon.sp..Crevette.bouquet.ou.crevette.rose.",
1048 #."Nb.Haliotis.tuberculata..Ormeau.",
1049 #."Nb.Stramonita.haemastoma..Pourpre.bouche.de.sang.",
1050 #."Nb.Littorina.littorea..Bigorneau.",
1051 #."Nb.Xantho.pilipes..Xanthe.poilu.",
1052 #."Nb.Mimachlamys.varia..Pétoncle.noir."
1053 )
1054 ] <- lapply(egmp_basq_bf[, c(
1055 "X..algues.brunes",
1056 "Strate.algues.brunes",
1057 "X..algues.rouges",
1058 "Strate.algues.rouges",
1059 "X..algues.vertes",
1060 "Strate.algues.vertes",
1061 "X..Cladophora",
1062 "X..Lithophyllum",
1063 "X..Recouvrement.Sediment", # is NA, then replace by 0 as well because no sense to have a NA value for "% recouvrement sédiment" as well.
1064 #"Type.Sediment",
1065 "X..Roche.Nue", # is NA, then replace by 0 as well because no sense to have a NA value for "% roche nue" as well.
1066 "Nb.Littorina.obtusata",
1067 "Nb.Gibbula.cineraria",
1068 "Nb.Gibbula.pennanti",
1069 "Nb.Gibbula.umbilicalis",
1070 "Nb.Phallusia.mamillata",
1071 "Nb.Tethya.aurantium",
1072 #"Nb.Spirobranchus.lamarckii.1B",
1073 #"Nb.Spirobranchus.lamarckii.2B",
1074 #"Nb.Spirobranchus.lamarckii.3B",
1075 #"Nb.Spirobranchus.lamarckii.4B",
1076 #"Nb.Spirobranchus.lamarckii.5B",
1077 "Nb.Spirobranchus.lamarckii.total",
1078 #"Nb.spirorbis.1A",
1079 #"Nb.spirorbis.2A",
1080 #"Nb.spirorbis.3A",
1081 #"Nb.spirorbis.4A",
1082 #"Nb.spirorbis.5A",
1083 "Nb.spirorbis.total",
1084 "Nb.Crassostrea.gigas",
1085 "Nb.Ostrea.edulis",
1086 "X..Mytilus.sp.",
1087 "X..Hermelles",
1088 "X..Hydraires",
1089 "X..Eponges",
1090 "X..Ascidies.Coloniales",
1091 "X..Ascidies.Solitaires",
1092 "X..Bryozoaires.Dresses",
1093 "X..Balanes.Vivantes",
1094 #"Commentaires.Avant",
1095 "X..Surface.Accolement"#, # is NA, then replace by 0 as well because no sense to have a NA value for "% surface accolement" as well.
1096 #"Type.sustrat.observé",
1097 #"Commentaires",
1098 #."Nb.Cancer.pagurus..Tourteau.",
1099 #.."Nb.Necora.puber..Etrille.",
1100 #."Nb.Carcinus.maenas..Crabe.vert.",
1101 #."Nb.Nucella.lapilus..Pourpre.",
1102 #.."Nb.Eriphia.verrucosa..Crabe.verruqueux.",
1103 #.."Nb.Octopus.vulgaris..Poulpe.",
1104 #."Nb.Galathea..Galathées.",
1105 #.."Nb.Paracentrotus.lividus..Oursin.",
1106 #."Nb.Lophozozymus.incisus..ancien.Xantho.incisus.",
1107 #."Nb.Palaemon.sp..Crevette.bouquet.ou.crevette.rose.",
1108 #."Nb.Haliotis.tuberculata..Ormeau.",
1109 #."Nb.Stramonita.haemastoma..Pourpre.bouche.de.sang.",
1110 #."Nb.Littorina.littorea..Bigorneau.",
1111 #."Nb.Xantho.pilipes..Xanthe.poilu.",
1112 #."Nb.Mimachlamys.varia..Pétoncle.noir."
1113 )
1114 ], function(x) replace(x, is.na(x), 0))
1115
1116
1117 # merge dfs.
1118 qecbnato0 <- dplyr::bind_rows(bretagne_bm, bretagne_bf)
1119 qecbnato0 <- dplyr::bind_rows(qecbnato0, egmp_basq_bm)
1120 qecbnato0 <- dplyr::bind_rows(qecbnato0, egmp_basq_bf)
1121
1122 qecbnato0 <- dplyr::arrange(qecbnato0, region, site_year_month_day, Type.Bloc, Numéro.Bloc.échantillon, Face)
1123
1124 rm(bretagne_bm, bretagne_bf, egmp_basq_bm, egmp_basq_bf)
1125
1126
1127 ## analyse matricielle
1128
1129 # NB some variables were dplyr::renamed or created, cfr I originally merged qecb and ivr data in below script to do some correlation analysis. This is not the case anymore, so no more merging anymore.
1130
1131 qecbnato0 <- tibble::add_column(qecbnato0, region.site_year_month_day = paste0(qecbnato0$region, qecbnato0$site_year_month_day), .before = "region")
1132
1133
1134 numero_quadrat <- stringr::str_sub(qecbnato0$quadrat_bis, start = -1)
1135 qecbnato0 <- tibble::add_column(qecbnato0, numero_quadrat, .after = "quadrat_bis")
1136 rm(numero_quadrat)
1137 qecbnato0$numero_quadrat <- as.integer(qecbnato0$numero_quadrat)
1138
1139
1140 qecbnato0$Year <- as.integer(qecbnato0$Year)
1141 qecbnato0$Month <- as.integer(qecbnato0$Month)
1142 qecbnato0$Day <- as.integer(qecbnato0$Day)
1143
1144 ############################################################
1145 #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1146 # Anna still hasn't corrected for boulder nb in FINS_Quemenes.2020.10.16 data encoding ! removed from the df_
1147 qecbnato0 <- qecbnato0 %>% dplyr::filter(site_year_month_day != "FINS_Quemenes.2020.10.16")
1148 ############################################################
1149
1150 # what to do with spirorbes & Nb.Spirobranchus.lamarckii.total? log10 transformation
1151
1152 qecbnato0 <- tibble::add_column(qecbnato0, log10.Nb.spirorbis.total = log10(qecbnato0$Nb.spirorbis.total + 1), .after = "Nb.spirorbis.total")
1153 qecbnato0 <- tibble::add_column(qecbnato0, log10.Nb.Spirobranchus.lamarckii.total = log10(qecbnato0$Nb.Spirobranchus.lamarckii.total + 1), .after = "Nb.Spirobranchus.lamarckii.total")
1154
1155 saveRDS(qecbnato0, "qecbnato0.RDS")
1156
1157
1158 ###############################################################
1159 # #
1160 # Start dissimilarity calculation with some data handling #
1161 # #
1162 ###############################################################
1163
1164 # first, create vector (4) for qecb and fishing by region (same as above)
1165
1166 bret_egmp_basq_qecb <- c(
1167 "X..algues.brunes",
1168 "X..algues.rouges",
1169 "X..algues.vertes",
1170 "X..Cladophora",
1171 "X..Lithophyllum",
1172 "Nb.Littorina.obtusata",
1173 "Nb.Gibbula.cineraria",
1174 "Nb.Gibbula.pennanti",
1175 "Nb.Gibbula.umbilicalis",
1176 "Nb.Phallusia.mamillata",
1177 "Nb.Tethya.aurantium",
1178 "Nb.Spirobranchus.lamarckii.total",
1179 "Nb.spirorbis.total",
1180 "X..Eponges",
1181 "X..Ascidies.Coloniales",
1182 "X..Ascidies.Solitaires",
1183 "X..Bryozoaires.Dresses",
1184 "X..Balanes.Vivantes"
1185 #, "X..Recouvrement.Sediment"
1186 #, "X..Roche.Nue"
1187 #, "X..Surface.Accolement"
1188 )
1189
1190 egmp_basq_qecb <- c("Nb.Crassostrea.gigas", "Nb.Ostrea.edulis", "X..Mytilus.sp.", "X..Hermelles", "X..Hydraires")
1191
1192 bret_egmp_basq_fishing <- c("Nb.Cancer.pagurus..Tourteau.",
1193 "Nb.Necora.puber..Etrille.",
1194 "Nb.Carcinus.maenas..Crabe.vert.",
1195 "Nb.Nucella.lapilus..Pourpre.",
1196 "Nb.Galathea..Galathées.",
1197 "Nb.Lophozozymus.incisus..ancien.Xantho.incisus.",
1198 "Nb.Palaemon.sp..Crevette.bouquet.ou.crevette.rose.",
1199 "Nb.Haliotis.tuberculata..Ormeau.",
1200 "Nb.Littorina.littorea..Bigorneau.",
1201 "Nb.Xantho.pilipes..Xanthe.poilu.",
1202 "Nb.Mimachlamys.varia..Pétoncle.noir.")
1203
1204 egmp_basq_fishing <- c("Nb.Eriphia.verrucosa..Crabe.verruqueux.", "Nb.Octopus.vulgaris..Poulpe.", "Nb.Paracentrotus.lividus..Oursin.", "Nb.Stramonita.haemastoma..Pourpre.bouche.de.sang.")
1205
1206 # here I can choose to either replace spirorbis and/or spirobranchus by their log10 transformation in bret_egmp_basq_qecb vector
1207 bret_egmp_basq_qecb <- replace(bret_egmp_basq_qecb, bret_egmp_basq_qecb == "Nb.spirorbis.total", "log10.Nb.spirorbis.total")
1208 saveRDS(bret_egmp_basq_qecb, "bret_egmp_basq_qecb.RDS")
1209
1210
1211 #############################################################
1212 # #
1213 # Compute dissimilarity #
1214 # #
1215 #############################################################
1216 ### determination of coefficient of dissimilarity face sup mobile bloc vs fixed bloc
1217
1218 # loop in a fct
1219
1220 matri_fct_bmf <- function(data, conca) {
1221
1222 matri_df <- data
1223
1224 for (x in c(1:length(unique(matri_df$site_year_month_day)))) {
1225
1226 qecbnato0_x <- matri_df %>% dplyr::filter(site_year_month_day == unique(matri_df$site_year_month_day)[[x]])
1227
1228 rownames(qecbnato0_x) <- paste0(qecbnato0_x$Type.Bloc, "_", qecbnato0_x$Face, "_", qecbnato0_x$Numéro.Bloc.échantillon, "_", qecbnato0_x$quadrat_bis)
1229
1230
1231 mtxdis <- vegan::vegdist(
1232 sqrt(qecbnato0_x[, conca]), #Transform your species abundance data_ Typically, raw abundances are transformed prior to analysis. Usually you will use square root, fourth-root, log(X+1), or presence-absence (square root being least extreme, P/A being most). I would start with square root. (https://stats.stackexchange.com/questions/234495/double-zeroes-problem-with-euclidean-distance-and-abundance-data-is-the-proble)
1233
1234 na.rm = TRUE,
1235 method = "bray" #Construct species abundance dissimilarity matrices with Bray-Curtis. If your data contains samples that are all-zero you will run into the double zero problem. This can be overcome by using a zero-adjusted Bray-Curtis coefficient, which is sometimes referred to as a 'dummy variable' which damps down the similarity fluctuations between samples that are both zero (undefined). => see below for zero-adjusted Bray-Curtis coefficient ; #another possibility, sqrt + 1 ??
1236 )
1237
1238
1239 #https://rdrr.io/github/phytomosaic/ecole/man/bray0.html
1240 # mtxdis <- ecole::bray0(
1241 # sqrt
1242 # (qecbnato0_x[,conca]), na.rm = TRUE)
1243
1244 expand.grid(mtxdis)
1245
1246 mtxdisdf_ <- as.data.frame(as.matrix(mtxdis))
1247
1248 a_ <- NA
1249 b_ <- NA
1250 c_ <- NA
1251 d_ <- NA
1252 e_ <- NA
1253 f_ <- NA
1254 g_ <- NA
1255 h_ <- NA
1256 i_ <- NA
1257 j_ <- NA
1258 k_ <- NA
1259 l_ <- NA
1260 m_ <- NA
1261 n_ <- NA
1262
1263 for (z in c(1:nrow(mtxdisdf_))) {
1264
1265 a_[[z]] <- (paste0(rownames(mtxdisdf_[z + 1, ]), " & ", ifelse(nrow(mtxdisdf_) >= 1, colnames(mtxdisdf_[1]), NA)))
1266 b_[[z]] <- (paste0(rownames(mtxdisdf_[z + 2, ]), " & ", ifelse(nrow(mtxdisdf_) >= 2, colnames(mtxdisdf_[2]), NA)))
1267 c_[[z]] <- (paste0(rownames(mtxdisdf_[z + 3, ]), " & ", ifelse(nrow(mtxdisdf_) >= 3, colnames(mtxdisdf_[3]), NA)))
1268 d_[[z]] <- (paste0(rownames(mtxdisdf_[z + 4, ]), " & ", ifelse(nrow(mtxdisdf_) >= 4, colnames(mtxdisdf_[4]), NA)))
1269 e_[[z]] <- (paste0(rownames(mtxdisdf_[z + 5, ]), " & ", ifelse(nrow(mtxdisdf_) >= 5, colnames(mtxdisdf_[5]), NA)))
1270 f_[[z]] <- (paste0(rownames(mtxdisdf_[z + 6, ]), " & ", ifelse(nrow(mtxdisdf_) >= 6, colnames(mtxdisdf_[6]), NA)))
1271 g_[[z]] <- (paste0(rownames(mtxdisdf_[z + 7, ]), " & ", ifelse(nrow(mtxdisdf_) >= 7, colnames(mtxdisdf_[7]), NA)))
1272 h_[[z]] <- (paste0(rownames(mtxdisdf_[z + 8, ]), " & ", ifelse(nrow(mtxdisdf_) >= 8, colnames(mtxdisdf_[8]), NA)))
1273 i_[[z]] <- (paste0(rownames(mtxdisdf_[z + 9, ]), " & ", ifelse(nrow(mtxdisdf_) >= 9, colnames(mtxdisdf_[9]), NA)))
1274 j_[[z]] <- (paste0(rownames(mtxdisdf_[z + 10, ]), " & ", ifelse(nrow(mtxdisdf_) >= 10, colnames(mtxdisdf_[10]), NA)))
1275 k_[[z]] <- (paste0(rownames(mtxdisdf_[z + 11, ]), " & ", ifelse(nrow(mtxdisdf_) >= 11, colnames(mtxdisdf_[11]), NA)))
1276 l_[[z]] <- (paste0(rownames(mtxdisdf_[z + 12, ]), " & ", ifelse(nrow(mtxdisdf_) >= 12, colnames(mtxdisdf_[12]), NA)))
1277 m_[[z]] <- (paste0(rownames(mtxdisdf_[z + 13, ]), " & ", ifelse(nrow(mtxdisdf_) >= 13, colnames(mtxdisdf_[13]), NA)))
1278 n_[[z]] <- (paste0(rownames(mtxdisdf_[z + 14, ]), " & ", ifelse(nrow(mtxdisdf_) >= 14, colnames(mtxdisdf_[14]), NA)))
1279
1280 }
1281
1282 rm(z)
1283
1284 y <- length(a_) - (ifelse(nrow(mtxdisdf_) >= 1, 1, nrow(mtxdisdf_)))
1285 a_ <- a_[1:y]
1286 y <- length(b_) - (ifelse(nrow(mtxdisdf_) >= 2, 2, nrow(mtxdisdf_)))
1287 b_ <- b_[1:y]
1288 y <- length(c_) - (ifelse(nrow(mtxdisdf_) >= 3, 3, nrow(mtxdisdf_)))
1289 c_ <- c_[1:y]
1290 y <- length(d_) - (ifelse(nrow(mtxdisdf_) >= 4, 4, nrow(mtxdisdf_)))
1291 d_ <- d_[1:y]
1292 y <- length(e_) - (ifelse(nrow(mtxdisdf_) >= 5, 5, nrow(mtxdisdf_)))
1293 e_ <- e_[1:y]
1294 y <- length(f_) - (ifelse(nrow(mtxdisdf_) >= 6, 6, nrow(mtxdisdf_)))
1295 f_ <- f_[1:y]
1296 y <- length(g_) - (ifelse(nrow(mtxdisdf_) >= 7, 7, nrow(mtxdisdf_)))
1297 g_ <- g_[1:y]
1298 y <- length(h_) - (ifelse(nrow(mtxdisdf_) >= 8, 8, nrow(mtxdisdf_)))
1299 h_ <- h_[1:y]
1300 y <- length(i_) - (ifelse(nrow(mtxdisdf_) >= 9, 9, nrow(mtxdisdf_)))
1301 i_ <- i_[1:y]
1302 y <- length(j_) - (ifelse(nrow(mtxdisdf_) >= 10, 10, nrow(mtxdisdf_)))
1303 j_ <- j_[1:y]
1304 y <- length(k_) - (ifelse(nrow(mtxdisdf_) >= 11, 11, nrow(mtxdisdf_)))
1305 k_ <- k_[1:y]
1306 y <- length(l_) - (ifelse(nrow(mtxdisdf_) >= 12, 12, nrow(mtxdisdf_)))
1307 l_ <- l_[1:y]
1308 y <- length(m_) - (ifelse(nrow(mtxdisdf_) >= 13, 13, nrow(mtxdisdf_)))
1309 m_ <- m_[1:y]
1310 y <- length(n_) - (ifelse(nrow(mtxdisdf_) >= 14, 14, nrow(mtxdisdf_)))
1311 n_ <- n_[1:y]
1312
1313 rm(y)
1314
1315 name_ <- c(a_, b_, c_, d_, e_, f_, g_, h_, i_, j_, k_, l_, m_, n_)
1316 df_ <- data.frame(expand.grid(mtxdis), name_[1:nrow(expand.grid(mtxdis))])
1317 names(df_) <- c("dist.", "name_")
1318
1319 rm(a_, b_, c_, d_, e_, f_, g_, h_, i_, j_, k_, l_, m_, n_)
1320 rm(name_)
1321
1322 q_ <- strsplit(df_$name_, " & ")
1323 mat_ <- matrix(unlist(q_), ncol = 2, byrow = TRUE)
1324 q_df_ <- as.data.frame(matrix(unlist(q_), ncol = 2, byrow = TRUE))
1325 df_ <- dplyr::bind_cols(df_, q_df_)
1326
1327 rm(q_, mat_, q_df_)
1328
1329 split_ <- strsplit(df_$V1, "_")
1330 v1_split_ <- as.data.frame(matrix(unlist(split_), ncol = 4, byrow = TRUE))
1331 split_ <- strsplit(df_$V2, "_")
1332 v2_split_ <- as.data.frame(matrix(unlist(split_), ncol = 4, byrow = TRUE))
1333
1334 df_ <- dplyr::bind_cols(df_, v1_split_)
1335 df_ <- dplyr::bind_cols(df_, v2_split_)
1336 df_red_ <- subset(df_, V4...8 == V4...12 & V1...5 != V1...9)
1337 site_year_month_day <- rep(unique(qecbnato0_x$site_year_month_day), nrow(df_red_))
1338
1339 df_red_ <- tibble::add_column(df_red_, site_year_month_day, .before = "dist.")
1340
1341 rm(split_, v1_split_, v2_split_)
1342 rm(mtxdis, mtxdisdf_, df_, site_year_month_day)
1343
1344 matri_list[[x]] <- df_red_
1345 matri_list <<- matri_list
1346
1347 rm(df_red_, qecbnato0_x, x)
1348
1349 }
1350
1351 matri_df <- do.call("rbind", matri_list)
1352
1353 names(matri_df) <- c("site_year_month_day", "dist.", "name_", "name_left", "name_right", "Type.Bloc.left", "Face.left", "Numéro.Bloc.échantillon.left", "Quadrat.left", "Type.Bloc.right", "Face.right", "Numéro.Bloc.échantillon.right", "Quadrat.right")
1354
1355 matri_df <<- matri_df
1356
1357 hist(matri_df$dist.)
1358
1359 }
1360
1361 data_ <- dplyr::filter(qecbnato0, Face == "face supérieure")
1362 data_$Type.Bloc <- ifelse(as.character(data_$Type.Bloc) == "Roche en place", "Bloc fixé", as.character(data_$Type.Bloc))
1363 matri_list <- vector("list", length(unique(data_$site_year_month_day)))
1364
1365 matri_fct_bmf(data = data_, conca = c(bret_egmp_basq_qecb, egmp_basq_qecb))
1366 hist(matri_df$dist., main = c(paste("Histo. of Bray (0-adjusted) dist. dissimilarity measures"), paste(" (sqrt transfo) - BMfs vs BF -")))
1367
1368 matri_full_bm_bf_fs <- matri_df
1369
1370
1371 rm(data_, matri_df, matri_list)
1372
1373
1374 ### determination of coefficient of dissimilarity between blocs mobiles face sup vs face inf.
1375
1376 # loop in a fct
1377
1378 matri_fct_bmm <- function(data, conca) {
1379
1380 matri_df <- data
1381
1382 for (x in c(1:length(unique(matri_df$site_year_month_day)))) {
1383
1384 qecbnato0_x <- matri_df %>% dplyr::filter(site_year_month_day == unique(matri_df$site_year_month_day)[[x]])
1385
1386 rownames(qecbnato0_x) <- paste0(qecbnato0_x$Type.Bloc, "_", qecbnato0_x$Face, "_", qecbnato0_x$Numéro.Bloc.échantillon, "_", qecbnato0_x$quadrat_bis)
1387
1388
1389 mtxdis <- vegan::vegdist(
1390 sqrt(qecbnato0_x[, c(bret_egmp_basq_qecb)]), #Transform your species abundance data_ Typically, raw abundances are transformed prior to analysis. Usually you will use square root, fourth-root, log(X+1), or presence-absence (square root being least extreme, P/A being most). I would start with square root. (https://stats.stackexchange.com/questions/234495/double-zeroes-problem-with-euclidean-distance-and-abundance-data-is-the-proble)
1391 na.rm = TRUE,
1392 method = "bray" #Construct species abundance dissimilarity matrices with Bray-Curtis. If your data contains samples that are all-zero you will run into the double zero problem. This can be overcome by using a zero-adjusted Bray-Curtis coefficient, which is sometimes referred to as a 'dummy variable' which damps down the similarity fluctuations between samples that are both zero (undefined). => see below for zero-adjusted Bray-Curtis coefficient ; #another possibility, sqrt + 1 ??
1393 )
1394
1395
1396 #mtxdis <- ecole::bray0(
1397 # sqrt(qecbnato0_x[, conca]), na.rm = TRUE)
1398
1399
1400 expand.grid(mtxdis)
1401
1402 mtxdisdf_ <- as.data.frame(as.matrix(mtxdis))
1403
1404 a_ <- NA
1405 b_ <- NA
1406 c_ <- NA
1407 d_ <- NA
1408 e_ <- NA
1409 f_ <- NA
1410 g_ <- NA
1411 h_ <- NA
1412 i_ <- NA
1413 j_ <- NA
1414 k_ <- NA
1415 l_ <- NA
1416 m_ <- NA
1417 n_ <- NA
1418 o_ <- NA
1419 p_ <- NA
1420 q_ <- NA
1421 r_ <- NA
1422 s_ <- NA
1423
1424 for (z in c(1:nrow(mtxdisdf_))) {
1425
1426 a_[[z]] <- (paste0(rownames(mtxdisdf_[z + 1, ]), " & ", ifelse(nrow(mtxdisdf_) >= 1, colnames(mtxdisdf_[1]), NA)))
1427 b_[[z]] <- (paste0(rownames(mtxdisdf_[z + 2, ]), " & ", ifelse(nrow(mtxdisdf_) >= 2, colnames(mtxdisdf_[2]), NA)))
1428 c_[[z]] <- (paste0(rownames(mtxdisdf_[z + 3, ]), " & ", ifelse(nrow(mtxdisdf_) >= 3, colnames(mtxdisdf_[3]), NA)))
1429 d_[[z]] <- (paste0(rownames(mtxdisdf_[z + 4, ]), " & ", ifelse(nrow(mtxdisdf_) >= 4, colnames(mtxdisdf_[4]), NA)))
1430 e_[[z]] <- (paste0(rownames(mtxdisdf_[z + 5, ]), " & ", ifelse(nrow(mtxdisdf_) >= 5, colnames(mtxdisdf_[5]), NA)))
1431 f_[[z]] <- (paste0(rownames(mtxdisdf_[z + 6, ]), " & ", ifelse(nrow(mtxdisdf_) >= 6, colnames(mtxdisdf_[6]), NA)))
1432 g_[[z]] <- (paste0(rownames(mtxdisdf_[z + 7, ]), " & ", ifelse(nrow(mtxdisdf_) >= 7, colnames(mtxdisdf_[7]), NA)))
1433 h_[[z]] <- (paste0(rownames(mtxdisdf_[z + 8, ]), " & ", ifelse(nrow(mtxdisdf_) >= 8, colnames(mtxdisdf_[8]), NA)))
1434 i_[[z]] <- (paste0(rownames(mtxdisdf_[z + 9, ]), " & ", ifelse(nrow(mtxdisdf_) >= 9, colnames(mtxdisdf_[9]), NA)))
1435 j_[[z]] <- (paste0(rownames(mtxdisdf_[z + 10, ]), " & ", ifelse(nrow(mtxdisdf_) >= 10, colnames(mtxdisdf_[10]), NA)))
1436 k_[[z]] <- (paste0(rownames(mtxdisdf_[z + 11, ]), " & ", ifelse(nrow(mtxdisdf_) >= 11, colnames(mtxdisdf_[11]), NA)))
1437 l_[[z]] <- (paste0(rownames(mtxdisdf_[z + 12, ]), " & ", ifelse(nrow(mtxdisdf_) >= 12, colnames(mtxdisdf_[12]), NA)))
1438 m_[[z]] <- (paste0(rownames(mtxdisdf_[z + 13, ]), " & ", ifelse(nrow(mtxdisdf_) >= 13, colnames(mtxdisdf_[13]), NA)))
1439 n_[[z]] <- (paste0(rownames(mtxdisdf_[z + 14, ]), " & ", ifelse(nrow(mtxdisdf_) >= 14, colnames(mtxdisdf_[14]), NA)))
1440 o_[[z]] <- (paste0(rownames(mtxdisdf_[z + 15, ]), " & ", ifelse(nrow(mtxdisdf_) >= 15, colnames(mtxdisdf_[15]), NA)))
1441 p_[[z]] <- (paste0(rownames(mtxdisdf_[z + 16, ]), " & ", ifelse(nrow(mtxdisdf_) >= 16, colnames(mtxdisdf_[16]), NA)))
1442 q_[[z]] <- (paste0(rownames(mtxdisdf_[z + 17, ]), " & ", ifelse(nrow(mtxdisdf_) >= 17, colnames(mtxdisdf_[17]), NA)))
1443 r_[[z]] <- (paste0(rownames(mtxdisdf_[z + 18, ]), " & ", ifelse(nrow(mtxdisdf_) >= 18, colnames(mtxdisdf_[18]), NA)))
1444 s_[[z]] <- (paste0(rownames(mtxdisdf_[z + 19, ]), " & ", ifelse(nrow(mtxdisdf_) >= 19, colnames(mtxdisdf_[19]), NA)))
1445
1446 }
1447
1448 rm(z)
1449
1450 y <- length(a_) - (ifelse(nrow(mtxdisdf_) >= 1, 1, nrow(mtxdisdf_)))
1451 a_ <- a_[1:y]
1452 y <- length(b_) - (ifelse(nrow(mtxdisdf_) >= 2, 2, nrow(mtxdisdf_)))
1453 b_ <- b_[1:y]
1454 y <- length(c_) - (ifelse(nrow(mtxdisdf_) >= 3, 3, nrow(mtxdisdf_)))
1455 c_ <- c_[1:y]
1456 y <- length(d_) - (ifelse(nrow(mtxdisdf_) >= 4, 4, nrow(mtxdisdf_)))
1457 d_ <- d_[1:y]
1458 y <- length(e_) - (ifelse(nrow(mtxdisdf_) >= 5, 5, nrow(mtxdisdf_)))
1459 e_ <- e_[1:y]
1460 y <- length(f_) - (ifelse(nrow(mtxdisdf_) >= 6, 6, nrow(mtxdisdf_)))
1461 f_ <- f_[1:y]
1462 y <- length(g_) - (ifelse(nrow(mtxdisdf_) >= 7, 7, nrow(mtxdisdf_)))
1463 g_ <- g_[1:y]
1464 y <- length(h_) - (ifelse(nrow(mtxdisdf_) >= 8, 8, nrow(mtxdisdf_)))
1465 h_ <- h_[1:y]
1466 y <- length(i_) - (ifelse(nrow(mtxdisdf_) >= 9, 9, nrow(mtxdisdf_)))
1467 i_ <- i_[1:y]
1468 y <- length(j_) - (ifelse(nrow(mtxdisdf_) >= 10, 10, nrow(mtxdisdf_)))
1469 j_ <- j_[1:y]
1470 y <- length(k_) - (ifelse(nrow(mtxdisdf_) >= 11, 11, nrow(mtxdisdf_)))
1471 k_ <- k_[1:y]
1472 y <- length(l_) - (ifelse(nrow(mtxdisdf_) >= 12, 12, nrow(mtxdisdf_)))
1473 l_ <- l_[1:y]
1474 y <- length(m_) - (ifelse(nrow(mtxdisdf_) >= 13, 13, nrow(mtxdisdf_)))
1475 m_ <- m_[1:y]
1476 y <- length(n_) - (ifelse(nrow(mtxdisdf_) >= 14, 14, nrow(mtxdisdf_)))
1477 n_ <- n_[1:y]
1478 y <- length(o_) - (ifelse(nrow(mtxdisdf_) >= 15, 15, nrow(mtxdisdf_)))
1479 o_ <- o_[1:y]
1480 y <- length(p_) - (ifelse(nrow(mtxdisdf_) >= 16, 16, nrow(mtxdisdf_)))
1481 p_ <- p_[1:y]
1482 y <- length(q_) - (ifelse(nrow(mtxdisdf_) >= 17, 17, nrow(mtxdisdf_)))
1483 q_ <- q_[1:y]
1484 y <- length(r_) - (ifelse(nrow(mtxdisdf_) >= 18, 18, nrow(mtxdisdf_)))
1485 r_ <- r_[1:y]
1486 y <- length(s_) - (ifelse(nrow(mtxdisdf_) >= 19, 19, nrow(mtxdisdf_)))
1487 s_ <- s_[1:y]
1488
1489 rm(y)
1490
1491 name_ <- c(a_, b_, c_, d_, e_, f_, g_, h_, i_, j_, k_, l_, m_, n_, o_, p_, q_, r_, s_)
1492 df_ <- data.frame(expand.grid(mtxdis), name_[1:seq_len(nrow(expand.grid(mtxdis)))])
1493 names(df_) <- c("dist.", "name_")
1494
1495 rm(a_, b_, c_, d_, e_, f_, g_, h_, i_, j_, k_, l_, m_, n_, o_, p_, q_, r_, s_)
1496 rm(name_)
1497
1498 q_ <- strsplit(df_$name_, " & ")
1499 mat_ <- matrix(unlist(q_), ncol = 2, byrow = TRUE)
1500 q_df_ <- as.data.frame(matrix(unlist(q_), ncol = 2, byrow = TRUE))
1501 df_ <- dplyr::bind_cols(df_, q_df_)
1502
1503 rm(q_, mat_, q_df_)
1504
1505 split_ <- strsplit(df_$V1, "_")
1506 v1_split_ <- as.data.frame(matrix(unlist(split_), ncol = 4, byrow = TRUE))
1507 split_ <- strsplit(df_$V2, "_")
1508 v2_split_ <- as.data.frame(matrix(unlist(split_), ncol = 4, byrow = TRUE))
1509
1510 df_ <- dplyr::bind_cols(df_, v1_split_)
1511 df_ <- dplyr::bind_cols(df_, v2_split_)
1512 df_red_ <- subset(df_, V4...8 == V4...12 & V3...7 == V3...11)
1513 site_year_month_day <- rep(unique(qecbnato0_x$site_year_month_day), nrow(df_red_))
1514
1515 df_red_ <- tibble::add_column(df_red_, site_year_month_day, .before = "dist.")
1516
1517 rm(split_, v1_split_, v2_split_)
1518 rm(mtxdis, mtxdisdf_, df_, site_year_month_day)
1519
1520 matri_list[[x]] <- df_red_
1521 matri_list <<- matri_list
1522
1523 rm(df_red_, qecbnato0_x, x)
1524
1525 }
1526
1527 matri_df <- do.call("rbind", matri_list)
1528
1529 names(matri_df) <- c("site_year_month_day", "dist.", "name_", "name_left", "name_right", "Type.Bloc.left", "Face.left", "Numéro.Bloc.échantillon.left", "Quadrat.left", "Type.Bloc.right", "Face.right", "Numéro.Bloc.échantillon.right", "Quadrat.right")
1530
1531 matri_df <<- matri_df
1532
1533 hist(matri_df$dist.)
1534
1535 }
1536
1537 data_ <- dplyr::filter(qecbnato0, Type.Bloc == "Bloc mobile")
1538 matri_list <- vector("list", length(unique(data_$site_year_month_day)))
1539
1540 matri_fct_bmm(data = data_, conca = c(bret_egmp_basq_qecb, egmp_basq_qecb))
1541 hist(matri_df$dist., main = c(paste("Histo. of Bray (0-adjusted) dist. dissimilarity measures"), paste(" (sqrt transfo) - BMfs vs BMfi -")))
1542
1543 matri_full_bm_bf_fi <- matri_df
1544
1545 rm(data_, matri_df, matri_list)
1546
1547
1548 #############################################################
1549 # #
1550 # Plot dissimilarity #
1551 # #
1552 #############################################################
1553 ## plot
1554
1555 # activate line
1556
1557 matri_full_bm_bf_fs <- tidyr::separate(matri_full_bm_bf_fs, "site_year_month_day", into = c("departement", "Site", "Year", "Month", "Day"), remove = FALSE)
1558 matri_full_bm_bf_fs$Site <- paste0(matri_full_bm_bf_fs$departement, "_", matri_full_bm_bf_fs$Site)
1559 matri_full_bm_bf_fs <- subset(matri_full_bm_bf_fs, select = - c(departement))
1560
1561 matri_full_bm_bf_fs <- tibble::add_column(matri_full_bm_bf_fs, Date = as.Date(paste0(matri_full_bm_bf_fs$Year, "-", matri_full_bm_bf_fs$Month, "-", matri_full_bm_bf_fs$Day), origin = "1970-01-01"), .after = "Site")
1562 matri_full_bm_bf_fs$Site <- as.factor(matri_full_bm_bf_fs$Site)
1563
1564 matri_full_bm_bf_fi <- tidyr::separate(matri_full_bm_bf_fi, "site_year_month_day", into = c("departement", "Site", "Year", "Month", "Day"), remove = FALSE)
1565 matri_full_bm_bf_fi$Site <- paste0(matri_full_bm_bf_fi$departement, "_", matri_full_bm_bf_fi$Site)
1566 matri_full_bm_bf_fi <- subset(matri_full_bm_bf_fi, select = - c(departement))
1567 matri_full_bm_bf_fi <- tibble::add_column(matri_full_bm_bf_fi, Date = as.Date(paste0(matri_full_bm_bf_fi$Year, "-", matri_full_bm_bf_fi$Month, "-", matri_full_bm_bf_fi$Day), origin = "1970-01-01"), .after = "Site")
1568 matri_full_bm_bf_fi$Site <- as.factor(matri_full_bm_bf_fi$Site)
1569
1570 # if error message "Error in .Call.graphics(C_palette2, .Call(C_palette2, NULL)) : invalid graphics state"
1571
1572
1573 bf_fs_plot <- ggplot2::ggplot(matri_full_bm_bf_fs, ggplot2::aes(x = Site, y = dist.)) +
1574 ggplot2::geom_boxplot() +
1575 #geom_jitter(shape = 16, position=position_jitter(0.2)) +
1576 ggplot2::xlab("") +
1577 ggplot2::ylab("distance diss. BM.BF_FS") +
1578 ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1))
1579
1580 ggplot2::ggsave("distance_diss_BF_FS.png", bf_fs_plot, height = 4.5, width = 4)
1581
1582 fs_fi_plot <- ggplot2::ggplot(matri_full_bm_bf_fi, ggplot2::aes(x = Site, y = dist.)) +
1583 ggplot2::geom_boxplot() +
1584 #geom_jitter(shape = 16, position=position_jitter(0.2)) +
1585 ggplot2::xlab("") +
1586 ggplot2::ylab("distance diss. BM_FS.FI") +
1587 ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1))
1588
1589 ggplot2::ggsave("distance_diss_FS_FI.png", fs_fi_plot, height = 4.5, width = 4)
1590
1591 # issue with type de bloc, numéro de bloc and quadrat for df_ BM.BF_FS, cfr df_ left vs right variables doesn't give the right combination (variables with left vs right label in names come from the dissimilarity coefficient functions).
1592 matri_full_bm_bf_fs$Quadrat <- NA
1593 for (i in c(1:seq_len(nrow(matri_full_bm_bf_fs)))) {
1594 ifelse(matri_full_bm_bf_fs$Type.Bloc.left[i] == "Bloc mobile", matri_full_bm_bf_fs$Quadrat[i] <- matri_full_bm_bf_fs$Quadrat.left[i], matri_full_bm_bf_fs$Quadrat[i] <- matri_full_bm_bf_fs$Quadrat.right[i])
1595 }
1596 matri_full_bm_bf_fs$Numéro.Bloc <- NA
1597 for (i in c(1:seq_len(nrow(matri_full_bm_bf_fs)))) {
1598 ifelse(matri_full_bm_bf_fs$Type.Bloc.left[i] == "Bloc mobile", matri_full_bm_bf_fs$Numéro.Bloc[i] <- matri_full_bm_bf_fs$Numéro.Bloc.échantillon.left[i], matri_full_bm_bf_fs$Numéro.Bloc[i] <- matri_full_bm_bf_fs$Numéro.Bloc.échantillon.right[i])
1599 }
1600
1601 matri_full_bm_bf_fs <- tibble::add_column(matri_full_bm_bf_fs, site_year_month_day.q_BMnb = paste0(matri_full_bm_bf_fs$site_year_month_day, "_", matri_full_bm_bf_fs$Quadrat, "_", matri_full_bm_bf_fs$Numéro.Bloc), .after = "site_year_month_day")
1602 matri_full_bm_bf_fi <- tibble::add_column(matri_full_bm_bf_fi, site_year_month_day.q_BMnb = paste0(matri_full_bm_bf_fi$site_year_month_day, "_", matri_full_bm_bf_fi$Quadrat.left, "_", matri_full_bm_bf_fi$Numéro.Bloc.échantillon.left), .after = "site_year_month_day")
1603
1604 colnames(matri_full_bm_bf_fs) <- paste("BM.BF_FS", colnames(matri_full_bm_bf_fs), sep = "_")
1605 matri_full_bm_bf_fs <- dplyr::rename(matri_full_bm_bf_fs, site_year_month_day.q_BMnb = BM.BF_FS_site_year_month_day.q_BMnb)
1606 colnames(matri_full_bm_bf_fi) <- paste("BM_FS.FI", colnames(matri_full_bm_bf_fi), sep = "_")
1607 matri_full_bm_bf_fi <- dplyr::rename(matri_full_bm_bf_fi, site_year_month_day.q_BMnb = BM_FS.FI_site_year_month_day.q_BMnb)
1608
1609 matri_full <- dplyr::full_join(matri_full_bm_bf_fs[, c("site_year_month_day.q_BMnb", "BM.BF_FS_dist.")], matri_full_bm_bf_fi[, c("site_year_month_day.q_BMnb", "BM_FS.FI_dist.")])
1610
1611 matri_full <- tidyr::separate(matri_full, "site_year_month_day.q_BMnb", into = c("departement", "Site", "Year", "Month", "Day", "Quadrat", "Bloc Mobile Number"), remove = FALSE)
1612 matri_full$Site <- paste0(matri_full$departement, "_", matri_full$Site)
1613 matri_full <- subset(matri_full, select = - c(departement))
1614 matri_full <- tibble::add_column(matri_full, Date = as.Date(paste0(matri_full$Year, "-", matri_full$Month, "-", matri_full$Day), origin = "1970-01-01"), .after = "Site")
1615
1616 # Name for report/plot
1617
1618 matri_full <- tibble::add_column(matri_full, Site_bis = matri_full$Site, .after = "Site")
1619
1620 matri_full$Site_bis <- ifelse(matri_full$Site == "GDMO_Locmariaquer", "Locmariaquer", matri_full$Site_bis)
1621 matri_full$Site_bis <- ifelse(matri_full$Site == "GDMO_BegLann", "Beg Lann", matri_full$Site_bis)
1622 matri_full$Site_bis <- ifelse(matri_full$Site == "FOUR_PlateauFour", "Plateau du Four", matri_full$Site_bis)
1623 matri_full$Site_bis <- ifelse(matri_full$Site == "EGMP_GroinCou", "Groin du Cou", matri_full$Site_bis)
1624 matri_full$Site_bis <- ifelse(matri_full$Site == "EGMP_PasEmsembert", "Le Pas d'Emsembert", matri_full$Site_bis)
1625 matri_full$Site_bis <- ifelse(matri_full$Site == "EGMP_BreeBains", "La Brée-les-Bains", matri_full$Site_bis)
1626 matri_full$Site_bis <- ifelse(matri_full$Site == "EGMP_PerreAntiochat", "Le Perré d'Antiochat", matri_full$Site_bis)
1627 matri_full$Site_bis <- ifelse(matri_full$Site == "EGMP_Chassiron", "Chassiron", matri_full$Site_bis)
1628 matri_full$Site_bis <- ifelse(matri_full$Site == "BASQ_FlotsBleusZP", "Les Flots Bleus / zone pêcheurs", matri_full$Site_bis)
1629 matri_full$Site_bis <- ifelse(matri_full$Site == "BASQ_FlotsBleusZF", "Les Flots Bleus / zone familles", matri_full$Site_bis)
1630 matri_full$Site_bis <- ifelse(matri_full$Site == "GONB_IlotStMichel", "Îlot Saint-Michel", matri_full$Site_bis)
1631 matri_full$Site_bis <- ifelse(matri_full$Site == "FINS_Quemenes", "Quéménès", matri_full$Site_bis)
1632 matri_full$Site_bis <- ifelse(matri_full$Site == "FINS_SeinGoulenez", "Île de Sein - Goulenez", matri_full$Site_bis)
1633 matri_full$Site_bis <- ifelse(matri_full$Site == "FINS_SeinKilaourou", "Île de Sein - Kilaourou", matri_full$Site_bis)
1634 matri_full$Site_bis <- ifelse(matri_full$Site == "ARMO_Verdelet", "Îlot du Verdelet", matri_full$Site_bis)
1635 matri_full$Site_bis <- ifelse(matri_full$Site == "ARMO_Piegu", "Piégu", matri_full$Site_bis)
1636 matri_full$Site_bis <- ifelse(matri_full$Site == "ARMO_Bilfot", "Pointe de Bilfot", matri_full$Site_bis)
1637 matri_full$Site_bis <- ifelse(matri_full$Site == "ARMO_IlePlate", "Île Plate", matri_full$Site_bis)
1638 matri_full$Site_bis <- ifelse(matri_full$Site == "PDMO_Perharidy", "Perharidy", matri_full$Site_bis)
1639 matri_full$Site_bis <- ifelse(matri_full$Site == "BRES_Keraliou", "Keraliou", matri_full$Site_bis)
1640 matri_full$Site_bis <- ifelse(matri_full$Site == "FINS_Mousterlin", "Pointe de Mousterlin", matri_full$Site_bis)
1641 matri_full$Site_bis <- ifelse(matri_full$Site == "FINS_StNicolasGlenan", "Saint-Nicolas des Glénan", matri_full$Site_bis)
1642 matri_full$Site_bis <- ifelse(matri_full$Site == "FINS_AnseRoz", "Pointe de l'Anse du Roz", matri_full$Site_bis)
1643
1644 saveRDS(matri_full, "matri_full.RDS")
1645
1646 #############################################################
1647 # #
1648 # Plot the dissimilarity per site #
1649 # #
1650 #############################################################
1651 ## plot dissimilarity coefficient
1652
1653 matri_full$Year <- as.integer(matri_full$Year)
1654 matri_full$Month <- as.integer(matri_full$Month)
1655 matri_full$Day <- as.integer(matri_full$Day)
1656
1657
1658 ## BM_FS.FI_dist => mobile boulder upper vs lower faces
1659
1660 # Stats
1661
1662 bm_fs_fi_dist_stat <- matri_full %>% dplyr::group_by(Site, Site_bis, Date, Year, Month, Day) %>% dplyr::summarize(BM_FS.FI_dist.moy = mean(BM_FS.FI_dist., na.rm = TRUE), BM_FS.FI_dist.et = sd(BM_FS.FI_dist., na.rm = TRUE), BM_FS.FI_dist.med = median(BM_FS.FI_dist., na.rm = TRUE), BM_FS.FI_dist.min = min(BM_FS.FI_dist., na.rm = TRUE), BM_FS.FI_dist.max = max(BM_FS.FI_dist., na.rm = TRUE), nb. = dplyr::n(), nb.notNa = sum(!is.na(BM_FS.FI_dist.)))
1663
1664 bm_fs_fi_dist_stat <- dplyr::ungroup(bm_fs_fi_dist_stat)
1665
1666 # Quality scale based on quartiles
1667
1668 if (choice == "N") {
1669 one <- round(mean(unlist(dplyr::filter(matri_full, BM_FS.FI_dist. <= quantile(matri_full$BM_FS.FI_dist., 0.25, na.rm = TRUE))["BM_FS.FI_dist."])), digits = 3)
1670 two <- round(mean(unlist(dplyr::filter(matri_full, BM_FS.FI_dist. > quantile(matri_full$BM_FS.FI_dist., 0.25, na.rm = TRUE) & BM_FS.FI_dist. <= quantile(matri_full$BM_FS.FI_dist., 0.5, na.rm = TRUE))["BM_FS.FI_dist."])), digits = 3)
1671 three <- round(mean(unlist(dplyr::filter(matri_full, BM_FS.FI_dist. > quantile(matri_full$BM_FS.FI_dist., 0.5, na.rm = TRUE) & BM_FS.FI_dist. <= quantile(matri_full$BM_FS.FI_dist., 0.75, na.rm = TRUE))["BM_FS.FI_dist."])), digits = 3)
1672 four <- round(mean(unlist(dplyr::filter(matri_full, BM_FS.FI_dist. > quantile(matri_full$BM_FS.FI_dist., 0.75, na.rm = TRUE))["BM_FS.FI_dist."])), digits = 3)
1673 }else {
1674 one <- 0.47
1675 two <- 0.7
1676 three <- 0.83
1677 four <- 0.98
1678 }
1679
1680 val_xmax <- as.Date(paste0(as.character(choice_date + 1), "-01-01"), origin = "1970-01-01")
1681
1682 for (i in c(1:length(unique(bm_fs_fi_dist_stat$Site)))) {
1683
1684 df1 <- dplyr::filter(bm_fs_fi_dist_stat, bm_fs_fi_dist_stat$Site == unique(bm_fs_fi_dist_stat$Site)[i])
1685
1686 bm_fs_fi_plot <- ggplot2::ggplot() +
1687 ggplot2::geom_point(ggplot2::aes(x = bm_fs_fi_dist_stat$Date, y = bm_fs_fi_dist_stat$BM_FS.FI_dist.med), col = "grey") +
1688 ggplot2::geom_rect(ggplot2::aes(xmin = as.Date("2013-01-01", origin = "1970-01-01"), xmax = val_xmax, ymin = - 0.1, ymax = one, fill = "blue"), alpha = 0.3) +
1689 ggplot2::geom_rect(ggplot2::aes(xmin = as.Date("2013-01-01", origin = "1970-01-01"), xmax = val_xmax, ymin = one, ymax = two, fill = "green"), alpha = 0.3) +
1690 ggplot2::geom_rect(ggplot2::aes(xmin = as.Date("2013-01-01", origin = "1970-01-01"), xmax = val_xmax, ymin = two, ymax = three, fill = "yellow"), alpha = 0.3) +
1691 ggplot2::geom_rect(ggplot2::aes(xmin = as.Date("2013-01-01", origin = "1970-01-01"), xmax = val_xmax, ymin = three, ymax = four, fill = "orange"), alpha = 0.3) +
1692 ggplot2::geom_rect(ggplot2::aes(xmin = as.Date("2013-01-01", origin = "1970-01-01"), xmax = val_xmax, ymin = four, ymax = 1.1, fill = "red"), alpha = 0.3) +
1693 ggplot2::scale_fill_manual(values = c("#FF0000", "#F59404", "#18E125", "#1A1AE8", "#FAFA15")) +
1694 ggplot2::geom_pointrange(ggplot2::aes(x = df1$Date, y = df1$BM_FS.FI_dist.med, ymin = df1$BM_FS.FI_dist.min, ymax = df1$BM_FS.FI_dist.max), col = "black") +
1695 ggplot2::xlab("Date") +
1696 ggplot2::ylab("Coef dissim BM FS-FI") +
1697 ggplot2::ggtitle(unique(df1$Site_bis)) +
1698 ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1), legend.position = "none")
1699
1700 ggplot2::ggsave(paste0("fs_fi_", df1$Site, ".png"), device = "png", bm_fs_fi_plot, height = 3, width = 3.5)
1701
1702 }
1703
1704 rm(df1, four, i, one, three, two, xmax_, xmin_, ymax_, ymin_)
1705
1706
1707 ## BM.BF_FS_dist => mobile boulder vs fixed boulder upper faces
1708
1709 # Stats
1710
1711 bm_bf_fs_dist_stat <- matri_full %>% dplyr::group_by(Site, Site_bis, Date, Year, Month, Day) %>% dplyr::summarize(BM.BF_FS_dist.moy = mean(BM.BF_FS_dist., na.rm = TRUE), BM.BF_FS_dist.et = sd(BM.BF_FS_dist., na.rm = TRUE), BM.BF_FS_dist.med = median(BM.BF_FS_dist., na.rm = TRUE), BM.BF_FS_dist.min = min(BM.BF_FS_dist., na.rm = TRUE), BM.BF_FS_dist.max = max(BM.BF_FS_dist., na.rm = TRUE), nb. = dplyr::n(), nb.notNa = sum(!is.na(BM.BF_FS_dist.)))
1712
1713 bm_bf_fs_dist_stat <- dplyr::ungroup(bm_bf_fs_dist_stat)
1714
1715 # Quality scale based on quartiles
1716
1717 if (choice == "N") {
1718 one <- round(mean(unlist(dplyr::filter(matri_full, BM.BF_FS_dist. <= quantile(matri_full$BM.BF_FS_dist., 0.25, na.rm = TRUE))["BM.BF_FS_dist."])), digits = 3)
1719 two <- round(mean(unlist(dplyr::filter(matri_full, BM.BF_FS_dist. > quantile(matri_full$BM.BF_FS_dist., 0.25, na.rm = TRUE) & BM.BF_FS_dist. <= quantile(matri_full$BM.BF_FS_dist., 0.5, na.rm = TRUE))["BM.BF_FS_dist."])), digits = 3)
1720 three <- round(mean(unlist(dplyr::filter(matri_full, BM.BF_FS_dist. > quantile(matri_full$BM.BF_FS_dist., 0.5, na.rm = TRUE) & BM.BF_FS_dist. <= quantile(matri_full$BM.BF_FS_dist., 0.75, na.rm = TRUE))["BM.BF_FS_dist."])), digits = 3)
1721 four <- round(mean(unlist(dplyr::filter(matri_full, BM.BF_FS_dist. > quantile(matri_full$BM.BF_FS_dist., 0.75, na.rm = TRUE))["BM.BF_FS_dist."])), digits = 3)
1722
1723 }else {
1724 one <- 0.19
1725 two <- 0.32
1726 three <- 0.455
1727 four <- 0.735
1728 }
1729 # Plot
1730
1731 for (i in c(1:length(unique(bm_bf_fs_dist_stat$Site)))) {
1732
1733 df1 <- dplyr::filter(bm_bf_fs_dist_stat, bm_bf_fs_dist_stat$Site == unique(bm_bf_fs_dist_stat$Site)[i])
1734
1735 bm_bf_fs_plot <- ggplot2::ggplot() +
1736 ggplot2::geom_point(ggplot2::aes(x = bm_bf_fs_dist_stat$Date, y = bm_bf_fs_dist_stat$BM.BF_FS_dist.med), col = "grey") +
1737 ggplot2::geom_rect(ggplot2::aes(xmin = as.Date("2013-01-01", origin = "1970-01-01"), xmax = val_xmax, ymin = - 0.1, ymax = one, fill = "red"), alpha = 0.3) +
1738 ggplot2::geom_rect(ggplot2::aes(xmin = as.Date("2013-01-01", origin = "1970-01-01"), xmax = val_xmax, ymin = one, ymax = two, fill = "orange"), alpha = 0.3) +
1739 ggplot2::geom_rect(ggplot2::aes(xmin = as.Date("2013-01-01", origin = "1970-01-01"), xmax = val_xmax, ymin = two, ymax = three, fill = "yellow"), alpha = 0.3) +
1740 ggplot2::geom_rect(ggplot2::aes(xmin = as.Date("2013-01-01", origin = "1970-01-01"), xmax = val_xmax, ymin = three, ymax = four, fill = "green"), alpha = 0.3) +
1741 ggplot2::geom_rect(ggplot2::aes(xmin = as.Date("2013-01-01", origin = "1970-01-01"), xmax = val_xmax, ymin = four, ymax = 1.1, fill = "blue"), alpha = 0.3) +
1742 ggplot2::scale_fill_manual(values = c("#FF0000", "#F59404", "#18E125", "#1A1AE8", "#FAFA15")) +
1743 ggplot2::geom_pointrange(ggplot2::aes(x = df1$Date, y = df1$BM.BF_FS_dist.med, ymin = df1$BM.BF_FS_dist.min, ymax = df1$BM.BF_FS_dist.max), col = "black") +
1744 ggplot2::xlab("Date") +
1745 ggplot2::ylab("Coef dissim BM-BF FS") +
1746 ggplot2::ggtitle(unique(df1$Site_bis)) +
1747 ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1), legend.position = "none")
1748
1749 ggplot2::ggsave(paste0("bm_bf_", df1$Site, ".png"), device = "png", bm_bf_fs_plot, height = 3, width = 3.5)
1750
1751 }
1752
1753 rm(df1, four, i, one, three, two, xmax_, xmin_, ymax_, ymin_)