Mercurial > repos > ecology > cb_ivr
comparison cb_dissimilarity.r @ 0:8c6142630659 draft
planemo upload for repository https://github.com/Marie59/champ_blocs commit 8b6fcddd239979c11977472de6cbb349690758c8
author | ecology |
---|---|
date | Fri, 02 Dec 2022 16:13:07 +0000 |
parents | |
children | bcbad4f83dec |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:8c6142630659 |
---|---|
1 # author: "Jonathan Richir" | |
2 # date: "01 October 2022" | |
3 | |
4 | |
5 #Rscript | |
6 | |
7 ############################### | |
8 ## ## | |
9 ############################### | |
10 | |
11 #####Packages : dplyr | |
12 # tidyr | |
13 # readr | |
14 # writexl | |
15 # stringr | |
16 # readxl | |
17 # tibble | |
18 # lubridate | |
19 # cowplot | |
20 # magrittr | |
21 # rmarkdown | |
22 library(magrittr) | |
23 #####Load arguments | |
24 | |
25 args <- commandArgs(trailingOnly = TRUE) | |
26 | |
27 #####Import data | |
28 | |
29 if (length(args) < 1) { | |
30 stop("This tool needs at least 1 argument") | |
31 }else { | |
32 fiche_val <- args[1] | |
33 input_data <- args[2] | |
34 choice <- args[3] | |
35 choice_date <- as.numeric(args[4]) | |
36 } | |
37 | |
38 ############################################################# | |
39 # # | |
40 # Loading and cleaning data # | |
41 # # | |
42 ############################################################# | |
43 # load qecb data | |
44 | |
45 qecb <- read.csv2(input_data, header = TRUE, fileEncoding = "Latin1") # fileEncoding = "Latin1", cfr é in variable names | |
46 | |
47 # import csv files ficheterrain | |
48 | |
49 fiche <- read.csv2(fiche_val, header = TRUE, fileEncoding = "Latin1") # fileEncoding = "Latin1", cfr é in variable names | |
50 | |
51 ## work on "Fiche terrain" | |
52 | |
53 date_fiche <- as.Date(stringr::str_sub(fiche$date.sortie, end = 10), origin = "1970-01-01") | |
54 fiche <- tibble::add_column(fiche, date_fiche, .after = "date.sortie") | |
55 rm(date_fiche) | |
56 | |
57 ## qecb vs fiche terrain | |
58 | |
59 fiche_red <- dplyr::filter(fiche, fiche$ID.Fiche %in% unique(qecb[, c("id")])) | |
60 | |
61 id_count <- qecb %>% dplyr::group_by(id) %>% dplyr::count() | |
62 id_count <- dplyr::rename(id_count, "ID.Fiche" = "id") | |
63 id_count <- data.frame(id_count) | |
64 fiche_red <- dplyr::left_join(fiche_red, id_count) | |
65 | |
66 # rep fiche terrain information | |
67 fiche_expanded <- fiche_red[rep(row.names(fiche_red), fiche_red$n), 1:ncol(fiche_red)] | |
68 fiche_expanded <- dplyr::rename(fiche_expanded, "id" = "ID.Fiche") | |
69 | |
70 ## merge qecb data and ficheterrain information | |
71 | |
72 qecb <- dplyr::bind_cols(qecb, fiche_expanded) | |
73 qecb <- dplyr::rename(qecb, "id_qecb" = "id...1") | |
74 qecb <- dplyr::rename(qecb, "id_fiche" = "id...68") | |
75 | |
76 rm(fiche_expanded, fiche_red, id_count) | |
77 | |
78 qecb <- qecb %>% tidyr::separate(date_fiche, c("Year", "Month", "Day"), sep = "-", remove = FALSE) | |
79 | |
80 | |
81 ## quadrat nb : in contrast to ivr df, quadrat number is missing for many observations in qecb df ; but from the Numero.Photo variable (boulder photo associated to field data collection), I could get back most missing quadrat numbers | |
82 | |
83 quadrat <- stringr::str_extract(qecb$Numero.Photo, "Q[12345]") | |
84 qecb <- tibble::add_column(qecb, quadrat, .after = "Numero.Photo") | |
85 rm(quadrat) | |
86 | |
87 # check | |
88 quadrat_bis <- rep(NA, length = nrow(qecb)) | |
89 qecb <- tibble::add_column(qecb, quadrat_bis, .after = "quadrat") | |
90 rm(quadrat_bis) | |
91 | |
92 qecb$quadrat_bis <- ifelse(qecb$Numéro.Bloc.échantillon %in% c(1, 2) & qecb$Type.Bloc == "Bloc mobile", "Q1", qecb$quadrat_bis) | |
93 qecb$quadrat_bis <- ifelse(qecb$Numéro.Bloc.échantillon %in% c(3, 4) & qecb$Type.Bloc == "Bloc mobile", "Q2", qecb$quadrat_bis) | |
94 qecb$quadrat_bis <- ifelse(qecb$Numéro.Bloc.échantillon %in% c(5, 6) & qecb$Type.Bloc == "Bloc mobile", "Q3", qecb$quadrat_bis) | |
95 qecb$quadrat_bis <- ifelse(qecb$Numéro.Bloc.échantillon %in% c(7, 8) & qecb$Type.Bloc == "Bloc mobile", "Q4", qecb$quadrat_bis) | |
96 qecb$quadrat_bis <- ifelse(qecb$Numéro.Bloc.échantillon %in% c(9, 10) & qecb$Type.Bloc == "Bloc mobile", "Q5", qecb$quadrat_bis) | |
97 | |
98 | |
99 qecb$quadrat_bis <- ifelse(qecb$Numéro.Bloc.échantillon == 1 & qecb$Type.Bloc %in% c("Bloc fixé", "Roche en place"), "Q1", qecb$quadrat_bis) | |
100 qecb$quadrat_bis <- ifelse(qecb$Numéro.Bloc.échantillon == 2 & qecb$Type.Bloc %in% c("Bloc fixé", "Roche en place"), "Q2", qecb$quadrat_bis) | |
101 qecb$quadrat_bis <- ifelse(qecb$Numéro.Bloc.échantillon == 3 & qecb$Type.Bloc %in% c("Bloc fixé", "Roche en place"), "Q3", qecb$quadrat_bis) | |
102 qecb$quadrat_bis <- ifelse(qecb$Numéro.Bloc.échantillon == 4 & qecb$Type.Bloc %in% c("Bloc fixé", "Roche en place"), "Q4", qecb$quadrat_bis) | |
103 qecb$quadrat_bis <- ifelse(qecb$Numéro.Bloc.échantillon == 5 & qecb$Type.Bloc %in% c("Bloc fixé", "Roche en place"), "Q5", qecb$quadrat_bis) | |
104 | |
105 ## I create two new variables for Site names, one for data analysis and one for data reporting. Only works for actual ivr df with 22 sites ! | |
106 | |
107 # Name for data analysis | |
108 | |
109 qecb <- tibble::add_column(qecb, Site = qecb$zone.habitat, .after = "ID.Fiche") | |
110 | |
111 for (x in seq_along(qecb$Site)) { | |
112 if (grepl(pattern = "Locmariaquer", qecb$Site[x]) == TRUE) { | |
113 qecb$Site[x] <- "GDMO_Locmariaquer" | |
114 } else if (grepl(pattern = "Beg Lann", qecb$Site[x]) == TRUE) { | |
115 qecb$Site[x] <- "GDMO_BegLann" | |
116 } else if (grepl(pattern = "Plateau du Four", qecb$Site[x]) == TRUE) { | |
117 qecb$Site[x] <- "FOUR_PlateauFour" | |
118 } else if (grepl(pattern = "Grouin", qecb$Site[x]) == TRUE) { | |
119 qecb$Site[x] <- "EGMP_GroinCou" | |
120 } else if (grepl(pattern = "Ensembert", qecb$Site[x]) == TRUE) { | |
121 qecb$Site[x] <- "EGMP_PasEmsembert" | |
122 } else if (grepl(pattern = "Brée-les-Bains", qecb$Site[x]) == TRUE) { | |
123 qecb$Site[x] <- "EGMP_BreeBains" | |
124 } else if (grepl(pattern = "Antiochat", qecb$Site[x]) == TRUE) { | |
125 qecb$Site[x] <- "EGMP_PerreAntiochat" | |
126 } else if (grepl(pattern = "Chassiron", qecb$Site[x]) == TRUE) { | |
127 qecb$Site[x] <- "EGMP_Chassiron" | |
128 } else if (grepl(pattern = "zone p", qecb$Site[x]) == TRUE) { | |
129 qecb$Site[x] <- "BASQ_FlotsBleusZP" | |
130 } else if (grepl(pattern = "zone f", qecb$Site[x]) == TRUE) { | |
131 qecb$Site[x] <- "BASQ_FlotsBleusZF" | |
132 } else if (grepl(pattern = "Saint-Michel", qecb$Site[x]) == TRUE) { | |
133 qecb$Site[x] <- "GONB_IlotStMichel" | |
134 } else if (grepl(pattern = "Quéménès", qecb$Site[x]) == TRUE) { | |
135 qecb$Site[x] <- "FINS_Quemenes" | |
136 } else if (grepl(pattern = "Goulenez", qecb$Site[x]) == TRUE) { | |
137 qecb$Site[x] <- "FINS_SeinGoulenez" | |
138 } else if (grepl(pattern = "Kilaourou", qecb$Site[x]) == TRUE) { | |
139 qecb$Site[x] <- "FINS_SeinKilaourou" | |
140 } else if (grepl(pattern = "Verdelet", qecb$Site[x]) == TRUE) { | |
141 qecb$Site[x] <- "ARMO_Verdelet" | |
142 } else if (grepl(pattern = "Piégu", qecb$Site[x]) == TRUE) { | |
143 qecb$Site[x] <- "ARMO_Piegu" | |
144 } else if (grepl(pattern = "Bilfot", qecb$Site[x]) == TRUE) { | |
145 qecb$Site[x] <- "ARMO_Bilfot" | |
146 } else if (grepl(pattern = "Plate", qecb$Site[x]) == TRUE) { | |
147 qecb$Site[x] <- "ARMO_IlePlate" | |
148 } else if (grepl(pattern = "Perharidy", qecb$Site[x]) == TRUE) { | |
149 qecb$Site[x] <- "PDMO_Perharidy" | |
150 } else if (grepl(pattern = "Keraliou", qecb$Site[x]) == TRUE) { | |
151 qecb$Site[x] <- "BRES_Keraliou" | |
152 } else if (grepl(pattern = "Mousterlin", qecb$Site[x]) == TRUE) { | |
153 qecb$Site[x] <- "FINS_Mousterlin" | |
154 } else if (grepl(pattern = "Nicolas", qecb$Site[x]) == TRUE) { | |
155 qecb$Site[x] <- "FINS_StNicolasGlenan" | |
156 } | |
157 if (grepl(pattern = "Roz", qecb$site[x]) == TRUE) { | |
158 qecb$Site[x] <- "FINS_AnseRoz" | |
159 } | |
160 } | |
161 | |
162 # Name for report/plot | |
163 | |
164 qecb <- tibble::add_column(qecb, Site_bis = qecb$Site, .after = "Site") | |
165 | |
166 qecb$Site_bis <- ifelse(qecb$Site == "GDMO_Locmariaquer", "Locmariaquer", qecb$Site_bis) | |
167 qecb$Site_bis <- ifelse(qecb$Site == "GDMO_BegLann", "Beg Lann", qecb$Site_bis) | |
168 qecb$Site_bis <- ifelse(qecb$Site == "FOUR_PlateauFour", "Plateau du Four", qecb$Site_bis) | |
169 qecb$Site_bis <- ifelse(qecb$Site == "EGMP_GroinCou", "Groin du Cou", qecb$Site_bis) | |
170 qecb$Site_bis <- ifelse(qecb$Site == "EGMP_PasEmsembert", "Le Pas d'Emsembert", qecb$Site_bis) | |
171 qecb$Site_bis <- ifelse(qecb$Site == "EGMP_BreeBains", "La Brée-les-Bains", qecb$Site_bis) | |
172 qecb$Site_bis <- ifelse(qecb$Site == "EGMP_PerreAntiochat", "Le Perré d'Antiochat", qecb$Site_bis) | |
173 qecb$Site_bis <- ifelse(qecb$Site == "EGMP_Chassiron", "Chassiron", qecb$Site_bis) | |
174 qecb$Site_bis <- ifelse(qecb$Site == "BASQ_FlotsBleusZP", "Les Flots Bleus / zone pêcheurs", qecb$Site_bis) | |
175 qecb$Site_bis <- ifelse(qecb$Site == "BASQ_FlotsBleusZF", "Les Flots Bleus / zone familles", qecb$Site_bis) | |
176 qecb$Site_bis <- ifelse(qecb$Site == "GONB_IlotStMichel", "Îlot Saint-Michel", qecb$Site_bis) | |
177 qecb$Site_bis <- ifelse(qecb$Site == "FINS_Quemenes", "Quéménès", qecb$Site_bis) | |
178 qecb$Site_bis <- ifelse(qecb$Site == "FINS_SeinGoulenez", "Île de Sein - Goulenez", qecb$Site_bis) | |
179 qecb$Site_bis <- ifelse(qecb$Site == "FINS_SeinKilaourou", "Île de Sein - Kilaourou", qecb$Site_bis) | |
180 qecb$Site_bis <- ifelse(qecb$Site == "ARMO_Verdelet", "Îlot du Verdelet", qecb$Site_bis) | |
181 qecb$Site_bis <- ifelse(qecb$Site == "ARMO_Piegu", "Piégu", qecb$Site_bis) | |
182 qecb$Site_bis <- ifelse(qecb$Site == "ARMO_Bilfot", "Pointe de Bilfot", qecb$Site_bis) | |
183 qecb$Site_bis <- ifelse(qecb$Site == "ARMO_IlePlate", "Île Plate", qecb$Site_bis) | |
184 qecb$Site_bis <- ifelse(qecb$Site == "PDMO_Perharidy", "Perharidy", qecb$Site_bis) | |
185 qecb$Site_bis <- ifelse(qecb$Site == "BRES_Keraliou", "Keraliou", qecb$Site_bis) | |
186 qecb$Site_bis <- ifelse(qecb$Site == "FINS_Mousterlin", "Pointe de Mousterlin", qecb$Site_bis) | |
187 qecb$Site_bis <- ifelse(qecb$Site == "FINS_StNicolasGlenan", "Saint-Nicolas des Glénan", qecb$Site_bis) | |
188 qecb$Site_bis <- ifelse(qecb$Site == "FINS_AnseRoz", "Pointe de l'Anse du Roz", qecb$Site_bis) | |
189 | |
190 ## change some variables to factor | |
191 | |
192 # change 'X..' variables that are indeed % to numeric; https://stackoverflow.com/questions/59410939/apply-function-to-all-variables-with-string-in-name | |
193 ix <- grep("^X..", names(qecb)) | |
194 qecb[ix] <- lapply(qecb[ix], as.numeric) | |
195 rm(ix) | |
196 | |
197 | |
198 ## save the final, complete qecb df_ | |
199 | |
200 qecb <- qecb[, c(72:107, 1:71)] | |
201 | |
202 ## qecb df preparation prior qecb calculation | |
203 | |
204 # Several issues to solve in the df first | |
205 | |
206 | |
207 qecb$Type.Bloc <- factor(qecb$Type.Bloc, levels = c("Bloc mobile", "Bloc fixé", "Roche en place")) | |
208 | |
209 qecb$Face <- factor(qecb$Face, levels = c("face supérieure", "face inférieure")) | |
210 | |
211 qecb <- dplyr::arrange(qecb, Type.Bloc, Face, Numéro.Bloc.échantillon) | |
212 | |
213 qecb <- tibble::add_column(qecb, site_year_month_day = paste0(qecb$Site, ".", qecb$Year, ".", qecb$Month, ".", qecb$Day), .after = "Site_bis") | |
214 | |
215 # save qecb as a new df_ for analysis purpose => several changes to operate to run the code and functions | |
216 | |
217 qecbnew <- qecb | |
218 | |
219 # df with list object nb and corresponding site_year_month_day value to solve for loop issues | |
220 | |
221 df_list_loop <- data.frame("site_year_month_day" = unique(qecbnew$site_year_month_day), | |
222 "loop nb" = c(1:seq_along(unique(qecbnew$site_year_month_day)))) | |
223 | |
224 # dplyr::filter for df that makes problem, then eventually correct in the dataframe for wrong coding; brackets (xx) for nb because will change when qecb df_ enlarged. | |
225 # these listed boulder field survey error when highlighted when running the loop, that ran into an error ; it was a step by step procedure with solving one listed observation after another when issues appeared. Surely not the best way to proceed, maybe better just to skip these surveys (site + date), but in the present case I wanted to keep most of the observations, therefore I corrected them manually whenever needed. | |
226 | |
227 # list nb (28) - EGMP_BreeBains.2016.04.06 | |
228 qecbnew$Face <- as.character(qecbnew$Face) | |
229 qecbnew$Face <- ifelse(qecbnew$ID.Fiche == "BDD_IVR&QECB_La Bree_20160406_VImport.xlsx" & qecbnew$Référence.bloc == "avr16-LaBreeB9sup", "face supérieure", qecbnew$Face) | |
230 qecbnew$Face <- ifelse(qecbnew$ID.Fiche == "BDD_IVR&QECB_La Bree_20160406_VImport.xlsx" & qecbnew$Référence.bloc == "avr16-LaBreeB10sup", "face supérieure", qecbnew$Face) | |
231 qecbnew$Face <- as.factor(qecbnew$Face) | |
232 | |
233 # list nb 33 - EGMP_PerreAntiochat.2016.04.07 | |
234 qecbnew$Face <- as.character(qecbnew$Face) | |
235 qecbnew$Face <- ifelse(qecbnew$ID.Fiche == "BDD_IVR&QECB_PerAnt_20160407_VImport.xlsx" & qecbnew$Référence.bloc == "avr16-PerAntB9sup", "face supérieure", qecbnew$Face) | |
236 qecbnew$Face <- ifelse(qecbnew$ID.Fiche == "BDD_IVR&QECB_PerAnt_20160407_VImport.xlsx" & qecbnew$Référence.bloc == "avr16-PerAntB10sup", "face supérieure", qecbnew$Face) | |
237 qecbnew$Face <- as.factor(qecbnew$Face) | |
238 | |
239 # list nb 37 - EGMP_Chassiron.2016.03.09 | |
240 qecbnew$Face <- as.character(qecbnew$Face) | |
241 qecbnew$Face <- ifelse(qecbnew$ID.Fiche == "BDD_IVR&QECB_Chassiron_20160309&10_VImport.xlsx" & qecbnew$Référence.bloc == "mars16-ChassB9sup", "face supérieure", qecbnew$Face) | |
242 qecbnew$Face <- ifelse(qecbnew$ID.Fiche == "BDD_IVR&QECB_Chassiron_20160309&10_VImport.xlsx" & qecbnew$Référence.bloc == "mars16-ChasB10sup", "face supérieure", qecbnew$Face) | |
243 qecbnew$Face <- as.factor(qecbnew$Face) | |
244 | |
245 # list nb 76 - ARMO_Verdelet.2015.03.23 | |
246 qecbnew$Face <- as.character(qecbnew$Face) | |
247 qecbnew$Face <- ifelse(qecbnew$ID.Fiche == "BDD_IVR&QECB_Verdelet_20150323_VImport.xlsx" & qecbnew$Référence.bloc == "mar15-VerB10inf", "face inférieure", qecbnew$Face) | |
248 qecbnew$Face <- as.factor(qecbnew$Face) | |
249 | |
250 # list nb 116 - "GDMO_Locmariaquer.2018.09.10" | |
251 qecbnew$Type.Bloc <- as.character(qecbnew$Type.Bloc) | |
252 qecbnew$Type.Bloc <- ifelse(qecbnew$ID.Fiche == "2018-09-10-GDMO-CDB-001" & qecbnew$Numero.Photo == "2018-09-10_GDMO_01_CDB-5_sup_392578.jpg", "Roche en place", qecbnew$Type.Bloc) | |
253 qecbnew$Type.Bloc <- as.factor(qecbnew$Type.Bloc) | |
254 qecbnew$quadrat_bis <- ifelse(qecbnew$ID.Fiche == "2018-09-10-GDMO-CDB-001" & qecbnew$Numero.Photo == "2018-09-10_GDMO_01_CDB-5_sup_392578.jpg", "Q5", qecbnew$quadrat_bis) | |
255 qecbnew <- qecbnew %>% dplyr::filter(!(ID.Fiche == "2018-09-10-GDMO-CDB-001" & Numero.Photo == "")) | |
256 | |
257 # Few sites to remove prior running the for loop because it was not just a encoding mistake for one data, but a globally wroing coding for the site + date survey. | |
258 | |
259 qecb_i <- qecbnew %>% dplyr::filter(site_year_month_day == "FINS_StNicolasGlenan.2016.04.08") # no bloc fixe ! | |
260 qecbnew <- qecbnew %>% dplyr::filter(site_year_month_day != "FINS_StNicolasGlenan.2016.04.08") | |
261 qecb_i <- qecbnew %>% dplyr::filter(site_year_month_day == "GDMO_Locmariaquer.2019.09.30") # most faces of blocs mobiles do not correspond to each other; only 3 over 10 boulder have data for both face supérieure and face inférieure | |
262 qecbnew <- qecbnew %>% dplyr::filter(site_year_month_day != "GDMO_Locmariaquer.2019.09.30") | |
263 rm(df_list_loop, qecb_i) | |
264 | |
265 | |
266 # check for species with count within sub-0.1m^2-quadrat (i.e. reduced size quadrat compare to most organisms on boulder to count them, because abundant ; then some extrapolation) | |
267 | |
268 # first for Spirobranchus | |
269 | |
270 qecbnew$Nb.Spirobranchus.lamarckii.total.ini <- qecbnew$Nb.Spirobranchus.lamarckii.total | |
271 qecbnew$Nb.Spirobranchus.lamarckii.total <- as.character(qecbnew$Nb.Spirobranchus.lamarckii.total) | |
272 | |
273 | |
274 qecbnew_spirobranchus <- (dplyr::filter(qecbnew, Nb.Spirobranchus.lamarckii.total %in% c(NA, "NaN", "Inf", "-Inf"))) | |
275 qecbnew_spirobranchus[, c("Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B")] <- sapply(qecbnew_spirobranchus[, c("Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B")], as.character) | |
276 (spirobranchus_data <- subset(qecbnew_spirobranchus, !is.na(qecbnew_spirobranchus$Nb.Spirobranchus.lamarckii.1B) || !is.na(qecbnew_spirobranchus$Nb.Spirobranchus.lamarckii.2B) || !is.na(qecbnew_spirobranchus$Nb.Spirobranchus.lamarckii.3B) || !is.na(qecbnew_spirobranchus$Nb.Spirobranchus.lamarckii.4B) || !is.na(qecbnew_spirobranchus$Nb.Spirobranchus.lamarckii.5B))[, c("site_year_month_day", "Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B", "Nb.Spirobranchus.lamarckii.total")]) | |
277 | |
278 quemenes <- dplyr::filter(qecbnew, Site == "FINS_Quemenes") | |
279 quemenes <- dplyr::arrange(quemenes, date_fiche) | |
280 # for Quemenes, issue because for sampling date "FINS_Quemenes.2015.09.30" the 5 counts of Spirobranchus were encoded in 1B instead of total !!! I noticed this issue when mining data (see below), therefore I corrected before running below script for Spirobranchus. | |
281 qecbnew$Nb.Spirobranchus.lamarckii.total <- ifelse(qecbnew$site_year_month_day == "FINS_Quemenes.2015.09.30" & is.na(qecbnew$Nb.Spirobranchus.lamarckii.total), qecbnew$Nb.Spirobranchus.lamarckii.1B, qecbnew$Nb.Spirobranchus.lamarckii.total) | |
282 | |
283 (quemenes <- dplyr::filter(qecbnew, site_year_month_day == "FINS_Quemenes.2015.09.30")[, c("site_year_month_day", "Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B", "Nb.Spirobranchus.lamarckii.total")]) | |
284 rm(quemenes) | |
285 | |
286 seinkilaourou <- dplyr::filter(qecbnew, Site == "FINS_SeinKilaourou") | |
287 seinkilaourou <- dplyr::arrange(seinkilaourou, date_fiche) | |
288 # same issue with SeinKilaourou | |
289 qecbnew$Nb.Spirobranchus.lamarckii.total <- ifelse(qecbnew$site_year_month_day == "FINS_SeinKilaourou.2015.04.21" & is.na(qecbnew$Nb.Spirobranchus.lamarckii.total), qecbnew$Nb.Spirobranchus.lamarckii.1B, qecbnew$Nb.Spirobranchus.lamarckii.total) | |
290 (seinkilaourou <- dplyr::filter(qecbnew, site_year_month_day == "FINS_SeinKilaourou.2015.04.21")[, c("site_year_month_day", "Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B", "Nb.Spirobranchus.lamarckii.total")]) | |
291 rm(seinkilaourou) | |
292 | |
293 # some more issues however with "x100"count data | |
294 spirobranchus <- subset(qecbnew, !is.na(qecbnew$Nb.Spirobranchus.lamarckii.1B) & !is.na(qecbnew$Nb.Spirobranchus.lamarckii.2B) & !is.na(qecbnew$Nb.Spirobranchus.lamarckii.3B) & !is.na(qecbnew$Nb.Spirobranchus.lamarckii.4B) & !is.na(qecbnew$Nb.Spirobranchus.lamarckii.5B) & !is.na(qecbnew$Nb.Spirobranchus.lamarckii.total))[, c("site_year_month_day", "Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B", "Nb.Spirobranchus.lamarckii.total")] | |
295 | |
296 for (i in c(1:nrow(spirobranchus))) { | |
297 spirobranchus$mean.x.100[[i]] <- sum(spirobranchus[i, c(2:6)], na.rm = TRUE) / sum(!is.na(spirobranchus[i, c(2:6)])) * 100 | |
298 } | |
299 | |
300 spirobranchus$mean.x.100 <- unlist(spirobranchus$mean.x.100) | |
301 spirobranchus$Nb.Spirobranchus.lamarckii.total <- as.numeric(spirobranchus$Nb.Spirobranchus.lamarckii.total) | |
302 for (i in c(1:nrow(spirobranchus))) { | |
303 spirobranchus$diff[[i]] <- spirobranchus[i, "Nb.Spirobranchus.lamarckii.total"] - spirobranchus[i, "mean.x.100"] | |
304 } | |
305 | |
306 spirobranchus$diff <- abs(as.integer(spirobranchus$diff)) | |
307 spirobranchus <- dplyr::arrange(spirobranchus, desc(diff), mean.x.100) | |
308 spirobranchus <- dplyr::arrange(dplyr::filter(spirobranchus, diff != 0 & mean.x.100 != 0), desc(diff)) | |
309 | |
310 # check it all in the qecbnew df | |
311 | |
312 for (i in c(1:nrow(qecbnew))) { | |
313 qecbnew$mean.x.100[[i]] <- | |
314 sum(qecbnew[i, c("Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B")], na.rm = TRUE) / sum(!is.na(qecbnew[i, c("Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B")])) * 100 | |
315 } # sum of only NAs/0 = NaN; so replace NaN by Na | |
316 qecbnew$mean.x.100 <- as.character(qecbnew$mean.x.100) | |
317 | |
318 | |
319 for (i in c(1:nrow(qecbnew))) { | |
320 qecbnew$mean.x.100[[i]] <- ifelse(qecbnew$mean.x.100[[i]] == "NaN", NA, qecbnew$mean.x.100[[i]]) | |
321 } | |
322 qecbnew$mean.x.100 <- as.integer(qecbnew$mean.x.100) | |
323 | |
324 qecbnew$Nb.Spirobranchus.lamarckii.total <- as.integer(qecbnew$Nb.Spirobranchus.lamarckii.total) | |
325 qecbnew$Nb.Spirobranchus.lamarckii.total.diff <- abs((qecbnew$Nb.Spirobranchus.lamarckii.total - qecbnew$mean.x.100)) | |
326 spirobranchus_diff <- qecbnew[, c("Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B", "Nb.Spirobranchus.lamarckii.total", "Nb.Spirobranchus.lamarckii.total.ini", "mean.x.100", "Nb.Spirobranchus.lamarckii.total.diff")] | |
327 spirobranchus_diff <- dplyr::arrange(spirobranchus_diff, desc(Nb.Spirobranchus.lamarckii.total.diff), mean.x.100) | |
328 spirobranchus_diff <- dplyr::arrange(dplyr::filter(spirobranchus_diff, Nb.Spirobranchus.lamarckii.total.diff != 0 & mean.x.100 != 0), desc(Nb.Spirobranchus.lamarckii.total.diff)) | |
329 | |
330 qecbnew$Nb.Spirobranchus.lamarckii.total <- ifelse(qecbnew$Nb.Spirobranchus.lamarckii.total.diff != 0 & qecbnew$mean.x.100 != 0, qecbnew$mean.x.100, qecbnew$Nb.Spirobranchus.lamarckii.total) | |
331 spirobranchus_diff <- qecbnew[, c("Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B", "Nb.Spirobranchus.lamarckii.total", "Nb.Spirobranchus.lamarckii.total.ini", "mean.x.100", "Nb.Spirobranchus.lamarckii.total.diff")] | |
332 spirobranchus_diff$Nb.Spirobranchus.lamarckii.total.diff <- abs(as.integer(spirobranchus_diff$Nb.Spirobranchus.lamarckii.total.diff)) | |
333 spirobranchus_diff <- dplyr::arrange(spirobranchus_diff, desc(Nb.Spirobranchus.lamarckii.total.diff), mean.x.100) | |
334 spirobranchus_diff <- dplyr::arrange(dplyr::filter(spirobranchus_diff, Nb.Spirobranchus.lamarckii.total.diff != 0 & mean.x.100 != 0), desc(Nb.Spirobranchus.lamarckii.total.diff)) | |
335 # ok, change made when data x 100 was not correct. | |
336 | |
337 # finally, change NA by mean.x100 for Spirobranchus total | |
338 qecbnew$Nb.Spirobranchus.lamarckii.total <- as.character(qecbnew$Nb.Spirobranchus.lamarckii.total) | |
339 for (i in c(1:nrow(qecbnew))) { | |
340 qecbnew$Nb.Spirobranchus.lamarckii.total[[i]] <- ifelse(qecbnew$Nb.Spirobranchus.lamarckii.total[[i]] %in% c(NA, "NaN", "Inf", "-Inf"), sum(qecbnew[i, c("Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B")], na.rm = TRUE) / sum(!is.na(qecbnew[i, c("Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B")])) * 100, qecbnew$Nb.Spirobranchus.lamarckii.total[[i]]) | |
341 } # sum of only NAs/0 = NaN; so replace NaN by Na | |
342 | |
343 | |
344 for (i in c(1:nrow(qecbnew))) { | |
345 qecbnew$Nb.Spirobranchus.lamarckii.total[[i]] <- ifelse(qecbnew$Nb.Spirobranchus.lamarckii.total[[i]] == "NaN", NA, qecbnew$Nb.Spirobranchus.lamarckii.total[[i]]) | |
346 } | |
347 | |
348 qecbnew$Nb.Spirobranchus.lamarckii.total <- as.integer(qecbnew$Nb.Spirobranchus.lamarckii.total) | |
349 | |
350 qecbnew$Nb.Spirobranchus.lamarckii.total.diff <- abs(qecbnew$Nb.Spirobranchus.lamarckii.total - qecbnew$Nb.Spirobranchus.lamarckii.total.ini) | |
351 spirobranchus_diff <- qecbnew[, c("Nb.Spirobranchus.lamarckii.1B", "Nb.Spirobranchus.lamarckii.2B", "Nb.Spirobranchus.lamarckii.3B", "Nb.Spirobranchus.lamarckii.4B", "Nb.Spirobranchus.lamarckii.5B", "Nb.Spirobranchus.lamarckii.total", "Nb.Spirobranchus.lamarckii.total.ini", "mean.x.100", "Nb.Spirobranchus.lamarckii.total.diff")] | |
352 spirobranchus_diff <- dplyr::arrange(spirobranchus_diff, desc(Nb.Spirobranchus.lamarckii.total.diff), mean.x.100) | |
353 spirobranchus_diff <- dplyr::arrange(dplyr::filter(spirobranchus_diff, Nb.Spirobranchus.lamarckii.total.diff != 0 & mean.x.100 != 0), desc(Nb.Spirobranchus.lamarckii.total.diff)) | |
354 | |
355 qecbnew <- subset(qecbnew, select = -c(Nb.Spirobranchus.lamarckii.total.ini, mean.x.100, Nb.Spirobranchus.lamarckii.total.diff)) | |
356 | |
357 rm(qecbnew_spirobranchus, spirobranchus, spirobranchus_data, spirobranchus_diff) | |
358 | |
359 # do the same for spirorbis | |
360 | |
361 qecbnew$Nb.spirorbis.total.ini <- qecbnew$Nb.spirorbis.total | |
362 qecbnew$Nb.spirorbis.total <- as.character(qecbnew$Nb.spirorbis.total) | |
363 | |
364 qecbnew_spirorbis <- (dplyr::filter(qecbnew, Nb.spirorbis.total %in% c(NA, "NaN", "Inf", "-Inf"))) | |
365 qecbnew_spirorbis[, c("Nb.spirorbis.1A", "Nb.spirorbis.2A", "Nb.spirorbis.3A", "Nb.spirorbis.4A", "Nb.spirorbis.5A")] <- sapply(qecbnew_spirorbis[, c("Nb.spirorbis.1A", "Nb.spirorbis.2A", "Nb.spirorbis.3A", "Nb.spirorbis.4A", "Nb.spirorbis.5A")], as.character) | |
366 (spirobranchus_data <- subset(qecbnew_spirorbis, !is.na(qecbnew_spirorbis$Nb.spirorbis.1A) || !is.na(qecbnew_spirorbis$Nb.spirorbis.2A) || !is.na(qecbnew_spirorbis$Nb.spirorbis.3A) || !is.na(qecbnew_spirorbis$Nb.spirorbis.4A) || !is.na(qecbnew_spirorbis$Nb.spirorbis.5A))[, c("site_year_month_day", "Nb.spirorbis.1A", "Nb.spirorbis.2A", "Nb.spirorbis.3A", "Nb.spirorbis.4A", "Nb.spirorbis.5A", "Nb.spirorbis.total")]) | |
367 | |
368 # In contrast to Spirobranchus data, no encoding issues for spirorbis data, cfr when sub-quadrat 1A-5A are ALL encoded, NA for total. | |
369 | |
370 # some more issues however with "x200"count data | |
371 | |
372 spirorbis <- subset(qecbnew, !is.na(qecbnew$Nb.spirorbis.1A) & !is.na(qecbnew$Nb.spirorbis.2A) & !is.na(qecbnew$Nb.spirorbis.3A) & !is.na(qecbnew$Nb.spirorbis.4A) & !is.na(qecbnew$Nb.spirorbis.5A) & !is.na(qecbnew$Nb.spirorbis.total))[, c("site_year_month_day", "Nb.spirorbis.1A", "Nb.spirorbis.2A", "Nb.spirorbis.3A", "Nb.spirorbis.4A", "Nb.spirorbis.5A", "Nb.spirorbis.total")] | |
373 for (i in c(1:nrow(spirorbis))) { | |
374 spirorbis$mean.x.200[[i]] <- sum(spirorbis[i, c(2:6)], na.rm = TRUE) / sum(!is.na(spirorbis[i, c(2:6)])) * 200 | |
375 } | |
376 spirorbis$mean.x.200 <- unlist(spirorbis$mean.x.200) | |
377 spirorbis$Nb.spirorbis.total <- as.numeric(spirorbis$Nb.spirorbis.total) | |
378 for (i in c(1:nrow(spirorbis))) { | |
379 spirorbis$diff[[i]] <- spirorbis[i, "Nb.spirorbis.total"] - spirorbis[i, "mean.x.200"] | |
380 } | |
381 spirorbis$diff <- abs(as.integer(spirorbis$diff)) | |
382 spirorbis <- dplyr::arrange(spirorbis, desc(diff), mean.x.200) | |
383 (gonb_ilotstmichel_2015_04_18 <- dplyr::filter(spirorbis, site_year_month_day == "GONB_IlotStMichel.2015.04.18")) | |
384 rm(gonb_ilotstmichel_2015_04_18) | |
385 spirorbis <- dplyr::arrange(dplyr::filter(spirorbis, diff != 0 & mean.x.200 != 0), desc(diff)) | |
386 | |
387 # check it all in the qecbnew df | |
388 | |
389 for (i in c(1:nrow(qecbnew))) { | |
390 qecbnew$mean.x.200[[i]] <- sum(qecbnew[i, c("Nb.spirorbis.1A", "Nb.spirorbis.2A", "Nb.spirorbis.3A", "Nb.spirorbis.4A", "Nb.spirorbis.5A")], na.rm = TRUE) / sum(!is.na(qecbnew[i, c("Nb.spirorbis.1A", "Nb.spirorbis.2A", "Nb.spirorbis.3A", "Nb.spirorbis.4A", "Nb.spirorbis.5A")])) * 200 | |
391 } # sum of only NAs/0 = NaN; so replace NaN by Na | |
392 qecbnew$mean.x.200 <- as.character(qecbnew$mean.x.200) | |
393 | |
394 for (i in c(1:nrow(qecbnew))) { | |
395 qecbnew$mean.x.200[[i]] <- ifelse(qecbnew$mean.x.200[[i]] == "NaN", NA, qecbnew$mean.x.200[[i]]) | |
396 } | |
397 | |
398 qecbnew$mean.x.200 <- as.integer(qecbnew$mean.x.200) | |
399 | |
400 qecbnew$Nb.spirorbis.total <- as.integer(qecbnew$Nb.spirorbis.total) | |
401 qecbnew$Nb.spirorbis.total.diff <- abs((qecbnew$Nb.spirorbis.total - qecbnew$mean.x.200)) | |
402 spirorbis_diff <- qecbnew[, c("Nb.spirorbis.1A", "Nb.spirorbis.2A", "Nb.spirorbis.3A", "Nb.spirorbis.4A", "Nb.spirorbis.5A", "Nb.spirorbis.total", "Nb.spirorbis.total.ini", "mean.x.200", "Nb.spirorbis.total.diff")] | |
403 spirorbis_diff <- dplyr::arrange(spirorbis_diff, desc(Nb.spirorbis.total.diff), mean.x.200) | |
404 spirorbis_diff <- dplyr::arrange(dplyr::filter(spirorbis_diff, Nb.spirorbis.total.diff != 0 & mean.x.200 != 0), desc(Nb.spirorbis.total.diff)) | |
405 | |
406 qecbnew$Nb.spirorbis.total <- ifelse(qecbnew$Nb.spirorbis.total.diff != 0 & qecbnew$mean.x.200 != 0, qecbnew$mean.x.200, qecbnew$Nb.spirorbis.total) | |
407 spirorbis_diff <- qecbnew[, c("Nb.spirorbis.1A", "Nb.spirorbis.2A", "Nb.spirorbis.3A", "Nb.spirorbis.4A", "Nb.spirorbis.5A", "Nb.spirorbis.total", "Nb.spirorbis.total.ini", "mean.x.200", "Nb.spirorbis.total.diff")] | |
408 spirorbis_diff$Nb.spirorbis.total.diff <- abs(as.integer(spirorbis_diff$Nb.spirorbis.total.diff)) | |
409 spirorbis_diff <- dplyr::arrange(spirorbis_diff, desc(Nb.spirorbis.total.diff), mean.x.200) | |
410 spirorbis_diff <- dplyr::arrange(dplyr::filter(spirorbis_diff, Nb.spirorbis.total.diff != 0 & mean.x.200 != 0), desc(Nb.spirorbis.total.diff)) | |
411 # ok, change made when data x 200 was not correct. | |
412 | |
413 # finally, change NA by mean.x200 for spirorbis total | |
414 qecbnew$Nb.spirorbis.total <- as.character(qecbnew$Nb.spirorbis.total) | |
415 for (i in c(1:nrow(qecbnew))) { | |
416 qecbnew$Nb.spirorbis.total[[i]] <- ifelse(qecbnew$Nb.spirorbis.total[[i]] %in% c(NA, "NaN", "Inf", "-Inf"), sum(qecbnew[i, c("Nb.spirorbis.1A", "Nb.spirorbis.2A", "Nb.spirorbis.3A", "Nb.spirorbis.4A", "Nb.spirorbis.5A")], na.rm = TRUE) / sum(!is.na(qecbnew[i, c("Nb.spirorbis.1A", "Nb.spirorbis.2A", "Nb.spirorbis.3A", "Nb.spirorbis.4A", "Nb.spirorbis.5A")])) * 200, qecbnew$Nb.spirorbis.total[[i]]) | |
417 } # sum of only NAs/0 = NaN; so replace NaN by Na | |
418 | |
419 for (i in c(1:nrow(qecbnew))) { | |
420 qecbnew$Nb.spirorbis.total[[i]] <- ifelse(qecbnew$Nb.spirorbis.total[[i]] == "NaN", NA, qecbnew$Nb.spirorbis.total[[i]]) | |
421 } | |
422 | |
423 qecbnew$Nb.spirorbis.total <- as.integer(qecbnew$Nb.spirorbis.total) | |
424 | |
425 qecbnew$Nb.spirorbis.total.diff <- abs(qecbnew$Nb.spirorbis.total - qecbnew$Nb.spirorbis.total.ini) | |
426 spirorbis_diff <- qecbnew[, c("Nb.spirorbis.1A", "Nb.spirorbis.2A", "Nb.spirorbis.3A", "Nb.spirorbis.4A", "Nb.spirorbis.5A", "Nb.spirorbis.total", "Nb.spirorbis.total.ini", "mean.x.200", "Nb.spirorbis.total.diff")] | |
427 spirorbis_diff <- dplyr::arrange(spirorbis_diff, desc(Nb.spirorbis.total.diff), mean.x.200) | |
428 spirorbis_diff <- dplyr::arrange(dplyr::filter(spirorbis_diff, Nb.spirorbis.total.diff != 0 & mean.x.200 != 0), desc(Nb.spirorbis.total.diff)) | |
429 | |
430 qecbnew <- subset(qecbnew, select = -c(Nb.spirorbis.total.ini, mean.x.200, Nb.spirorbis.total.diff)) | |
431 | |
432 rm(qecbnew_spirorbis, spirorbis, spirobranchus_data, spirorbis_diff, i) | |
433 | |
434 | |
435 # dplyr::filter for abnormal data, based on histogram distribution of data | |
436 | |
437 dplyr::filter(qecbnew, X..algues.brunes > 100)[, c("Site", "date_fiche", "Type.Bloc", "Numéro.Bloc.échantillon", "Face", "X..algues.brunes")] | |
438 qecbnew$X..algues.brunes <- ifelse(qecbnew$X..algues.brunes > 100, 100, qecbnew$X..algues.brunes) | |
439 dplyr::filter(qecbnew, X..algues.rouges > 100)[, c("Site", "date_fiche", "Type.Bloc", "Numéro.Bloc.échantillon", "Face", "X..algues.rouges")] | |
440 qecbnew$X..algues.rouges <- ifelse(qecbnew$X..algues.rouges > 100, 100, qecbnew$X..algues.rouges) | |
441 | |
442 | |
443 ## SCRIPT I - NAs <- 0 ; cfr previous comment makes no sense to have NA encoded when the presence of an organism is in reality = 0 | |
444 | |
445 # We are facing an issues with NA observations, because either they were not measured/counted, then they are effectively NAs; or these NAs = indeed "0"; but I cannot have NAs for variables that are included in the index determination, cfr if 5+0 = 5, 5+NA = NA; see for example site_year_month_day == "ARMO_Bilfot.2014.04.28", Nb.Spirobranchus.lamarckii.total is NA ... | |
446 # I theregore change these NAs by 0 | |
447 | |
448 # replace NAs by "0" for variables used in qecb determination | |
449 qecbnew[, c("X..algues.brunes", | |
450 "X..algues.rouges", | |
451 "X..Lithophyllum", | |
452 "X..Cladophora", | |
453 "Nb.Littorina.obtusata", | |
454 "Nb.Gibbula.cineraria", | |
455 "Nb.Gibbula.pennanti", | |
456 "Nb.Gibbula.umbilicalis", | |
457 "X..Eponges", | |
458 "X..Ascidies.Coloniales", | |
459 "X..Ascidies.Solitaires", | |
460 "X..Bryozoaires.Dresses", | |
461 "X..algues.vertes", | |
462 "X..Roche.Nue", | |
463 "Nb.spirorbis.total", | |
464 "X..Balanes.Vivantes", | |
465 "Nb.Spirobranchus.lamarckii.total", | |
466 "X..Surface.Accolement")] <- lapply(qecbnew[, | |
467 c("X..algues.brunes", | |
468 "X..algues.rouges", | |
469 "X..Lithophyllum", | |
470 "X..Cladophora", | |
471 "Nb.Littorina.obtusata", | |
472 "Nb.Gibbula.cineraria", | |
473 "Nb.Gibbula.pennanti", | |
474 "Nb.Gibbula.umbilicalis", | |
475 "X..Eponges", | |
476 "X..Ascidies.Coloniales", | |
477 "X..Ascidies.Solitaires", | |
478 "X..Bryozoaires.Dresses", | |
479 "X..algues.vertes", | |
480 "X..Roche.Nue", | |
481 "Nb.spirorbis.total", | |
482 "X..Balanes.Vivantes", | |
483 "Nb.Spirobranchus.lamarckii.total", | |
484 "X..Surface.Accolement")], | |
485 function(x) replace(x, is.na(x), 0)) | |
486 | |
487 # and also replace NA for bivalve by 0 for EGMP and BASQ surveys cfr for accollement correction later on. | |
488 | |
489 qecbnew$X..Mytilus.sp. <- ifelse((substr(qecbnew$Site, 1, 4) %in% c("EGMP", "BASQ")) & is.na(qecbnew$X..Mytilus.sp.), 0, qecbnew$X..Mytilus.sp.) | |
490 qecbnew$Nb.Crassostrea.gigas <- ifelse((substr(qecbnew$Site, 1, 4) %in% c("EGMP", "BASQ")) & is.na(qecbnew$Nb.Crassostrea.gigas), 0, qecbnew$Nb.Crassostrea.gigas) | |
491 qecbnew$Nb.Ostrea.edulis <- ifelse((substr(qecbnew$Site, 1, 4) %in% c("EGMP", "BASQ")) & is.na(qecbnew$Nb.Ostrea.edulis), 0, qecbnew$Nb.Ostrea.edulis) | |
492 | |
493 | |
494 # add a region variable | |
495 region <- rep(NA, nrow(qecbnew)) | |
496 qecbnew <- tibble::add_column(qecbnew, region, .after = "Site_bis") | |
497 qecbnew$region <- ifelse(qecbnew$Site %in% c("EGMP_GroinCou", "EGMP_PasEmsembert", "EGMP_BreeBains", "EGMP_PerreAntiochat", "EGMP_Chassiron", "BASQ_FlotsBleusZP", "BASQ_FlotsBleusZF"), "EGMP.BASQ", "Bretagne") | |
498 rm(region) | |
499 qecbnew <- dplyr::arrange(qecbnew, region, site_year_month_day, Type.Bloc, Numéro.Bloc.échantillon, Face) | |
500 | |
501 # accolement function according to recent 'retournement' | |
502 | |
503 ## before I go further ahead, I have to correct for surface d'accollement for several variable for BM.FI !! | |
504 | |
505 # not the same file name between script qecb script (qecbNew) and this script (qecbNew); doesn't matter, only appears here in the first dplyr::filter lines. | |
506 | |
507 qecbnew <- tibble::add_column(qecbnew, terri_ = substr(qecbnew$Site, 1, 4), .after = "Site_bis") | |
508 | |
509 qecbnew$X..Eponges_ini <- qecbnew$X..Eponges | |
510 qecbnew$X..Ascidies.Coloniales_ini <- qecbnew$X..Ascidies.Coloniales | |
511 qecbnew$X..Ascidies.Solitaires_ini <- qecbnew$X..Ascidies.Solitaires | |
512 qecbnew$X..Bryozoaires.Dresses_ini <- qecbnew$X..Bryozoaires.Dresses | |
513 qecbnew$X..Lithophyllum_ini <- qecbnew$X..Lithophyllum | |
514 qecbnew$X..Balanes.Vivantes_ini <- qecbnew$X..Balanes.Vivantes | |
515 | |
516 df_bm_fs <- qecbnew %>% dplyr::filter(Type.Bloc == "Bloc mobile" & Face == "face supérieure") | |
517 df_bm_fi <- qecbnew %>% dplyr::filter(Type.Bloc == "Bloc mobile" & Face == "face inférieure") | |
518 df_bf <- qecbnew %>% dplyr::filter(Type.Bloc != "Bloc mobile") | |
519 | |
520 `%notin%` <- Negate(`%in%`) | |
521 | |
522 acco_fct <- function(var_) { | |
523 | |
524 df_bm_fi$var_cor.acco. <<- NA | |
525 | |
526 for (i in c(1:nrow(df_bm_fi))) { | |
527 | |
528 df_bm_fi$var_cor.acco.[[i]] <<- if (df_bm_fi$terri_[[i]] %notin% c("EGMP", "BASQ")) { | |
529 ifelse(#df_$Couleur.dominante %in% c("Rouge", "Brune", "Brune-Rouge") || | |
530 df_bm_fs$Couleur.dominante[[i]] %in% c("Blanche", "Verte", "Blanche-Verte", "Colorée"), df_bm_fi[i, var_] / (100 - df_bm_fi$X..Surface.Accolement[[i]]) * 100, df_bm_fi[i, var_]) | |
531 } else { | |
532 ifelse(df_bm_fs$Couleur.dominante[[i]] %in% c("Blanche", "Verte", "Blanche-Verte", "Colorée") | |
533 & df_bm_fi$X..Surface.Accolement[[i]] != 0 # I have to use it in dplyr::filter this time as well for EGMP- BASQ (but not for Bretagne, although could be added, same result); identical/repeated measure for BM.FI and BM.FS | |
534 & df_bm_fs$X..Mytilus.sp.[[i]] == 0, df_bm_fi[i, var_] / (100 - df_bm_fi$X..Surface.Accolement[[i]]) * 100, df_bm_fi[i, var_]) | |
535 } | |
536 | |
537 } | |
538 | |
539 } | |
540 | |
541 # I would only consider colors in c("Rouge", "Brune", "Brune-Rouge") for BM.FI correction [ and not the series c("Blanche-Brune", "Rouge", "Brune", "Blanche-Rouge", "Brune-Rouge", "Rouge-Verte", "Brune-Verte") ] ; and for BM.FS, the list c("Blanche", "Verte", "Colorée") => we do the correction for BM.FI accollement based on BM.FS color !!! | |
542 | |
543 | |
544 # apply acco_fct to BM.FI variables | |
545 | |
546 apply_acco_fct <- function(var_) { | |
547 | |
548 show(sort(df_bm_fi[, var_], decreasing = TRUE, index.return = FALSE)[1:50]) | |
549 pre_ <- as.vector(df_bm_fi[, var_]) | |
550 acco_fct(var_) | |
551 df_bm_fi <<- tibble::add_column(df_bm_fi, var_cor. = df_bm_fi$var_cor.acco., .after = var_) | |
552 show(sort(df_bm_fi$var_cor., decreasing = TRUE, index.return = FALSE)[1:50]) | |
553 df_bm_fi$var_cor. <<- as.numeric(ifelse(as.character(df_bm_fi$var_cor.) %in% c(NA, "NaN", "-Inf", "Inf"), "0", as.character(df_bm_fi$var_cor.))) | |
554 df_bm_fi$var_cor. <<- ifelse(df_bm_fi$var_cor. > 100, 100, df_bm_fi$var_cor.) | |
555 show(sort(df_bm_fi$var_cor., decreasing = TRUE, index.return = FALSE)[1:50]) | |
556 show(length(na.omit(which(abs(as.vector(df_bm_fi$var_cor.) - pre_) != 0))) / na.omit(length(df_bm_fi$var_cor.)) * 100) | |
557 par(mfrow = c(1, 3)) | |
558 hist(pre_, main = var_, xlab = "pre-corection") | |
559 hist(df_bm_fi$var_cor., main = var_, xlab = "post-corection") | |
560 hist(df_bm_fi[as.vector(which(abs(as.vector(df_bm_fi$var_cor.) - pre_) != 0)), var_], main = var_, xlab = "diff. post-pre != 0") | |
561 par(mfrow = c(1, 1)) | |
562 df_bm_fi <<- df_bm_fi[, -which(names(df_bm_fi) %in% c(var_, "var_cor.acco."))] | |
563 colnames(df_bm_fi)[colnames(df_bm_fi) == "var_cor."] <<- var_ | |
564 | |
565 rm(pre_) | |
566 | |
567 } | |
568 | |
569 apply_acco_fct("X..Eponges") | |
570 apply_acco_fct("X..Ascidies.Coloniales") | |
571 apply_acco_fct("X..Ascidies.Solitaires") | |
572 apply_acco_fct("X..Bryozoaires.Dresses") | |
573 apply_acco_fct("X..Lithophyllum") | |
574 apply_acco_fct("X..Balanes.Vivantes") | |
575 | |
576 qecbnew <- dplyr::bind_rows(df_bm_fs, df_bm_fi) | |
577 qecbnew <- dplyr::bind_rows(qecbnew, df_bf) | |
578 | |
579 qecbnew <- dplyr::arrange(qecbnew, region, site_year_month_day, Type.Bloc, Numéro.Bloc.échantillon, Face) | |
580 | |
581 # do remove some more data ... | |
582 # "FINS_Quemenes.2020.10.16", bad encoding, I let know Anna Capietto to make changes => was corrected normally, so unactivate next time I download ESTAMP data | |
583 qecbnew <- dplyr::filter(qecbnew, site_year_month_day != "FINS_Quemenes.2020.10.16") | |
584 | |
585 # save the final qecbnew df_ | |
586 | |
587 qecb <- qecbnew | |
588 | |
589 | |
590 `%notin%` <- Negate(`%in%`) | |
591 | |
592 ## reorder and/or create new variables | |
593 | |
594 # variable site_year_month_day moved for clarity purpose, not needed necessarily | |
595 qecb <- tibble::add_column(qecb, qecb$site_year_month_day, .after = "Site_bis") | |
596 qecb <- qecb[, -which(names(qecb) %in% c("site_year_month_day"))] | |
597 qecb <- dplyr::rename(qecb, site_year_month_day = `qecb$site_year_month_day`) | |
598 | |
599 # new variable period (nothing to see with the existing périod variable) | |
600 period <- rep(NA, nrow(qecb)) | |
601 qecb <- tibble::add_column(qecb, period, .after = "Day") | |
602 qecb$period <- ifelse(as.numeric(qecb$Month) < 7, "p1", "p2") | |
603 qecb$period <- as.factor(qecb$period) | |
604 rm(period) | |
605 | |
606 qecb <- dplyr::arrange(qecb, region, site_year_month_day, Type.Bloc, Numéro.Bloc.échantillon, Face) | |
607 | |
608 # NB: les infos surface d'accolement sont dupliquées de la face inf vers la face sup de blocs mobiles (même si peu de sens d'avoir cette info pour les face sup ...) | |
609 # NB: les data "Abondance ressources ciblées par pêcheurs à pied" présentes uniquement pour les blocs mobiles sont dupliquées entre face inf et face sup. | |
610 | |
611 ## SCRIPT I - NAs <- 0 | |
612 | |
613 # already performed for part in the CB_qecb script ; but here I also consider mobile organisms, logical observation (or not) according to boulders, faces etc ... so more complete. Could be some kind of script fusion to only keep Na to 0 correction in one script, i.e. moving this script to the CB_qecb script ... | |
614 | |
615 bretagne_bm <- dplyr::filter(qecb, region == "Bretagne" & Type.Bloc == "Bloc mobile") | |
616 bretagne_bf <- dplyr::filter(qecb, region == "Bretagne" & Type.Bloc %in% c("Bloc fixé", "Roche en place")) | |
617 egmp_basq_bm <- dplyr::filter(qecb, region == "EGMP.BASQ" & Type.Bloc == "Bloc mobile") | |
618 egmp_basq_bf <- dplyr::filter(qecb, region == "EGMP.BASQ" & Type.Bloc %in% c("Bloc fixé", "Roche en place")) | |
619 | |
620 # replace NAs by "0" for variables used in qecb determination | |
621 | |
622 bretagne_bm[, c( | |
623 "X..algues.brunes", | |
624 "Strate.algues.brunes", | |
625 "X..algues.rouges", | |
626 "Strate.algues.rouges", | |
627 "X..algues.vertes", | |
628 "Strate.algues.vertes", | |
629 "X..Cladophora", | |
630 "X..Lithophyllum", | |
631 "X..Recouvrement.Sediment", # is NA, then replace by 0 as well because no sense to have a NA value for "% recouvrement sédiment" as well. | |
632 #"Type.Sediment", | |
633 "X..Roche.Nue", # is NA, then replace by 0 as well because no sense to have a NA value for "% roche nue" as well. | |
634 "Nb.Littorina.obtusata", | |
635 "Nb.Gibbula.cineraria", | |
636 "Nb.Gibbula.pennanti", | |
637 "Nb.Gibbula.umbilicalis", | |
638 "Nb.Phallusia.mamillata", | |
639 "Nb.Tethya.aurantium", | |
640 #"Nb.Spirobranchus.lamarckii.1B", | |
641 #"Nb.Spirobranchus.lamarckii.2B", | |
642 #"Nb.Spirobranchus.lamarckii.3B", | |
643 #"Nb.Spirobranchus.lamarckii.4B", | |
644 #"Nb.Spirobranchus.lamarckii.5B", | |
645 "Nb.Spirobranchus.lamarckii.total", | |
646 #"Nb.spirorbis.1A", | |
647 #"Nb.spirorbis.2A", | |
648 #"Nb.spirorbis.3A", | |
649 #"Nb.spirorbis.4A", | |
650 #"Nb.spirorbis.5A", | |
651 "Nb.spirorbis.total", | |
652 #.."Nb.Crassostrea.gigas", | |
653 #.."Nb.Ostrea.edulis", | |
654 #.."X..Mytilus.sp.", | |
655 #.."X..Hermelles", | |
656 #.."X..Hydraires", | |
657 "X..Eponges", | |
658 "X..Ascidies.Coloniales", | |
659 "X..Ascidies.Solitaires", | |
660 "X..Bryozoaires.Dresses", | |
661 "X..Balanes.Vivantes", | |
662 #"Commentaires.Avant", | |
663 "X..Surface.Accolement", # is NA, then replace by 0 as well because no sense to have a NA value for "% surface accolement" as well. | |
664 #"Type.sustrat.observé", | |
665 #"Commentaires", | |
666 "Nb.Cancer.pagurus..Tourteau.", | |
667 "Nb.Necora.puber..Etrille.", | |
668 "Nb.Carcinus.maenas..Crabe.vert.", | |
669 "Nb.Nucella.lapilus..Pourpre.", | |
670 #.."Nb.Eriphia.verrucosa..Crabe.verruqueux.", | |
671 #.."Nb.Octopus.vulgaris..Poulpe.", | |
672 "Nb.Galathea..Galathées.", | |
673 #.."Nb.Paracentrotus.lividus..Oursin.", | |
674 "Nb.Lophozozymus.incisus..ancien.Xantho.incisus.", | |
675 "Nb.Palaemon.sp..Crevette.bouquet.ou.crevette.rose.", | |
676 "Nb.Haliotis.tuberculata..Ormeau.", | |
677 #"Nb.Stramonita.haemastoma..Pourpre.bouche.de.sang.", | |
678 "Nb.Littorina.littorea..Bigorneau.", | |
679 "Nb.Xantho.pilipes..Xanthe.poilu.", | |
680 "Nb.Mimachlamys.varia..Pétoncle.noir." | |
681 ) | |
682 ] <- lapply(bretagne_bm[, c( | |
683 "X..algues.brunes", | |
684 "Strate.algues.brunes", | |
685 "X..algues.rouges", | |
686 "Strate.algues.rouges", | |
687 "X..algues.vertes", | |
688 "Strate.algues.vertes", | |
689 "X..Cladophora", | |
690 "X..Lithophyllum", | |
691 "X..Recouvrement.Sediment", # is NA, then replace by 0 as well because no sense to have a NA value for "% recouvrement sédiment" as well. | |
692 #"Type.Sediment", | |
693 "X..Roche.Nue", # is NA, then replace by 0 as well because no sense to have a NA value for "% roche nue" as well. | |
694 "Nb.Littorina.obtusata", | |
695 "Nb.Gibbula.cineraria", | |
696 "Nb.Gibbula.pennanti", | |
697 "Nb.Gibbula.umbilicalis", | |
698 "Nb.Phallusia.mamillata", | |
699 "Nb.Tethya.aurantium", | |
700 #"Nb.Spirobranchus.lamarckii.1B", | |
701 #"Nb.Spirobranchus.lamarckii.2B", | |
702 #"Nb.Spirobranchus.lamarckii.3B", | |
703 #"Nb.Spirobranchus.lamarckii.4B", | |
704 #"Nb.Spirobranchus.lamarckii.5B", | |
705 "Nb.Spirobranchus.lamarckii.total", | |
706 #"Nb.spirorbis.1A", | |
707 #"Nb.spirorbis.2A", | |
708 #"Nb.spirorbis.3A", | |
709 #"Nb.spirorbis.4A", | |
710 #"Nb.spirorbis.5A", | |
711 "Nb.spirorbis.total", | |
712 #.."Nb.Crassostrea.gigas", | |
713 #.."Nb.Ostrea.edulis", | |
714 #.."X..Mytilus.sp.", | |
715 #.."X..Hermelles", | |
716 #.."X..Hydraires", | |
717 "X..Eponges", | |
718 "X..Ascidies.Coloniales", | |
719 "X..Ascidies.Solitaires", | |
720 "X..Bryozoaires.Dresses", | |
721 "X..Balanes.Vivantes", | |
722 #"Commentaires.Avant", | |
723 "X..Surface.Accolement", # is NA, then replace by 0 as well because no sense to have a NA value for "% surface accolement" as well. | |
724 #"Type.sustrat.observé", | |
725 #"Commentaires", | |
726 "Nb.Cancer.pagurus..Tourteau.", | |
727 "Nb.Necora.puber..Etrille.", | |
728 "Nb.Carcinus.maenas..Crabe.vert.", | |
729 "Nb.Nucella.lapilus..Pourpre.", | |
730 #.."Nb.Eriphia.verrucosa..Crabe.verruqueux.", | |
731 #.."Nb.Octopus.vulgaris..Poulpe.", | |
732 "Nb.Galathea..Galathées.", | |
733 #.."Nb.Paracentrotus.lividus..Oursin.", | |
734 "Nb.Lophozozymus.incisus..ancien.Xantho.incisus.", | |
735 "Nb.Palaemon.sp..Crevette.bouquet.ou.crevette.rose.", | |
736 "Nb.Haliotis.tuberculata..Ormeau.", | |
737 #"Nb.Stramonita.haemastoma..Pourpre.bouche.de.sang.", | |
738 "Nb.Littorina.littorea..Bigorneau.", | |
739 "Nb.Xantho.pilipes..Xanthe.poilu.", | |
740 "Nb.Mimachlamys.varia..Pétoncle.noir." | |
741 ) | |
742 ], function(x) replace(x, is.na(x), 0)) | |
743 | |
744 | |
745 # bretagne_bf | |
746 bretagne_bf[, c( | |
747 "X..algues.brunes", | |
748 "Strate.algues.brunes", | |
749 "X..algues.rouges", | |
750 "Strate.algues.rouges", | |
751 "X..algues.vertes", | |
752 "Strate.algues.vertes", | |
753 "X..Cladophora", | |
754 "X..Lithophyllum", | |
755 "X..Recouvrement.Sediment", # is NA, then replace by 0 as well because no sense to have a NA value for "% recouvrement sédiment" as well. | |
756 #"Type.Sediment", | |
757 "X..Roche.Nue", # is NA, then replace by 0 as well because no sense to have a NA value for "% roche nue" as well. | |
758 "Nb.Littorina.obtusata", | |
759 "Nb.Gibbula.cineraria", | |
760 "Nb.Gibbula.pennanti", | |
761 "Nb.Gibbula.umbilicalis", | |
762 "Nb.Phallusia.mamillata", | |
763 "Nb.Tethya.aurantium", | |
764 #"Nb.Spirobranchus.lamarckii.1B", | |
765 #"Nb.Spirobranchus.lamarckii.2B", | |
766 #"Nb.Spirobranchus.lamarckii.3B", | |
767 #"Nb.Spirobranchus.lamarckii.4B", | |
768 #"Nb.Spirobranchus.lamarckii.5B", | |
769 "Nb.Spirobranchus.lamarckii.total", | |
770 #"Nb.spirorbis.1A", | |
771 #"Nb.spirorbis.2A", | |
772 #"Nb.spirorbis.3A", | |
773 #"Nb.spirorbis.4A", | |
774 #"Nb.spirorbis.5A", | |
775 "Nb.spirorbis.total", | |
776 #.."Nb.Crassostrea.gigas", | |
777 #.."Nb.Ostrea.edulis", | |
778 #.."X..Mytilus.sp.", | |
779 #.."X..Hermelles", | |
780 #.."X..Hydraires", | |
781 "X..Eponges", | |
782 "X..Ascidies.Coloniales", | |
783 "X..Ascidies.Solitaires", | |
784 "X..Bryozoaires.Dresses", | |
785 "X..Balanes.Vivantes", | |
786 #"Commentaires.Avant", | |
787 "X..Surface.Accolement"#, # is NA, then replace by 0 as well because no sense to have a NA value for "% surface accolement" as well. | |
788 #"Type.sustrat.observé", | |
789 #"Commentaires", | |
790 #."Nb.Cancer.pagurus..Tourteau.", | |
791 #.."Nb.Necora.puber..Etrille.", | |
792 #."Nb.Carcinus.maenas..Crabe.vert.", | |
793 #."Nb.Nucella.lapilus..Pourpre.", | |
794 #.."Nb.Eriphia.verrucosa..Crabe.verruqueux.", | |
795 #.."Nb.Octopus.vulgaris..Poulpe.", | |
796 #."Nb.Galathea..Galathées.", | |
797 #.."Nb.Paracentrotus.lividus..Oursin.", | |
798 #."Nb.Lophozozymus.incisus..ancien.Xantho.incisus.", | |
799 #."Nb.Palaemon.sp..Crevette.bouquet.ou.crevette.rose.", | |
800 #."Nb.Haliotis.tuberculata..Ormeau.", | |
801 #."Nb.Stramonita.haemastoma..Pourpre.bouche.de.sang.", | |
802 #."Nb.Littorina.littorea..Bigorneau.", | |
803 #."Nb.Xantho.pilipes..Xanthe.poilu.", | |
804 #."Nb.Mimachlamys.varia..Pétoncle.noir." | |
805 ) | |
806 ] <- lapply(bretagne_bf[, c( | |
807 "X..algues.brunes", | |
808 "Strate.algues.brunes", | |
809 "X..algues.rouges", | |
810 "Strate.algues.rouges", | |
811 "X..algues.vertes", | |
812 "Strate.algues.vertes", | |
813 "X..Cladophora", | |
814 "X..Lithophyllum", | |
815 "X..Recouvrement.Sediment", # is NA, then replace by 0 as well because no sense to have a NA value for "% recouvrement sédiment" as well. | |
816 #"Type.Sediment", | |
817 "X..Roche.Nue", # is NA, then replace by 0 as well because no sense to have a NA value for "% roche nue" as well. | |
818 "Nb.Littorina.obtusata", | |
819 "Nb.Gibbula.cineraria", | |
820 "Nb.Gibbula.pennanti", | |
821 "Nb.Gibbula.umbilicalis", | |
822 "Nb.Phallusia.mamillata", | |
823 "Nb.Tethya.aurantium", | |
824 #"Nb.Spirobranchus.lamarckii.1B", | |
825 #"Nb.Spirobranchus.lamarckii.2B", | |
826 #"Nb.Spirobranchus.lamarckii.3B", | |
827 #"Nb.Spirobranchus.lamarckii.4B", | |
828 #"Nb.Spirobranchus.lamarckii.5B", | |
829 "Nb.Spirobranchus.lamarckii.total", | |
830 #"Nb.spirorbis.1A", | |
831 #"Nb.spirorbis.2A", | |
832 #"Nb.spirorbis.3A", | |
833 #"Nb.spirorbis.4A", | |
834 #"Nb.spirorbis.5A", | |
835 "Nb.spirorbis.total", | |
836 #.."Nb.Crassostrea.gigas", | |
837 #.."Nb.Ostrea.edulis", | |
838 #.."X..Mytilus.sp.", | |
839 #.."X..Hermelles", | |
840 #.."X..Hydraires", | |
841 "X..Eponges", | |
842 "X..Ascidies.Coloniales", | |
843 "X..Ascidies.Solitaires", | |
844 "X..Bryozoaires.Dresses", | |
845 "X..Balanes.Vivantes", | |
846 #"Commentaires.Avant", | |
847 "X..Surface.Accolement"#, # is NA, then replace by 0 as well because no sense to have a NA value for "% surface accolement" as well. | |
848 #"Type.sustrat.observé", | |
849 #"Commentaires", | |
850 #."Nb.Cancer.pagurus..Tourteau.", | |
851 #.."Nb.Necora.puber..Etrille.", | |
852 #."Nb.Carcinus.maenas..Crabe.vert.", | |
853 #."Nb.Nucella.lapilus..Pourpre.", | |
854 #.."Nb.Eriphia.verrucosa..Crabe.verruqueux.", | |
855 #.."Nb.Octopus.vulgaris..Poulpe.", | |
856 #."Nb.Galathea..Galathées.", | |
857 #.."Nb.Paracentrotus.lividus..Oursin.", | |
858 #."Nb.Lophozozymus.incisus..ancien.Xantho.incisus.", | |
859 #."Nb.Palaemon.sp..Crevette.bouquet.ou.crevette.rose.", | |
860 #."Nb.Haliotis.tuberculata..Ormeau.", | |
861 #."Nb.Stramonita.haemastoma..Pourpre.bouche.de.sang.", | |
862 #."Nb.Littorina.littorea..Bigorneau.", | |
863 #."Nb.Xantho.pilipes..Xanthe.poilu.", | |
864 #."Nb.Mimachlamys.varia..Pétoncle.noir." | |
865 ) | |
866 ], function(x) replace(x, is.na(x), 0)) | |
867 | |
868 | |
869 # egmp_basq_bm | |
870 egmp_basq_bm[, c( | |
871 "X..algues.brunes", | |
872 "Strate.algues.brunes", | |
873 "X..algues.rouges", | |
874 "Strate.algues.rouges", | |
875 "X..algues.vertes", | |
876 "Strate.algues.vertes", | |
877 "X..Cladophora", | |
878 "X..Lithophyllum", | |
879 "X..Recouvrement.Sediment", # is NA, then replace by 0 as well because no sense to have a NA value for "% recouvrement sédiment" as well. | |
880 #"Type.Sediment", | |
881 "X..Roche.Nue", # is NA, then replace by 0 as well because no sense to have a NA value for "% roche nue" as well. | |
882 "Nb.Littorina.obtusata", | |
883 "Nb.Gibbula.cineraria", | |
884 "Nb.Gibbula.pennanti", | |
885 "Nb.Gibbula.umbilicalis", | |
886 "Nb.Phallusia.mamillata", | |
887 "Nb.Tethya.aurantium", | |
888 #"Nb.Spirobranchus.lamarckii.1B", | |
889 #"Nb.Spirobranchus.lamarckii.2B", | |
890 #"Nb.Spirobranchus.lamarckii.3B", | |
891 #"Nb.Spirobranchus.lamarckii.4B", | |
892 #"Nb.Spirobranchus.lamarckii.5B", | |
893 "Nb.Spirobranchus.lamarckii.total", | |
894 #"Nb.spirorbis.1A", | |
895 #"Nb.spirorbis.2A", | |
896 #"Nb.spirorbis.3A", | |
897 #"Nb.spirorbis.4A", | |
898 #"Nb.spirorbis.5A", | |
899 "Nb.spirorbis.total", | |
900 "Nb.Crassostrea.gigas", | |
901 "Nb.Ostrea.edulis", | |
902 "X..Mytilus.sp.", | |
903 "X..Hermelles", | |
904 "X..Hydraires", | |
905 "X..Eponges", | |
906 "X..Ascidies.Coloniales", | |
907 "X..Ascidies.Solitaires", | |
908 "X..Bryozoaires.Dresses", | |
909 "X..Balanes.Vivantes", | |
910 #"Commentaires.Avant", | |
911 "X..Surface.Accolement", # is NA, then replace by 0 as well because no sense to have a NA value for "% surface accolement" as well. | |
912 #"Type.sustrat.observé", | |
913 #"Commentaires", | |
914 "Nb.Cancer.pagurus..Tourteau.", | |
915 "Nb.Necora.puber..Etrille.", | |
916 "Nb.Carcinus.maenas..Crabe.vert.", | |
917 "Nb.Nucella.lapilus..Pourpre.", | |
918 "Nb.Eriphia.verrucosa..Crabe.verruqueux.", | |
919 "Nb.Octopus.vulgaris..Poulpe.", | |
920 "Nb.Galathea..Galathées.", | |
921 "Nb.Paracentrotus.lividus..Oursin.", | |
922 "Nb.Lophozozymus.incisus..ancien.Xantho.incisus.", | |
923 "Nb.Palaemon.sp..Crevette.bouquet.ou.crevette.rose.", | |
924 "Nb.Haliotis.tuberculata..Ormeau.", | |
925 "Nb.Stramonita.haemastoma..Pourpre.bouche.de.sang.", | |
926 "Nb.Littorina.littorea..Bigorneau.", | |
927 "Nb.Xantho.pilipes..Xanthe.poilu.", | |
928 "Nb.Mimachlamys.varia..Pétoncle.noir." | |
929 ) | |
930 ] <- lapply(egmp_basq_bm[, c( | |
931 "X..algues.brunes", | |
932 "Strate.algues.brunes", | |
933 "X..algues.rouges", | |
934 "Strate.algues.rouges", | |
935 "X..algues.vertes", | |
936 "Strate.algues.vertes", | |
937 "X..Cladophora", | |
938 "X..Lithophyllum", | |
939 "X..Recouvrement.Sediment", # is NA, then replace by 0 as well because no sense to have a NA value for "% recouvrement sédiment" as well. | |
940 #"Type.Sediment", | |
941 "X..Roche.Nue", # is NA, then replace by 0 as well because no sense to have a NA value for "% roche nue" as well. | |
942 "Nb.Littorina.obtusata", | |
943 "Nb.Gibbula.cineraria", | |
944 "Nb.Gibbula.pennanti", | |
945 "Nb.Gibbula.umbilicalis", | |
946 "Nb.Phallusia.mamillata", | |
947 "Nb.Tethya.aurantium", | |
948 #"Nb.Spirobranchus.lamarckii.1B", | |
949 #"Nb.Spirobranchus.lamarckii.2B", | |
950 #"Nb.Spirobranchus.lamarckii.3B", | |
951 #"Nb.Spirobranchus.lamarckii.4B", | |
952 #"Nb.Spirobranchus.lamarckii.5B", | |
953 "Nb.Spirobranchus.lamarckii.total", | |
954 #"Nb.spirorbis.1A", | |
955 #"Nb.spirorbis.2A", | |
956 #"Nb.spirorbis.3A", | |
957 #"Nb.spirorbis.4A", | |
958 #"Nb.spirorbis.5A", | |
959 "Nb.spirorbis.total", | |
960 "Nb.Crassostrea.gigas", | |
961 "Nb.Ostrea.edulis", | |
962 "X..Mytilus.sp.", | |
963 "X..Hermelles", | |
964 "X..Hydraires", | |
965 "X..Eponges", | |
966 "X..Ascidies.Coloniales", | |
967 "X..Ascidies.Solitaires", | |
968 "X..Bryozoaires.Dresses", | |
969 "X..Balanes.Vivantes", | |
970 #"Commentaires.Avant", | |
971 "X..Surface.Accolement", # is NA, then replace by 0 as well because no sense to have a NA value for "% surface accolement" as well. | |
972 #"Type.sustrat.observé", | |
973 #"Commentaires", | |
974 "Nb.Cancer.pagurus..Tourteau.", | |
975 "Nb.Necora.puber..Etrille.", | |
976 "Nb.Carcinus.maenas..Crabe.vert.", | |
977 "Nb.Nucella.lapilus..Pourpre.", | |
978 "Nb.Eriphia.verrucosa..Crabe.verruqueux.", | |
979 "Nb.Octopus.vulgaris..Poulpe.", | |
980 "Nb.Galathea..Galathées.", | |
981 "Nb.Paracentrotus.lividus..Oursin.", | |
982 "Nb.Lophozozymus.incisus..ancien.Xantho.incisus.", | |
983 "Nb.Palaemon.sp..Crevette.bouquet.ou.crevette.rose.", | |
984 "Nb.Haliotis.tuberculata..Ormeau.", | |
985 "Nb.Stramonita.haemastoma..Pourpre.bouche.de.sang.", | |
986 "Nb.Littorina.littorea..Bigorneau.", | |
987 "Nb.Xantho.pilipes..Xanthe.poilu.", | |
988 "Nb.Mimachlamys.varia..Pétoncle.noir." | |
989 ) | |
990 ], function(x) replace(x, is.na(x), 0)) | |
991 | |
992 | |
993 # egmp_basq_bf | |
994 egmp_basq_bf[, c( | |
995 "X..algues.brunes", | |
996 "Strate.algues.brunes", | |
997 "X..algues.rouges", | |
998 "Strate.algues.rouges", | |
999 "X..algues.vertes", | |
1000 "Strate.algues.vertes", | |
1001 "X..Cladophora", | |
1002 "X..Lithophyllum", | |
1003 "X..Recouvrement.Sediment", # is NA, then replace by 0 as well because no sense to have a NA value for "% recouvrement sédiment" as well. | |
1004 #"Type.Sediment", | |
1005 "X..Roche.Nue", # is NA, then replace by 0 as well because no sense to have a NA value for "% roche nue" as well. | |
1006 "Nb.Littorina.obtusata", | |
1007 "Nb.Gibbula.cineraria", | |
1008 "Nb.Gibbula.pennanti", | |
1009 "Nb.Gibbula.umbilicalis", | |
1010 "Nb.Phallusia.mamillata", | |
1011 "Nb.Tethya.aurantium", | |
1012 #"Nb.Spirobranchus.lamarckii.1B", | |
1013 #"Nb.Spirobranchus.lamarckii.2B", | |
1014 #"Nb.Spirobranchus.lamarckii.3B", | |
1015 #"Nb.Spirobranchus.lamarckii.4B", | |
1016 #"Nb.Spirobranchus.lamarckii.5B", | |
1017 "Nb.Spirobranchus.lamarckii.total", | |
1018 #"Nb.spirorbis.1A", | |
1019 #"Nb.spirorbis.2A", | |
1020 #"Nb.spirorbis.3A", | |
1021 #"Nb.spirorbis.4A", | |
1022 #"Nb.spirorbis.5A", | |
1023 "Nb.spirorbis.total", | |
1024 "Nb.Crassostrea.gigas", | |
1025 "Nb.Ostrea.edulis", | |
1026 "X..Mytilus.sp.", | |
1027 "X..Hermelles", | |
1028 "X..Hydraires", | |
1029 "X..Eponges", | |
1030 "X..Ascidies.Coloniales", | |
1031 "X..Ascidies.Solitaires", | |
1032 "X..Bryozoaires.Dresses", | |
1033 "X..Balanes.Vivantes", | |
1034 #"Commentaires.Avant", | |
1035 "X..Surface.Accolement"#, # is NA, then replace by 0 as well because no sense to have a NA value for "% surface accolement" as well. | |
1036 #"Type.sustrat.observé", | |
1037 #"Commentaires", | |
1038 #."Nb.Cancer.pagurus..Tourteau.", | |
1039 #.."Nb.Necora.puber..Etrille.", | |
1040 #."Nb.Carcinus.maenas..Crabe.vert.", | |
1041 #."Nb.Nucella.lapilus..Pourpre.", | |
1042 #.."Nb.Eriphia.verrucosa..Crabe.verruqueux.", | |
1043 #.."Nb.Octopus.vulgaris..Poulpe.", | |
1044 #."Nb.Galathea..Galathées.", | |
1045 #.."Nb.Paracentrotus.lividus..Oursin.", | |
1046 #."Nb.Lophozozymus.incisus..ancien.Xantho.incisus.", | |
1047 #."Nb.Palaemon.sp..Crevette.bouquet.ou.crevette.rose.", | |
1048 #."Nb.Haliotis.tuberculata..Ormeau.", | |
1049 #."Nb.Stramonita.haemastoma..Pourpre.bouche.de.sang.", | |
1050 #."Nb.Littorina.littorea..Bigorneau.", | |
1051 #."Nb.Xantho.pilipes..Xanthe.poilu.", | |
1052 #."Nb.Mimachlamys.varia..Pétoncle.noir." | |
1053 ) | |
1054 ] <- lapply(egmp_basq_bf[, c( | |
1055 "X..algues.brunes", | |
1056 "Strate.algues.brunes", | |
1057 "X..algues.rouges", | |
1058 "Strate.algues.rouges", | |
1059 "X..algues.vertes", | |
1060 "Strate.algues.vertes", | |
1061 "X..Cladophora", | |
1062 "X..Lithophyllum", | |
1063 "X..Recouvrement.Sediment", # is NA, then replace by 0 as well because no sense to have a NA value for "% recouvrement sédiment" as well. | |
1064 #"Type.Sediment", | |
1065 "X..Roche.Nue", # is NA, then replace by 0 as well because no sense to have a NA value for "% roche nue" as well. | |
1066 "Nb.Littorina.obtusata", | |
1067 "Nb.Gibbula.cineraria", | |
1068 "Nb.Gibbula.pennanti", | |
1069 "Nb.Gibbula.umbilicalis", | |
1070 "Nb.Phallusia.mamillata", | |
1071 "Nb.Tethya.aurantium", | |
1072 #"Nb.Spirobranchus.lamarckii.1B", | |
1073 #"Nb.Spirobranchus.lamarckii.2B", | |
1074 #"Nb.Spirobranchus.lamarckii.3B", | |
1075 #"Nb.Spirobranchus.lamarckii.4B", | |
1076 #"Nb.Spirobranchus.lamarckii.5B", | |
1077 "Nb.Spirobranchus.lamarckii.total", | |
1078 #"Nb.spirorbis.1A", | |
1079 #"Nb.spirorbis.2A", | |
1080 #"Nb.spirorbis.3A", | |
1081 #"Nb.spirorbis.4A", | |
1082 #"Nb.spirorbis.5A", | |
1083 "Nb.spirorbis.total", | |
1084 "Nb.Crassostrea.gigas", | |
1085 "Nb.Ostrea.edulis", | |
1086 "X..Mytilus.sp.", | |
1087 "X..Hermelles", | |
1088 "X..Hydraires", | |
1089 "X..Eponges", | |
1090 "X..Ascidies.Coloniales", | |
1091 "X..Ascidies.Solitaires", | |
1092 "X..Bryozoaires.Dresses", | |
1093 "X..Balanes.Vivantes", | |
1094 #"Commentaires.Avant", | |
1095 "X..Surface.Accolement"#, # is NA, then replace by 0 as well because no sense to have a NA value for "% surface accolement" as well. | |
1096 #"Type.sustrat.observé", | |
1097 #"Commentaires", | |
1098 #."Nb.Cancer.pagurus..Tourteau.", | |
1099 #.."Nb.Necora.puber..Etrille.", | |
1100 #."Nb.Carcinus.maenas..Crabe.vert.", | |
1101 #."Nb.Nucella.lapilus..Pourpre.", | |
1102 #.."Nb.Eriphia.verrucosa..Crabe.verruqueux.", | |
1103 #.."Nb.Octopus.vulgaris..Poulpe.", | |
1104 #."Nb.Galathea..Galathées.", | |
1105 #.."Nb.Paracentrotus.lividus..Oursin.", | |
1106 #."Nb.Lophozozymus.incisus..ancien.Xantho.incisus.", | |
1107 #."Nb.Palaemon.sp..Crevette.bouquet.ou.crevette.rose.", | |
1108 #."Nb.Haliotis.tuberculata..Ormeau.", | |
1109 #."Nb.Stramonita.haemastoma..Pourpre.bouche.de.sang.", | |
1110 #."Nb.Littorina.littorea..Bigorneau.", | |
1111 #."Nb.Xantho.pilipes..Xanthe.poilu.", | |
1112 #."Nb.Mimachlamys.varia..Pétoncle.noir." | |
1113 ) | |
1114 ], function(x) replace(x, is.na(x), 0)) | |
1115 | |
1116 | |
1117 # merge dfs. | |
1118 qecbnato0 <- dplyr::bind_rows(bretagne_bm, bretagne_bf) | |
1119 qecbnato0 <- dplyr::bind_rows(qecbnato0, egmp_basq_bm) | |
1120 qecbnato0 <- dplyr::bind_rows(qecbnato0, egmp_basq_bf) | |
1121 | |
1122 qecbnato0 <- dplyr::arrange(qecbnato0, region, site_year_month_day, Type.Bloc, Numéro.Bloc.échantillon, Face) | |
1123 | |
1124 rm(bretagne_bm, bretagne_bf, egmp_basq_bm, egmp_basq_bf) | |
1125 | |
1126 | |
1127 ## analyse matricielle | |
1128 | |
1129 # NB some variables were dplyr::renamed or created, cfr I originally merged qecb and ivr data in below script to do some correlation analysis. This is not the case anymore, so no more merging anymore. | |
1130 | |
1131 qecbnato0 <- tibble::add_column(qecbnato0, region.site_year_month_day = paste0(qecbnato0$region, qecbnato0$site_year_month_day), .before = "region") | |
1132 | |
1133 | |
1134 numero_quadrat <- stringr::str_sub(qecbnato0$quadrat_bis, start = -1) | |
1135 qecbnato0 <- tibble::add_column(qecbnato0, numero_quadrat, .after = "quadrat_bis") | |
1136 rm(numero_quadrat) | |
1137 qecbnato0$numero_quadrat <- as.integer(qecbnato0$numero_quadrat) | |
1138 | |
1139 | |
1140 qecbnato0$Year <- as.integer(qecbnato0$Year) | |
1141 qecbnato0$Month <- as.integer(qecbnato0$Month) | |
1142 qecbnato0$Day <- as.integer(qecbnato0$Day) | |
1143 | |
1144 ############################################################ | |
1145 #!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! | |
1146 # Anna still hasn't corrected for boulder nb in FINS_Quemenes.2020.10.16 data encoding ! removed from the df_ | |
1147 qecbnato0 <- qecbnato0 %>% dplyr::filter(site_year_month_day != "FINS_Quemenes.2020.10.16") | |
1148 ############################################################ | |
1149 | |
1150 # what to do with spirorbes & Nb.Spirobranchus.lamarckii.total? log10 transformation | |
1151 | |
1152 qecbnato0 <- tibble::add_column(qecbnato0, log10.Nb.spirorbis.total = log10(qecbnato0$Nb.spirorbis.total + 1), .after = "Nb.spirorbis.total") | |
1153 qecbnato0 <- tibble::add_column(qecbnato0, log10.Nb.Spirobranchus.lamarckii.total = log10(qecbnato0$Nb.Spirobranchus.lamarckii.total + 1), .after = "Nb.Spirobranchus.lamarckii.total") | |
1154 | |
1155 saveRDS(qecbnato0, "qecbnato0.RDS") | |
1156 | |
1157 | |
1158 ############################################################### | |
1159 # # | |
1160 # Start dissimilarity calculation with some data handling # | |
1161 # # | |
1162 ############################################################### | |
1163 | |
1164 # first, create vector (4) for qecb and fishing by region (same as above) | |
1165 | |
1166 bret_egmp_basq_qecb <- c( | |
1167 "X..algues.brunes", | |
1168 "X..algues.rouges", | |
1169 "X..algues.vertes", | |
1170 "X..Cladophora", | |
1171 "X..Lithophyllum", | |
1172 "Nb.Littorina.obtusata", | |
1173 "Nb.Gibbula.cineraria", | |
1174 "Nb.Gibbula.pennanti", | |
1175 "Nb.Gibbula.umbilicalis", | |
1176 "Nb.Phallusia.mamillata", | |
1177 "Nb.Tethya.aurantium", | |
1178 "Nb.Spirobranchus.lamarckii.total", | |
1179 "Nb.spirorbis.total", | |
1180 "X..Eponges", | |
1181 "X..Ascidies.Coloniales", | |
1182 "X..Ascidies.Solitaires", | |
1183 "X..Bryozoaires.Dresses", | |
1184 "X..Balanes.Vivantes" | |
1185 #, "X..Recouvrement.Sediment" | |
1186 #, "X..Roche.Nue" | |
1187 #, "X..Surface.Accolement" | |
1188 ) | |
1189 | |
1190 egmp_basq_qecb <- c("Nb.Crassostrea.gigas", "Nb.Ostrea.edulis", "X..Mytilus.sp.", "X..Hermelles", "X..Hydraires") | |
1191 | |
1192 bret_egmp_basq_fishing <- c("Nb.Cancer.pagurus..Tourteau.", | |
1193 "Nb.Necora.puber..Etrille.", | |
1194 "Nb.Carcinus.maenas..Crabe.vert.", | |
1195 "Nb.Nucella.lapilus..Pourpre.", | |
1196 "Nb.Galathea..Galathées.", | |
1197 "Nb.Lophozozymus.incisus..ancien.Xantho.incisus.", | |
1198 "Nb.Palaemon.sp..Crevette.bouquet.ou.crevette.rose.", | |
1199 "Nb.Haliotis.tuberculata..Ormeau.", | |
1200 "Nb.Littorina.littorea..Bigorneau.", | |
1201 "Nb.Xantho.pilipes..Xanthe.poilu.", | |
1202 "Nb.Mimachlamys.varia..Pétoncle.noir.") | |
1203 | |
1204 egmp_basq_fishing <- c("Nb.Eriphia.verrucosa..Crabe.verruqueux.", "Nb.Octopus.vulgaris..Poulpe.", "Nb.Paracentrotus.lividus..Oursin.", "Nb.Stramonita.haemastoma..Pourpre.bouche.de.sang.") | |
1205 | |
1206 # here I can choose to either replace spirorbis and/or spirobranchus by their log10 transformation in bret_egmp_basq_qecb vector | |
1207 bret_egmp_basq_qecb <- replace(bret_egmp_basq_qecb, bret_egmp_basq_qecb == "Nb.spirorbis.total", "log10.Nb.spirorbis.total") | |
1208 saveRDS(bret_egmp_basq_qecb, "bret_egmp_basq_qecb.RDS") | |
1209 | |
1210 | |
1211 ############################################################# | |
1212 # # | |
1213 # Compute dissimilarity # | |
1214 # # | |
1215 ############################################################# | |
1216 ### determination of coefficient of dissimilarity face sup mobile bloc vs fixed bloc | |
1217 | |
1218 # loop in a fct | |
1219 | |
1220 matri_fct_bmf <- function(data, conca) { | |
1221 | |
1222 matri_df <- data | |
1223 | |
1224 for (x in c(1:length(unique(matri_df$site_year_month_day)))) { | |
1225 | |
1226 qecbnato0_x <- matri_df %>% dplyr::filter(site_year_month_day == unique(matri_df$site_year_month_day)[[x]]) | |
1227 | |
1228 rownames(qecbnato0_x) <- paste0(qecbnato0_x$Type.Bloc, "_", qecbnato0_x$Face, "_", qecbnato0_x$Numéro.Bloc.échantillon, "_", qecbnato0_x$quadrat_bis) | |
1229 | |
1230 | |
1231 mtxdis <- vegan::vegdist( | |
1232 sqrt(qecbnato0_x[, conca]), #Transform your species abundance data_ Typically, raw abundances are transformed prior to analysis. Usually you will use square root, fourth-root, log(X+1), or presence-absence (square root being least extreme, P/A being most). I would start with square root. (https://stats.stackexchange.com/questions/234495/double-zeroes-problem-with-euclidean-distance-and-abundance-data-is-the-proble) | |
1233 | |
1234 na.rm = TRUE, | |
1235 method = "bray" #Construct species abundance dissimilarity matrices with Bray-Curtis. If your data contains samples that are all-zero you will run into the double zero problem. This can be overcome by using a zero-adjusted Bray-Curtis coefficient, which is sometimes referred to as a 'dummy variable' which damps down the similarity fluctuations between samples that are both zero (undefined). => see below for zero-adjusted Bray-Curtis coefficient ; #another possibility, sqrt + 1 ?? | |
1236 ) | |
1237 | |
1238 | |
1239 #https://rdrr.io/github/phytomosaic/ecole/man/bray0.html | |
1240 # mtxdis <- ecole::bray0( | |
1241 # sqrt | |
1242 # (qecbnato0_x[,conca]), na.rm = TRUE) | |
1243 | |
1244 expand.grid(mtxdis) | |
1245 | |
1246 mtxdisdf_ <- as.data.frame(as.matrix(mtxdis)) | |
1247 | |
1248 a_ <- NA | |
1249 b_ <- NA | |
1250 c_ <- NA | |
1251 d_ <- NA | |
1252 e_ <- NA | |
1253 f_ <- NA | |
1254 g_ <- NA | |
1255 h_ <- NA | |
1256 i_ <- NA | |
1257 j_ <- NA | |
1258 k_ <- NA | |
1259 l_ <- NA | |
1260 m_ <- NA | |
1261 n_ <- NA | |
1262 | |
1263 for (z in c(1:nrow(mtxdisdf_))) { | |
1264 | |
1265 a_[[z]] <- (paste0(rownames(mtxdisdf_[z + 1, ]), " & ", ifelse(nrow(mtxdisdf_) >= 1, colnames(mtxdisdf_[1]), NA))) | |
1266 b_[[z]] <- (paste0(rownames(mtxdisdf_[z + 2, ]), " & ", ifelse(nrow(mtxdisdf_) >= 2, colnames(mtxdisdf_[2]), NA))) | |
1267 c_[[z]] <- (paste0(rownames(mtxdisdf_[z + 3, ]), " & ", ifelse(nrow(mtxdisdf_) >= 3, colnames(mtxdisdf_[3]), NA))) | |
1268 d_[[z]] <- (paste0(rownames(mtxdisdf_[z + 4, ]), " & ", ifelse(nrow(mtxdisdf_) >= 4, colnames(mtxdisdf_[4]), NA))) | |
1269 e_[[z]] <- (paste0(rownames(mtxdisdf_[z + 5, ]), " & ", ifelse(nrow(mtxdisdf_) >= 5, colnames(mtxdisdf_[5]), NA))) | |
1270 f_[[z]] <- (paste0(rownames(mtxdisdf_[z + 6, ]), " & ", ifelse(nrow(mtxdisdf_) >= 6, colnames(mtxdisdf_[6]), NA))) | |
1271 g_[[z]] <- (paste0(rownames(mtxdisdf_[z + 7, ]), " & ", ifelse(nrow(mtxdisdf_) >= 7, colnames(mtxdisdf_[7]), NA))) | |
1272 h_[[z]] <- (paste0(rownames(mtxdisdf_[z + 8, ]), " & ", ifelse(nrow(mtxdisdf_) >= 8, colnames(mtxdisdf_[8]), NA))) | |
1273 i_[[z]] <- (paste0(rownames(mtxdisdf_[z + 9, ]), " & ", ifelse(nrow(mtxdisdf_) >= 9, colnames(mtxdisdf_[9]), NA))) | |
1274 j_[[z]] <- (paste0(rownames(mtxdisdf_[z + 10, ]), " & ", ifelse(nrow(mtxdisdf_) >= 10, colnames(mtxdisdf_[10]), NA))) | |
1275 k_[[z]] <- (paste0(rownames(mtxdisdf_[z + 11, ]), " & ", ifelse(nrow(mtxdisdf_) >= 11, colnames(mtxdisdf_[11]), NA))) | |
1276 l_[[z]] <- (paste0(rownames(mtxdisdf_[z + 12, ]), " & ", ifelse(nrow(mtxdisdf_) >= 12, colnames(mtxdisdf_[12]), NA))) | |
1277 m_[[z]] <- (paste0(rownames(mtxdisdf_[z + 13, ]), " & ", ifelse(nrow(mtxdisdf_) >= 13, colnames(mtxdisdf_[13]), NA))) | |
1278 n_[[z]] <- (paste0(rownames(mtxdisdf_[z + 14, ]), " & ", ifelse(nrow(mtxdisdf_) >= 14, colnames(mtxdisdf_[14]), NA))) | |
1279 | |
1280 } | |
1281 | |
1282 rm(z) | |
1283 | |
1284 y <- length(a_) - (ifelse(nrow(mtxdisdf_) >= 1, 1, nrow(mtxdisdf_))) | |
1285 a_ <- a_[1:y] | |
1286 y <- length(b_) - (ifelse(nrow(mtxdisdf_) >= 2, 2, nrow(mtxdisdf_))) | |
1287 b_ <- b_[1:y] | |
1288 y <- length(c_) - (ifelse(nrow(mtxdisdf_) >= 3, 3, nrow(mtxdisdf_))) | |
1289 c_ <- c_[1:y] | |
1290 y <- length(d_) - (ifelse(nrow(mtxdisdf_) >= 4, 4, nrow(mtxdisdf_))) | |
1291 d_ <- d_[1:y] | |
1292 y <- length(e_) - (ifelse(nrow(mtxdisdf_) >= 5, 5, nrow(mtxdisdf_))) | |
1293 e_ <- e_[1:y] | |
1294 y <- length(f_) - (ifelse(nrow(mtxdisdf_) >= 6, 6, nrow(mtxdisdf_))) | |
1295 f_ <- f_[1:y] | |
1296 y <- length(g_) - (ifelse(nrow(mtxdisdf_) >= 7, 7, nrow(mtxdisdf_))) | |
1297 g_ <- g_[1:y] | |
1298 y <- length(h_) - (ifelse(nrow(mtxdisdf_) >= 8, 8, nrow(mtxdisdf_))) | |
1299 h_ <- h_[1:y] | |
1300 y <- length(i_) - (ifelse(nrow(mtxdisdf_) >= 9, 9, nrow(mtxdisdf_))) | |
1301 i_ <- i_[1:y] | |
1302 y <- length(j_) - (ifelse(nrow(mtxdisdf_) >= 10, 10, nrow(mtxdisdf_))) | |
1303 j_ <- j_[1:y] | |
1304 y <- length(k_) - (ifelse(nrow(mtxdisdf_) >= 11, 11, nrow(mtxdisdf_))) | |
1305 k_ <- k_[1:y] | |
1306 y <- length(l_) - (ifelse(nrow(mtxdisdf_) >= 12, 12, nrow(mtxdisdf_))) | |
1307 l_ <- l_[1:y] | |
1308 y <- length(m_) - (ifelse(nrow(mtxdisdf_) >= 13, 13, nrow(mtxdisdf_))) | |
1309 m_ <- m_[1:y] | |
1310 y <- length(n_) - (ifelse(nrow(mtxdisdf_) >= 14, 14, nrow(mtxdisdf_))) | |
1311 n_ <- n_[1:y] | |
1312 | |
1313 rm(y) | |
1314 | |
1315 name_ <- c(a_, b_, c_, d_, e_, f_, g_, h_, i_, j_, k_, l_, m_, n_) | |
1316 df_ <- data.frame(expand.grid(mtxdis), name_[1:nrow(expand.grid(mtxdis))]) | |
1317 names(df_) <- c("dist.", "name_") | |
1318 | |
1319 rm(a_, b_, c_, d_, e_, f_, g_, h_, i_, j_, k_, l_, m_, n_) | |
1320 rm(name_) | |
1321 | |
1322 q_ <- strsplit(df_$name_, " & ") | |
1323 mat_ <- matrix(unlist(q_), ncol = 2, byrow = TRUE) | |
1324 q_df_ <- as.data.frame(matrix(unlist(q_), ncol = 2, byrow = TRUE)) | |
1325 df_ <- dplyr::bind_cols(df_, q_df_) | |
1326 | |
1327 rm(q_, mat_, q_df_) | |
1328 | |
1329 split_ <- strsplit(df_$V1, "_") | |
1330 v1_split_ <- as.data.frame(matrix(unlist(split_), ncol = 4, byrow = TRUE)) | |
1331 split_ <- strsplit(df_$V2, "_") | |
1332 v2_split_ <- as.data.frame(matrix(unlist(split_), ncol = 4, byrow = TRUE)) | |
1333 | |
1334 df_ <- dplyr::bind_cols(df_, v1_split_) | |
1335 df_ <- dplyr::bind_cols(df_, v2_split_) | |
1336 df_red_ <- subset(df_, V4...8 == V4...12 & V1...5 != V1...9) | |
1337 site_year_month_day <- rep(unique(qecbnato0_x$site_year_month_day), nrow(df_red_)) | |
1338 | |
1339 df_red_ <- tibble::add_column(df_red_, site_year_month_day, .before = "dist.") | |
1340 | |
1341 rm(split_, v1_split_, v2_split_) | |
1342 rm(mtxdis, mtxdisdf_, df_, site_year_month_day) | |
1343 | |
1344 matri_list[[x]] <- df_red_ | |
1345 matri_list <<- matri_list | |
1346 | |
1347 rm(df_red_, qecbnato0_x, x) | |
1348 | |
1349 } | |
1350 | |
1351 matri_df <- do.call("rbind", matri_list) | |
1352 | |
1353 names(matri_df) <- c("site_year_month_day", "dist.", "name_", "name_left", "name_right", "Type.Bloc.left", "Face.left", "Numéro.Bloc.échantillon.left", "Quadrat.left", "Type.Bloc.right", "Face.right", "Numéro.Bloc.échantillon.right", "Quadrat.right") | |
1354 | |
1355 matri_df <<- matri_df | |
1356 | |
1357 hist(matri_df$dist.) | |
1358 | |
1359 } | |
1360 | |
1361 data_ <- dplyr::filter(qecbnato0, Face == "face supérieure") | |
1362 data_$Type.Bloc <- ifelse(as.character(data_$Type.Bloc) == "Roche en place", "Bloc fixé", as.character(data_$Type.Bloc)) | |
1363 matri_list <- vector("list", length(unique(data_$site_year_month_day))) | |
1364 | |
1365 matri_fct_bmf(data = data_, conca = c(bret_egmp_basq_qecb, egmp_basq_qecb)) | |
1366 hist(matri_df$dist., main = c(paste("Histo. of Bray (0-adjusted) dist. dissimilarity measures"), paste(" (sqrt transfo) - BMfs vs BF -"))) | |
1367 | |
1368 matri_full_bm_bf_fs <- matri_df | |
1369 | |
1370 | |
1371 rm(data_, matri_df, matri_list) | |
1372 | |
1373 | |
1374 ### determination of coefficient of dissimilarity between blocs mobiles face sup vs face inf. | |
1375 | |
1376 # loop in a fct | |
1377 | |
1378 matri_fct_bmm <- function(data, conca) { | |
1379 | |
1380 matri_df <- data | |
1381 | |
1382 for (x in c(1:length(unique(matri_df$site_year_month_day)))) { | |
1383 | |
1384 qecbnato0_x <- matri_df %>% dplyr::filter(site_year_month_day == unique(matri_df$site_year_month_day)[[x]]) | |
1385 | |
1386 rownames(qecbnato0_x) <- paste0(qecbnato0_x$Type.Bloc, "_", qecbnato0_x$Face, "_", qecbnato0_x$Numéro.Bloc.échantillon, "_", qecbnato0_x$quadrat_bis) | |
1387 | |
1388 | |
1389 mtxdis <- vegan::vegdist( | |
1390 sqrt(qecbnato0_x[, c(bret_egmp_basq_qecb)]), #Transform your species abundance data_ Typically, raw abundances are transformed prior to analysis. Usually you will use square root, fourth-root, log(X+1), or presence-absence (square root being least extreme, P/A being most). I would start with square root. (https://stats.stackexchange.com/questions/234495/double-zeroes-problem-with-euclidean-distance-and-abundance-data-is-the-proble) | |
1391 na.rm = TRUE, | |
1392 method = "bray" #Construct species abundance dissimilarity matrices with Bray-Curtis. If your data contains samples that are all-zero you will run into the double zero problem. This can be overcome by using a zero-adjusted Bray-Curtis coefficient, which is sometimes referred to as a 'dummy variable' which damps down the similarity fluctuations between samples that are both zero (undefined). => see below for zero-adjusted Bray-Curtis coefficient ; #another possibility, sqrt + 1 ?? | |
1393 ) | |
1394 | |
1395 | |
1396 #mtxdis <- ecole::bray0( | |
1397 # sqrt(qecbnato0_x[, conca]), na.rm = TRUE) | |
1398 | |
1399 | |
1400 expand.grid(mtxdis) | |
1401 | |
1402 mtxdisdf_ <- as.data.frame(as.matrix(mtxdis)) | |
1403 | |
1404 a_ <- NA | |
1405 b_ <- NA | |
1406 c_ <- NA | |
1407 d_ <- NA | |
1408 e_ <- NA | |
1409 f_ <- NA | |
1410 g_ <- NA | |
1411 h_ <- NA | |
1412 i_ <- NA | |
1413 j_ <- NA | |
1414 k_ <- NA | |
1415 l_ <- NA | |
1416 m_ <- NA | |
1417 n_ <- NA | |
1418 o_ <- NA | |
1419 p_ <- NA | |
1420 q_ <- NA | |
1421 r_ <- NA | |
1422 s_ <- NA | |
1423 | |
1424 for (z in c(1:nrow(mtxdisdf_))) { | |
1425 | |
1426 a_[[z]] <- (paste0(rownames(mtxdisdf_[z + 1, ]), " & ", ifelse(nrow(mtxdisdf_) >= 1, colnames(mtxdisdf_[1]), NA))) | |
1427 b_[[z]] <- (paste0(rownames(mtxdisdf_[z + 2, ]), " & ", ifelse(nrow(mtxdisdf_) >= 2, colnames(mtxdisdf_[2]), NA))) | |
1428 c_[[z]] <- (paste0(rownames(mtxdisdf_[z + 3, ]), " & ", ifelse(nrow(mtxdisdf_) >= 3, colnames(mtxdisdf_[3]), NA))) | |
1429 d_[[z]] <- (paste0(rownames(mtxdisdf_[z + 4, ]), " & ", ifelse(nrow(mtxdisdf_) >= 4, colnames(mtxdisdf_[4]), NA))) | |
1430 e_[[z]] <- (paste0(rownames(mtxdisdf_[z + 5, ]), " & ", ifelse(nrow(mtxdisdf_) >= 5, colnames(mtxdisdf_[5]), NA))) | |
1431 f_[[z]] <- (paste0(rownames(mtxdisdf_[z + 6, ]), " & ", ifelse(nrow(mtxdisdf_) >= 6, colnames(mtxdisdf_[6]), NA))) | |
1432 g_[[z]] <- (paste0(rownames(mtxdisdf_[z + 7, ]), " & ", ifelse(nrow(mtxdisdf_) >= 7, colnames(mtxdisdf_[7]), NA))) | |
1433 h_[[z]] <- (paste0(rownames(mtxdisdf_[z + 8, ]), " & ", ifelse(nrow(mtxdisdf_) >= 8, colnames(mtxdisdf_[8]), NA))) | |
1434 i_[[z]] <- (paste0(rownames(mtxdisdf_[z + 9, ]), " & ", ifelse(nrow(mtxdisdf_) >= 9, colnames(mtxdisdf_[9]), NA))) | |
1435 j_[[z]] <- (paste0(rownames(mtxdisdf_[z + 10, ]), " & ", ifelse(nrow(mtxdisdf_) >= 10, colnames(mtxdisdf_[10]), NA))) | |
1436 k_[[z]] <- (paste0(rownames(mtxdisdf_[z + 11, ]), " & ", ifelse(nrow(mtxdisdf_) >= 11, colnames(mtxdisdf_[11]), NA))) | |
1437 l_[[z]] <- (paste0(rownames(mtxdisdf_[z + 12, ]), " & ", ifelse(nrow(mtxdisdf_) >= 12, colnames(mtxdisdf_[12]), NA))) | |
1438 m_[[z]] <- (paste0(rownames(mtxdisdf_[z + 13, ]), " & ", ifelse(nrow(mtxdisdf_) >= 13, colnames(mtxdisdf_[13]), NA))) | |
1439 n_[[z]] <- (paste0(rownames(mtxdisdf_[z + 14, ]), " & ", ifelse(nrow(mtxdisdf_) >= 14, colnames(mtxdisdf_[14]), NA))) | |
1440 o_[[z]] <- (paste0(rownames(mtxdisdf_[z + 15, ]), " & ", ifelse(nrow(mtxdisdf_) >= 15, colnames(mtxdisdf_[15]), NA))) | |
1441 p_[[z]] <- (paste0(rownames(mtxdisdf_[z + 16, ]), " & ", ifelse(nrow(mtxdisdf_) >= 16, colnames(mtxdisdf_[16]), NA))) | |
1442 q_[[z]] <- (paste0(rownames(mtxdisdf_[z + 17, ]), " & ", ifelse(nrow(mtxdisdf_) >= 17, colnames(mtxdisdf_[17]), NA))) | |
1443 r_[[z]] <- (paste0(rownames(mtxdisdf_[z + 18, ]), " & ", ifelse(nrow(mtxdisdf_) >= 18, colnames(mtxdisdf_[18]), NA))) | |
1444 s_[[z]] <- (paste0(rownames(mtxdisdf_[z + 19, ]), " & ", ifelse(nrow(mtxdisdf_) >= 19, colnames(mtxdisdf_[19]), NA))) | |
1445 | |
1446 } | |
1447 | |
1448 rm(z) | |
1449 | |
1450 y <- length(a_) - (ifelse(nrow(mtxdisdf_) >= 1, 1, nrow(mtxdisdf_))) | |
1451 a_ <- a_[1:y] | |
1452 y <- length(b_) - (ifelse(nrow(mtxdisdf_) >= 2, 2, nrow(mtxdisdf_))) | |
1453 b_ <- b_[1:y] | |
1454 y <- length(c_) - (ifelse(nrow(mtxdisdf_) >= 3, 3, nrow(mtxdisdf_))) | |
1455 c_ <- c_[1:y] | |
1456 y <- length(d_) - (ifelse(nrow(mtxdisdf_) >= 4, 4, nrow(mtxdisdf_))) | |
1457 d_ <- d_[1:y] | |
1458 y <- length(e_) - (ifelse(nrow(mtxdisdf_) >= 5, 5, nrow(mtxdisdf_))) | |
1459 e_ <- e_[1:y] | |
1460 y <- length(f_) - (ifelse(nrow(mtxdisdf_) >= 6, 6, nrow(mtxdisdf_))) | |
1461 f_ <- f_[1:y] | |
1462 y <- length(g_) - (ifelse(nrow(mtxdisdf_) >= 7, 7, nrow(mtxdisdf_))) | |
1463 g_ <- g_[1:y] | |
1464 y <- length(h_) - (ifelse(nrow(mtxdisdf_) >= 8, 8, nrow(mtxdisdf_))) | |
1465 h_ <- h_[1:y] | |
1466 y <- length(i_) - (ifelse(nrow(mtxdisdf_) >= 9, 9, nrow(mtxdisdf_))) | |
1467 i_ <- i_[1:y] | |
1468 y <- length(j_) - (ifelse(nrow(mtxdisdf_) >= 10, 10, nrow(mtxdisdf_))) | |
1469 j_ <- j_[1:y] | |
1470 y <- length(k_) - (ifelse(nrow(mtxdisdf_) >= 11, 11, nrow(mtxdisdf_))) | |
1471 k_ <- k_[1:y] | |
1472 y <- length(l_) - (ifelse(nrow(mtxdisdf_) >= 12, 12, nrow(mtxdisdf_))) | |
1473 l_ <- l_[1:y] | |
1474 y <- length(m_) - (ifelse(nrow(mtxdisdf_) >= 13, 13, nrow(mtxdisdf_))) | |
1475 m_ <- m_[1:y] | |
1476 y <- length(n_) - (ifelse(nrow(mtxdisdf_) >= 14, 14, nrow(mtxdisdf_))) | |
1477 n_ <- n_[1:y] | |
1478 y <- length(o_) - (ifelse(nrow(mtxdisdf_) >= 15, 15, nrow(mtxdisdf_))) | |
1479 o_ <- o_[1:y] | |
1480 y <- length(p_) - (ifelse(nrow(mtxdisdf_) >= 16, 16, nrow(mtxdisdf_))) | |
1481 p_ <- p_[1:y] | |
1482 y <- length(q_) - (ifelse(nrow(mtxdisdf_) >= 17, 17, nrow(mtxdisdf_))) | |
1483 q_ <- q_[1:y] | |
1484 y <- length(r_) - (ifelse(nrow(mtxdisdf_) >= 18, 18, nrow(mtxdisdf_))) | |
1485 r_ <- r_[1:y] | |
1486 y <- length(s_) - (ifelse(nrow(mtxdisdf_) >= 19, 19, nrow(mtxdisdf_))) | |
1487 s_ <- s_[1:y] | |
1488 | |
1489 rm(y) | |
1490 | |
1491 name_ <- c(a_, b_, c_, d_, e_, f_, g_, h_, i_, j_, k_, l_, m_, n_, o_, p_, q_, r_, s_) | |
1492 df_ <- data.frame(expand.grid(mtxdis), name_[1:seq_len(nrow(expand.grid(mtxdis)))]) | |
1493 names(df_) <- c("dist.", "name_") | |
1494 | |
1495 rm(a_, b_, c_, d_, e_, f_, g_, h_, i_, j_, k_, l_, m_, n_, o_, p_, q_, r_, s_) | |
1496 rm(name_) | |
1497 | |
1498 q_ <- strsplit(df_$name_, " & ") | |
1499 mat_ <- matrix(unlist(q_), ncol = 2, byrow = TRUE) | |
1500 q_df_ <- as.data.frame(matrix(unlist(q_), ncol = 2, byrow = TRUE)) | |
1501 df_ <- dplyr::bind_cols(df_, q_df_) | |
1502 | |
1503 rm(q_, mat_, q_df_) | |
1504 | |
1505 split_ <- strsplit(df_$V1, "_") | |
1506 v1_split_ <- as.data.frame(matrix(unlist(split_), ncol = 4, byrow = TRUE)) | |
1507 split_ <- strsplit(df_$V2, "_") | |
1508 v2_split_ <- as.data.frame(matrix(unlist(split_), ncol = 4, byrow = TRUE)) | |
1509 | |
1510 df_ <- dplyr::bind_cols(df_, v1_split_) | |
1511 df_ <- dplyr::bind_cols(df_, v2_split_) | |
1512 df_red_ <- subset(df_, V4...8 == V4...12 & V3...7 == V3...11) | |
1513 site_year_month_day <- rep(unique(qecbnato0_x$site_year_month_day), nrow(df_red_)) | |
1514 | |
1515 df_red_ <- tibble::add_column(df_red_, site_year_month_day, .before = "dist.") | |
1516 | |
1517 rm(split_, v1_split_, v2_split_) | |
1518 rm(mtxdis, mtxdisdf_, df_, site_year_month_day) | |
1519 | |
1520 matri_list[[x]] <- df_red_ | |
1521 matri_list <<- matri_list | |
1522 | |
1523 rm(df_red_, qecbnato0_x, x) | |
1524 | |
1525 } | |
1526 | |
1527 matri_df <- do.call("rbind", matri_list) | |
1528 | |
1529 names(matri_df) <- c("site_year_month_day", "dist.", "name_", "name_left", "name_right", "Type.Bloc.left", "Face.left", "Numéro.Bloc.échantillon.left", "Quadrat.left", "Type.Bloc.right", "Face.right", "Numéro.Bloc.échantillon.right", "Quadrat.right") | |
1530 | |
1531 matri_df <<- matri_df | |
1532 | |
1533 hist(matri_df$dist.) | |
1534 | |
1535 } | |
1536 | |
1537 data_ <- dplyr::filter(qecbnato0, Type.Bloc == "Bloc mobile") | |
1538 matri_list <- vector("list", length(unique(data_$site_year_month_day))) | |
1539 | |
1540 matri_fct_bmm(data = data_, conca = c(bret_egmp_basq_qecb, egmp_basq_qecb)) | |
1541 hist(matri_df$dist., main = c(paste("Histo. of Bray (0-adjusted) dist. dissimilarity measures"), paste(" (sqrt transfo) - BMfs vs BMfi -"))) | |
1542 | |
1543 matri_full_bm_bf_fi <- matri_df | |
1544 | |
1545 rm(data_, matri_df, matri_list) | |
1546 | |
1547 | |
1548 ############################################################# | |
1549 # # | |
1550 # Plot dissimilarity # | |
1551 # # | |
1552 ############################################################# | |
1553 ## plot | |
1554 | |
1555 # activate line | |
1556 | |
1557 matri_full_bm_bf_fs <- tidyr::separate(matri_full_bm_bf_fs, "site_year_month_day", into = c("departement", "Site", "Year", "Month", "Day"), remove = FALSE) | |
1558 matri_full_bm_bf_fs$Site <- paste0(matri_full_bm_bf_fs$departement, "_", matri_full_bm_bf_fs$Site) | |
1559 matri_full_bm_bf_fs <- subset(matri_full_bm_bf_fs, select = - c(departement)) | |
1560 | |
1561 matri_full_bm_bf_fs <- tibble::add_column(matri_full_bm_bf_fs, Date = as.Date(paste0(matri_full_bm_bf_fs$Year, "-", matri_full_bm_bf_fs$Month, "-", matri_full_bm_bf_fs$Day), origin = "1970-01-01"), .after = "Site") | |
1562 matri_full_bm_bf_fs$Site <- as.factor(matri_full_bm_bf_fs$Site) | |
1563 | |
1564 matri_full_bm_bf_fi <- tidyr::separate(matri_full_bm_bf_fi, "site_year_month_day", into = c("departement", "Site", "Year", "Month", "Day"), remove = FALSE) | |
1565 matri_full_bm_bf_fi$Site <- paste0(matri_full_bm_bf_fi$departement, "_", matri_full_bm_bf_fi$Site) | |
1566 matri_full_bm_bf_fi <- subset(matri_full_bm_bf_fi, select = - c(departement)) | |
1567 matri_full_bm_bf_fi <- tibble::add_column(matri_full_bm_bf_fi, Date = as.Date(paste0(matri_full_bm_bf_fi$Year, "-", matri_full_bm_bf_fi$Month, "-", matri_full_bm_bf_fi$Day), origin = "1970-01-01"), .after = "Site") | |
1568 matri_full_bm_bf_fi$Site <- as.factor(matri_full_bm_bf_fi$Site) | |
1569 | |
1570 # if error message "Error in .Call.graphics(C_palette2, .Call(C_palette2, NULL)) : invalid graphics state" | |
1571 | |
1572 | |
1573 bf_fs_plot <- ggplot2::ggplot(matri_full_bm_bf_fs, ggplot2::aes(x = Site, y = dist.)) + | |
1574 ggplot2::geom_boxplot() + | |
1575 #geom_jitter(shape = 16, position=position_jitter(0.2)) + | |
1576 ggplot2::xlab("") + | |
1577 ggplot2::ylab("distance diss. BM.BF_FS") + | |
1578 ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1)) | |
1579 | |
1580 ggplot2::ggsave("distance_diss_BF_FS.png", bf_fs_plot, height = 4.5, width = 4) | |
1581 | |
1582 fs_fi_plot <- ggplot2::ggplot(matri_full_bm_bf_fi, ggplot2::aes(x = Site, y = dist.)) + | |
1583 ggplot2::geom_boxplot() + | |
1584 #geom_jitter(shape = 16, position=position_jitter(0.2)) + | |
1585 ggplot2::xlab("") + | |
1586 ggplot2::ylab("distance diss. BM_FS.FI") + | |
1587 ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1)) | |
1588 | |
1589 ggplot2::ggsave("distance_diss_FS_FI.png", fs_fi_plot, height = 4.5, width = 4) | |
1590 | |
1591 # issue with type de bloc, numéro de bloc and quadrat for df_ BM.BF_FS, cfr df_ left vs right variables doesn't give the right combination (variables with left vs right label in names come from the dissimilarity coefficient functions). | |
1592 matri_full_bm_bf_fs$Quadrat <- NA | |
1593 for (i in c(1:seq_len(nrow(matri_full_bm_bf_fs)))) { | |
1594 ifelse(matri_full_bm_bf_fs$Type.Bloc.left[i] == "Bloc mobile", matri_full_bm_bf_fs$Quadrat[i] <- matri_full_bm_bf_fs$Quadrat.left[i], matri_full_bm_bf_fs$Quadrat[i] <- matri_full_bm_bf_fs$Quadrat.right[i]) | |
1595 } | |
1596 matri_full_bm_bf_fs$Numéro.Bloc <- NA | |
1597 for (i in c(1:seq_len(nrow(matri_full_bm_bf_fs)))) { | |
1598 ifelse(matri_full_bm_bf_fs$Type.Bloc.left[i] == "Bloc mobile", matri_full_bm_bf_fs$Numéro.Bloc[i] <- matri_full_bm_bf_fs$Numéro.Bloc.échantillon.left[i], matri_full_bm_bf_fs$Numéro.Bloc[i] <- matri_full_bm_bf_fs$Numéro.Bloc.échantillon.right[i]) | |
1599 } | |
1600 | |
1601 matri_full_bm_bf_fs <- tibble::add_column(matri_full_bm_bf_fs, site_year_month_day.q_BMnb = paste0(matri_full_bm_bf_fs$site_year_month_day, "_", matri_full_bm_bf_fs$Quadrat, "_", matri_full_bm_bf_fs$Numéro.Bloc), .after = "site_year_month_day") | |
1602 matri_full_bm_bf_fi <- tibble::add_column(matri_full_bm_bf_fi, site_year_month_day.q_BMnb = paste0(matri_full_bm_bf_fi$site_year_month_day, "_", matri_full_bm_bf_fi$Quadrat.left, "_", matri_full_bm_bf_fi$Numéro.Bloc.échantillon.left), .after = "site_year_month_day") | |
1603 | |
1604 colnames(matri_full_bm_bf_fs) <- paste("BM.BF_FS", colnames(matri_full_bm_bf_fs), sep = "_") | |
1605 matri_full_bm_bf_fs <- dplyr::rename(matri_full_bm_bf_fs, site_year_month_day.q_BMnb = BM.BF_FS_site_year_month_day.q_BMnb) | |
1606 colnames(matri_full_bm_bf_fi) <- paste("BM_FS.FI", colnames(matri_full_bm_bf_fi), sep = "_") | |
1607 matri_full_bm_bf_fi <- dplyr::rename(matri_full_bm_bf_fi, site_year_month_day.q_BMnb = BM_FS.FI_site_year_month_day.q_BMnb) | |
1608 | |
1609 matri_full <- dplyr::full_join(matri_full_bm_bf_fs[, c("site_year_month_day.q_BMnb", "BM.BF_FS_dist.")], matri_full_bm_bf_fi[, c("site_year_month_day.q_BMnb", "BM_FS.FI_dist.")]) | |
1610 | |
1611 matri_full <- tidyr::separate(matri_full, "site_year_month_day.q_BMnb", into = c("departement", "Site", "Year", "Month", "Day", "Quadrat", "Bloc Mobile Number"), remove = FALSE) | |
1612 matri_full$Site <- paste0(matri_full$departement, "_", matri_full$Site) | |
1613 matri_full <- subset(matri_full, select = - c(departement)) | |
1614 matri_full <- tibble::add_column(matri_full, Date = as.Date(paste0(matri_full$Year, "-", matri_full$Month, "-", matri_full$Day), origin = "1970-01-01"), .after = "Site") | |
1615 | |
1616 # Name for report/plot | |
1617 | |
1618 matri_full <- tibble::add_column(matri_full, Site_bis = matri_full$Site, .after = "Site") | |
1619 | |
1620 matri_full$Site_bis <- ifelse(matri_full$Site == "GDMO_Locmariaquer", "Locmariaquer", matri_full$Site_bis) | |
1621 matri_full$Site_bis <- ifelse(matri_full$Site == "GDMO_BegLann", "Beg Lann", matri_full$Site_bis) | |
1622 matri_full$Site_bis <- ifelse(matri_full$Site == "FOUR_PlateauFour", "Plateau du Four", matri_full$Site_bis) | |
1623 matri_full$Site_bis <- ifelse(matri_full$Site == "EGMP_GroinCou", "Groin du Cou", matri_full$Site_bis) | |
1624 matri_full$Site_bis <- ifelse(matri_full$Site == "EGMP_PasEmsembert", "Le Pas d'Emsembert", matri_full$Site_bis) | |
1625 matri_full$Site_bis <- ifelse(matri_full$Site == "EGMP_BreeBains", "La Brée-les-Bains", matri_full$Site_bis) | |
1626 matri_full$Site_bis <- ifelse(matri_full$Site == "EGMP_PerreAntiochat", "Le Perré d'Antiochat", matri_full$Site_bis) | |
1627 matri_full$Site_bis <- ifelse(matri_full$Site == "EGMP_Chassiron", "Chassiron", matri_full$Site_bis) | |
1628 matri_full$Site_bis <- ifelse(matri_full$Site == "BASQ_FlotsBleusZP", "Les Flots Bleus / zone pêcheurs", matri_full$Site_bis) | |
1629 matri_full$Site_bis <- ifelse(matri_full$Site == "BASQ_FlotsBleusZF", "Les Flots Bleus / zone familles", matri_full$Site_bis) | |
1630 matri_full$Site_bis <- ifelse(matri_full$Site == "GONB_IlotStMichel", "Îlot Saint-Michel", matri_full$Site_bis) | |
1631 matri_full$Site_bis <- ifelse(matri_full$Site == "FINS_Quemenes", "Quéménès", matri_full$Site_bis) | |
1632 matri_full$Site_bis <- ifelse(matri_full$Site == "FINS_SeinGoulenez", "Île de Sein - Goulenez", matri_full$Site_bis) | |
1633 matri_full$Site_bis <- ifelse(matri_full$Site == "FINS_SeinKilaourou", "Île de Sein - Kilaourou", matri_full$Site_bis) | |
1634 matri_full$Site_bis <- ifelse(matri_full$Site == "ARMO_Verdelet", "Îlot du Verdelet", matri_full$Site_bis) | |
1635 matri_full$Site_bis <- ifelse(matri_full$Site == "ARMO_Piegu", "Piégu", matri_full$Site_bis) | |
1636 matri_full$Site_bis <- ifelse(matri_full$Site == "ARMO_Bilfot", "Pointe de Bilfot", matri_full$Site_bis) | |
1637 matri_full$Site_bis <- ifelse(matri_full$Site == "ARMO_IlePlate", "Île Plate", matri_full$Site_bis) | |
1638 matri_full$Site_bis <- ifelse(matri_full$Site == "PDMO_Perharidy", "Perharidy", matri_full$Site_bis) | |
1639 matri_full$Site_bis <- ifelse(matri_full$Site == "BRES_Keraliou", "Keraliou", matri_full$Site_bis) | |
1640 matri_full$Site_bis <- ifelse(matri_full$Site == "FINS_Mousterlin", "Pointe de Mousterlin", matri_full$Site_bis) | |
1641 matri_full$Site_bis <- ifelse(matri_full$Site == "FINS_StNicolasGlenan", "Saint-Nicolas des Glénan", matri_full$Site_bis) | |
1642 matri_full$Site_bis <- ifelse(matri_full$Site == "FINS_AnseRoz", "Pointe de l'Anse du Roz", matri_full$Site_bis) | |
1643 | |
1644 saveRDS(matri_full, "matri_full.RDS") | |
1645 | |
1646 ############################################################# | |
1647 # # | |
1648 # Plot the dissimilarity per site # | |
1649 # # | |
1650 ############################################################# | |
1651 ## plot dissimilarity coefficient | |
1652 | |
1653 matri_full$Year <- as.integer(matri_full$Year) | |
1654 matri_full$Month <- as.integer(matri_full$Month) | |
1655 matri_full$Day <- as.integer(matri_full$Day) | |
1656 | |
1657 | |
1658 ## BM_FS.FI_dist => mobile boulder upper vs lower faces | |
1659 | |
1660 # Stats | |
1661 | |
1662 bm_fs_fi_dist_stat <- matri_full %>% dplyr::group_by(Site, Site_bis, Date, Year, Month, Day) %>% dplyr::summarize(BM_FS.FI_dist.moy = mean(BM_FS.FI_dist., na.rm = TRUE), BM_FS.FI_dist.et = sd(BM_FS.FI_dist., na.rm = TRUE), BM_FS.FI_dist.med = median(BM_FS.FI_dist., na.rm = TRUE), BM_FS.FI_dist.min = min(BM_FS.FI_dist., na.rm = TRUE), BM_FS.FI_dist.max = max(BM_FS.FI_dist., na.rm = TRUE), nb. = dplyr::n(), nb.notNa = sum(!is.na(BM_FS.FI_dist.))) | |
1663 | |
1664 bm_fs_fi_dist_stat <- dplyr::ungroup(bm_fs_fi_dist_stat) | |
1665 | |
1666 # Quality scale based on quartiles | |
1667 | |
1668 if (choice == "N") { | |
1669 one <- round(mean(unlist(dplyr::filter(matri_full, BM_FS.FI_dist. <= quantile(matri_full$BM_FS.FI_dist., 0.25, na.rm = TRUE))["BM_FS.FI_dist."])), digits = 3) | |
1670 two <- round(mean(unlist(dplyr::filter(matri_full, BM_FS.FI_dist. > quantile(matri_full$BM_FS.FI_dist., 0.25, na.rm = TRUE) & BM_FS.FI_dist. <= quantile(matri_full$BM_FS.FI_dist., 0.5, na.rm = TRUE))["BM_FS.FI_dist."])), digits = 3) | |
1671 three <- round(mean(unlist(dplyr::filter(matri_full, BM_FS.FI_dist. > quantile(matri_full$BM_FS.FI_dist., 0.5, na.rm = TRUE) & BM_FS.FI_dist. <= quantile(matri_full$BM_FS.FI_dist., 0.75, na.rm = TRUE))["BM_FS.FI_dist."])), digits = 3) | |
1672 four <- round(mean(unlist(dplyr::filter(matri_full, BM_FS.FI_dist. > quantile(matri_full$BM_FS.FI_dist., 0.75, na.rm = TRUE))["BM_FS.FI_dist."])), digits = 3) | |
1673 }else { | |
1674 one <- 0.47 | |
1675 two <- 0.7 | |
1676 three <- 0.83 | |
1677 four <- 0.98 | |
1678 } | |
1679 | |
1680 val_xmax <- as.Date(paste0(as.character(choice_date + 1), "-01-01"), origin = "1970-01-01") | |
1681 | |
1682 for (i in c(1:length(unique(bm_fs_fi_dist_stat$Site)))) { | |
1683 | |
1684 df1 <- dplyr::filter(bm_fs_fi_dist_stat, bm_fs_fi_dist_stat$Site == unique(bm_fs_fi_dist_stat$Site)[i]) | |
1685 | |
1686 bm_fs_fi_plot <- ggplot2::ggplot() + | |
1687 ggplot2::geom_point(ggplot2::aes(x = bm_fs_fi_dist_stat$Date, y = bm_fs_fi_dist_stat$BM_FS.FI_dist.med), col = "grey") + | |
1688 ggplot2::geom_rect(ggplot2::aes(xmin = as.Date("2013-01-01", origin = "1970-01-01"), xmax = val_xmax, ymin = - 0.1, ymax = one, fill = "blue"), alpha = 0.3) + | |
1689 ggplot2::geom_rect(ggplot2::aes(xmin = as.Date("2013-01-01", origin = "1970-01-01"), xmax = val_xmax, ymin = one, ymax = two, fill = "green"), alpha = 0.3) + | |
1690 ggplot2::geom_rect(ggplot2::aes(xmin = as.Date("2013-01-01", origin = "1970-01-01"), xmax = val_xmax, ymin = two, ymax = three, fill = "yellow"), alpha = 0.3) + | |
1691 ggplot2::geom_rect(ggplot2::aes(xmin = as.Date("2013-01-01", origin = "1970-01-01"), xmax = val_xmax, ymin = three, ymax = four, fill = "orange"), alpha = 0.3) + | |
1692 ggplot2::geom_rect(ggplot2::aes(xmin = as.Date("2013-01-01", origin = "1970-01-01"), xmax = val_xmax, ymin = four, ymax = 1.1, fill = "red"), alpha = 0.3) + | |
1693 ggplot2::scale_fill_manual(values = c("#FF0000", "#F59404", "#18E125", "#1A1AE8", "#FAFA15")) + | |
1694 ggplot2::geom_pointrange(ggplot2::aes(x = df1$Date, y = df1$BM_FS.FI_dist.med, ymin = df1$BM_FS.FI_dist.min, ymax = df1$BM_FS.FI_dist.max), col = "black") + | |
1695 ggplot2::xlab("Date") + | |
1696 ggplot2::ylab("Coef dissim BM FS-FI") + | |
1697 ggplot2::ggtitle(unique(df1$Site_bis)) + | |
1698 ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1), legend.position = "none") | |
1699 | |
1700 ggplot2::ggsave(paste0("fs_fi_", df1$Site, ".png"), device = "png", bm_fs_fi_plot, height = 3, width = 3.5) | |
1701 | |
1702 } | |
1703 | |
1704 rm(df1, four, i, one, three, two, xmax_, xmin_, ymax_, ymin_) | |
1705 | |
1706 | |
1707 ## BM.BF_FS_dist => mobile boulder vs fixed boulder upper faces | |
1708 | |
1709 # Stats | |
1710 | |
1711 bm_bf_fs_dist_stat <- matri_full %>% dplyr::group_by(Site, Site_bis, Date, Year, Month, Day) %>% dplyr::summarize(BM.BF_FS_dist.moy = mean(BM.BF_FS_dist., na.rm = TRUE), BM.BF_FS_dist.et = sd(BM.BF_FS_dist., na.rm = TRUE), BM.BF_FS_dist.med = median(BM.BF_FS_dist., na.rm = TRUE), BM.BF_FS_dist.min = min(BM.BF_FS_dist., na.rm = TRUE), BM.BF_FS_dist.max = max(BM.BF_FS_dist., na.rm = TRUE), nb. = dplyr::n(), nb.notNa = sum(!is.na(BM.BF_FS_dist.))) | |
1712 | |
1713 bm_bf_fs_dist_stat <- dplyr::ungroup(bm_bf_fs_dist_stat) | |
1714 | |
1715 # Quality scale based on quartiles | |
1716 | |
1717 if (choice == "N") { | |
1718 one <- round(mean(unlist(dplyr::filter(matri_full, BM.BF_FS_dist. <= quantile(matri_full$BM.BF_FS_dist., 0.25, na.rm = TRUE))["BM.BF_FS_dist."])), digits = 3) | |
1719 two <- round(mean(unlist(dplyr::filter(matri_full, BM.BF_FS_dist. > quantile(matri_full$BM.BF_FS_dist., 0.25, na.rm = TRUE) & BM.BF_FS_dist. <= quantile(matri_full$BM.BF_FS_dist., 0.5, na.rm = TRUE))["BM.BF_FS_dist."])), digits = 3) | |
1720 three <- round(mean(unlist(dplyr::filter(matri_full, BM.BF_FS_dist. > quantile(matri_full$BM.BF_FS_dist., 0.5, na.rm = TRUE) & BM.BF_FS_dist. <= quantile(matri_full$BM.BF_FS_dist., 0.75, na.rm = TRUE))["BM.BF_FS_dist."])), digits = 3) | |
1721 four <- round(mean(unlist(dplyr::filter(matri_full, BM.BF_FS_dist. > quantile(matri_full$BM.BF_FS_dist., 0.75, na.rm = TRUE))["BM.BF_FS_dist."])), digits = 3) | |
1722 | |
1723 }else { | |
1724 one <- 0.19 | |
1725 two <- 0.32 | |
1726 three <- 0.455 | |
1727 four <- 0.735 | |
1728 } | |
1729 # Plot | |
1730 | |
1731 for (i in c(1:length(unique(bm_bf_fs_dist_stat$Site)))) { | |
1732 | |
1733 df1 <- dplyr::filter(bm_bf_fs_dist_stat, bm_bf_fs_dist_stat$Site == unique(bm_bf_fs_dist_stat$Site)[i]) | |
1734 | |
1735 bm_bf_fs_plot <- ggplot2::ggplot() + | |
1736 ggplot2::geom_point(ggplot2::aes(x = bm_bf_fs_dist_stat$Date, y = bm_bf_fs_dist_stat$BM.BF_FS_dist.med), col = "grey") + | |
1737 ggplot2::geom_rect(ggplot2::aes(xmin = as.Date("2013-01-01", origin = "1970-01-01"), xmax = val_xmax, ymin = - 0.1, ymax = one, fill = "red"), alpha = 0.3) + | |
1738 ggplot2::geom_rect(ggplot2::aes(xmin = as.Date("2013-01-01", origin = "1970-01-01"), xmax = val_xmax, ymin = one, ymax = two, fill = "orange"), alpha = 0.3) + | |
1739 ggplot2::geom_rect(ggplot2::aes(xmin = as.Date("2013-01-01", origin = "1970-01-01"), xmax = val_xmax, ymin = two, ymax = three, fill = "yellow"), alpha = 0.3) + | |
1740 ggplot2::geom_rect(ggplot2::aes(xmin = as.Date("2013-01-01", origin = "1970-01-01"), xmax = val_xmax, ymin = three, ymax = four, fill = "green"), alpha = 0.3) + | |
1741 ggplot2::geom_rect(ggplot2::aes(xmin = as.Date("2013-01-01", origin = "1970-01-01"), xmax = val_xmax, ymin = four, ymax = 1.1, fill = "blue"), alpha = 0.3) + | |
1742 ggplot2::scale_fill_manual(values = c("#FF0000", "#F59404", "#18E125", "#1A1AE8", "#FAFA15")) + | |
1743 ggplot2::geom_pointrange(ggplot2::aes(x = df1$Date, y = df1$BM.BF_FS_dist.med, ymin = df1$BM.BF_FS_dist.min, ymax = df1$BM.BF_FS_dist.max), col = "black") + | |
1744 ggplot2::xlab("Date") + | |
1745 ggplot2::ylab("Coef dissim BM-BF FS") + | |
1746 ggplot2::ggtitle(unique(df1$Site_bis)) + | |
1747 ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1), legend.position = "none") | |
1748 | |
1749 ggplot2::ggsave(paste0("bm_bf_", df1$Site, ".png"), device = "png", bm_bf_fs_plot, height = 3, width = 3.5) | |
1750 | |
1751 } | |
1752 | |
1753 rm(df1, four, i, one, three, two, xmax_, xmin_, ymax_, ymin_) |