Mercurial > repos > ecology > ecology_link_between_var
comparison graph_link_var.r @ 0:c7dd4706f982 draft
"planemo upload for repository https://github.com/Marie59/Data_explo_tools commit 2f883743403105d9cac6d267496d985100da3958"
author | ecology |
---|---|
date | Tue, 27 Jul 2021 16:55:49 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c7dd4706f982 |
---|---|
1 #Rscript | |
2 | |
3 ################################################ | |
4 ## Link between variables and themselves ## | |
5 ################################################ | |
6 | |
7 #####Packages : ggplot2 | |
8 # Cowplot | |
9 # Car | |
10 # faraway | |
11 # dplyr | |
12 # GGally | |
13 # FactoMiner | |
14 # factoextra | |
15 # ggcorrplot | |
16 | |
17 #####Load arguments | |
18 | |
19 args <- commandArgs(trailingOnly = TRUE) | |
20 | |
21 if (length(args) == 0) { | |
22 stop("This tool needs at least one argument") | |
23 }else{ | |
24 table <- args[1] | |
25 hr <- args[2] | |
26 colli <- as.logical(args[3]) | |
27 vif <- as.logical(args[4]) | |
28 pca <- as.logical(args[5]) | |
29 interr <- as.logical(args[6]) | |
30 auto <- as.logical(args[7]) | |
31 spe <- as.numeric(args[8]) | |
32 col <- as.numeric(strsplit(args[9], ",")[[1]]) | |
33 var <- as.numeric(args[10]) | |
34 var2 <- as.numeric(args[11]) | |
35 var4 <- as.numeric(args[12]) | |
36 } | |
37 | |
38 if (hr == "false") { | |
39 hr <- FALSE | |
40 }else{ | |
41 hr <- TRUE | |
42 } | |
43 | |
44 if (length(col) == 1) { | |
45 stop("Please select two or more numerical columns") | |
46 } | |
47 | |
48 #####Import data | |
49 data <- read.table(table, sep = "\t", dec = ".", header = hr, fill = TRUE, encoding = "UTF-8") | |
50 if (vif | pca) { | |
51 data_active <- data[col] | |
52 #Define the active individuals and the active variables for the PCA | |
53 } | |
54 | |
55 if (colli | interr) { | |
56 colspe <- colnames(data)[spe] | |
57 } | |
58 | |
59 if (colli) { | |
60 data_num <- data[col] | |
61 data_num$species <- data[, spe] | |
62 data_num <- data_num[grep("^$", data_num$spe, invert = TRUE), ] | |
63 } | |
64 | |
65 if (interr | auto) { | |
66 colvar <- colnames(data)[var] | |
67 } | |
68 | |
69 if (interr) { | |
70 colvar2 <- colnames(data)[var2] | |
71 colvar4 <- colnames(data)[var4] | |
72 } | |
73 | |
74 #####Your analysis | |
75 | |
76 ####Independence of the observations#### | |
77 | |
78 acf_tb <- function(data, var) { | |
79 obj <- acf(data[, var], na.action = na.pass) | |
80 return(obj) | |
81 } | |
82 | |
83 acf_df <- function(data, var) { | |
84 | |
85 tb <- data.frame(acf = acf_tb(data, var)$acf, lag = acf_tb(data, var)$lag) | |
86 | |
87 return(tb) # Lag: intervalle temporel entre mesures, fréquence à laquelle on mesure l'auto corrélation. | |
88 # ACF: indépendance temporelle | |
89 } | |
90 | |
91 autocorr <- function(var1, var2) { | |
92 cat("\nACF\n", var2$acf, file = "acf.txt", fill = 1, append = TRUE) | |
93 graph <- ggplot2::ggplot() + | |
94 ggplot2::geom_bar(ggplot2::aes(x = var2$lag, y = var2$acf), stat = "identity", position = "identity", fill = "midnightblue") + | |
95 ggplot2::geom_hline(mapping = ggplot2::aes(yintercept = qnorm((1 + 0.95) / 2) / sqrt(var1$n.used)), | |
96 linetype = "dashed") + # calcul interval de confiance à 95% sans correction du bruit blanc. | |
97 ggplot2::geom_hline(mapping = ggplot2::aes(yintercept = -qnorm((1 + 0.95) / 2) / sqrt(var1$n.used)), linetype = "dashed") + ggplot2::labs(title = "Autocorrelation") + ggplot2::xlab("lag") + ggplot2::ylab("acf") | |
98 ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5)) | |
99 | |
100 ggplot2::ggsave("autocorrelation.png", graph) | |
101 } | |
102 | |
103 ####Interractions#### | |
104 | |
105 graph <- function(data, var1, var2, var3) { | |
106 graph <- ggplot2::ggplot(data, ggplot2::aes_string(x = var1, y = var2, group = var3, color = var3)) + | |
107 ggplot2::geom_point() + | |
108 ggplot2::geom_smooth(method = lm, se = FALSE) + | |
109 ggplot2::theme(plot.title = ggplot2::element_text(color = "black", size = 12, face = "bold")) | |
110 | |
111 return(graph) | |
112 } | |
113 | |
114 # Put multiple panels | |
115 interraction <- function(data, var1, var2, var3, var4) { | |
116 cat("\nSpecies\n", spe, file = "Species.txt", fill = 1, append = TRUE) | |
117 if (mult1) { | |
118 for (spe in unique(data[, var3])) { | |
119 data_cut <- data[data[, var3] == spe, ] | |
120 mult_graph <- graph(data_cut, var1, var2, var3) + ggplot2::facet_grid(cols = ggplot2::vars(data_cut[, var4]), scales = "free") + | |
121 cowplot::background_grid(major = "xy", minor = "none") + | |
122 cowplot::panel_border() + ggplot2::ggtitle("Interactions") | |
123 | |
124 ggplot2::ggsave(paste("interaction_of_", spe, ".png"), mult_graph, width = 10, height = 7) | |
125 } | |
126 }else{ | |
127 mult_graph <- graph(data, var1, var2, var3) + ggplot2::facet_grid(rows = ggplot2::vars(data[, var3]), cols = ggplot2::vars(data[, var4]), scales = "free") + | |
128 cowplot::background_grid(major = "xy", minor = "none") + | |
129 cowplot::panel_border() + ggplot2::ggtitle("Interactions") | |
130 | |
131 ggplot2::ggsave("interraction.png", mult_graph) | |
132 } | |
133 } | |
134 | |
135 ####Collinearity among covariates#### | |
136 # Create the plots | |
137 | |
138 coli <- function(data, var) { | |
139 if (mult2) { | |
140 cat("\nThere is not enough data on these species they appear too few times in the tabular-file\n", file = "Data.txt", fill = 1, append = TRUE) | |
141 for (spe in unique(data$species)) { | |
142 nb_spe <- sum(data$species == spe) | |
143 if (nb_spe <= 2) { | |
144 cat(spe, file = "Data.txt", fill = 1, append = TRUE) | |
145 }else{ | |
146 data_cut <- data[data$species == spe, ] | |
147 nb <- ncol(data_cut) | |
148 data_num <- data_cut[, -nb] | |
149 graph <- GGally::ggpairs(data_num, ggplot2::aes(color = data_cut$species), | |
150 lower = list(continuous = "points"), axisLabels = "internal") | |
151 | |
152 ggplot2::ggsave(paste0("collinarity_of_", spe, ".png"), graph, width = 20, height = 15) | |
153 } | |
154 } | |
155 | |
156 }else{ | |
157 nb <- ncol(data) | |
158 data_cut <- data[, -nb] | |
159 graph <- GGally::ggpairs(data_cut, ggplot2::aes(color = data[, var]), | |
160 lower = list(continuous = "points"), axisLabels = "internal") + | |
161 ggplot2::scale_colour_manual(values = c("#00AFBB", "#E7B800", "#FC4E07")) + | |
162 ggplot2::scale_fill_manual(values = c("#00AFBB", "#E7B800", "#FC4E07")) | |
163 | |
164 ggplot2::ggsave("collinarity.png", graph) | |
165 } | |
166 } | |
167 | |
168 ####PCA method#### | |
169 | |
170 plot_pca <- function(data) { | |
171 #Correlation circle | |
172 graph_corr <- factoextra::fviz_pca_var(active_data(data), col.var = "cos2", | |
173 gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), | |
174 repel = TRUE #Avoid text overlap | |
175 ) | |
176 ggplot2::ggsave("Pca_circle.png", graph_corr) | |
177 } | |
178 | |
179 plot_qual <- function(data) { | |
180 #PCA results for variables | |
181 var <- factoextra::get_pca_var(active_data(data)) | |
182 | |
183 #representation quality | |
184 graph_quality <- ggcorrplot::ggcorrplot(var$cos2[!apply(var$cos2, 1, anyNA), ], method = "circle", | |
185 ggtheme = ggplot2::theme_gray, | |
186 colors = c("#00AFBB", "#E7B800", "#FC4E07")) | |
187 | |
188 ggplot2::ggsave("Pca_quality.png", graph_quality) | |
189 } | |
190 | |
191 #### Variance inflation factor #### | |
192 | |
193 myvif <- function(mod) { | |
194 v <- vcov(mod) | |
195 assign <- attributes(model.matrix(mod))$assign | |
196 if (names(coefficients(mod)[1]) == "(Intercept)") { | |
197 v <- v[-1, -1] | |
198 assign <- assign[-1] | |
199 } else warning("No intercept: vifs may not be sensible.") | |
200 terms <- labels(terms(mod)) | |
201 n_terms <- length(terms) | |
202 if (n_terms < 2) stop("The model contains fewer than 2 terms") | |
203 if (length(assign) > dim(v)[1]) { | |
204 diag(tmp_cor) <- 0 | |
205 if (any(tmp_cor == 1.0)) { | |
206 return("Sample size is too small, 100% collinearity is present") | |
207 } else { | |
208 return("Sample size is too small") | |
209 } | |
210 } | |
211 r <- cov2cor(v) | |
212 detr <- det(r) | |
213 result <- matrix(0, n_terms, 3) | |
214 rownames(result) <- terms | |
215 colnames(result) <- c("GVIF", "Df", "GVIF^(1/2Df)") | |
216 for (term in 1:n_terms) { | |
217 subs <- which(assign == term) | |
218 result[term, 1] <- det(as.matrix(r[subs, subs])) * det(as.matrix(r[-subs, -subs])) / detr | |
219 result[term, 2] <- length(subs) | |
220 } | |
221 if (all(result[, 2] == 1)) { | |
222 result <- data.frame(GVIF = result[, 1]) | |
223 } else { | |
224 result[, 3] <- result[, 1] ^ (1 / (2 * result[, 2])) | |
225 } | |
226 invisible(result) | |
227 } | |
228 corvif1 <- function(dataz) { | |
229 dataz <- as.data.frame(dataz) | |
230 #correlation part | |
231 tmp_cor <- cor(dataz, use = "complete.obs") | |
232 return(tmp_cor) | |
233 } | |
234 corvif2 <- function(dataz) { | |
235 dataz <- as.data.frame(dataz) | |
236 #vif part | |
237 form <- formula(paste("fooy ~ ", paste(strsplit(names(dataz), " "), collapse = " + "))) | |
238 dataz <- data.frame(fooy = 1, dataz) | |
239 lm_mod <- lm(form, dataz) | |
240 | |
241 return(myvif(lm_mod)) | |
242 } | |
243 | |
244 #Autocorrelation | |
245 if (auto) { | |
246 obj1 <- acf_tb(data, var = colvar) | |
247 obj2 <- acf_df(data, var = colvar) | |
248 autocorr(var1 = obj1, var2 = obj2) | |
249 } | |
250 | |
251 if (interr) { | |
252 #Interractions | |
253 mult1 <- ifelse(length(unique(data[, colspe])) <= 6, FALSE, TRUE) | |
254 interraction(data, var1 = colvar, var2 = colvar2, var3 = colspe, var4 = colvar4) | |
255 } | |
256 | |
257 #Collinearity | |
258 if (colli) { | |
259 mult2 <- ifelse(length(unique(data[, spe])) < 3, FALSE, TRUE) | |
260 coli(data = data_num, var = spe) | |
261 } | |
262 | |
263 #PCA | |
264 if (pca) { | |
265 active_data <- function(data) { | |
266 #Calcul of PCA for the active data | |
267 res_pca <- FactoMineR::PCA(data, graph = FALSE) | |
268 | |
269 return(res_pca) | |
270 } | |
271 | |
272 #eigenvalue | |
273 eig_val <- capture.output(factoextra::get_eigenvalue(active_data(data_active))) | |
274 | |
275 cat("\nwrite table with eigenvalue. \n--> \"", paste(eig_val, "\"\n", sep = ""), file = "valeurs.txt", sep = "", append = TRUE) | |
276 | |
277 plot_pca(data_active) | |
278 plot_qual(data_active) | |
279 } | |
280 | |
281 #VIF | |
282 if (vif) { | |
283 #Compute 2 tables# | |
284 tb_corr <- as.data.frame(corvif1(dataz = data_active)) | |
285 tb_corr <- cbind(x = rownames(tb_corr), tb_corr) | |
286 tb_vif <- corvif2(dataz = data_active) | |
287 | |
288 write.table(tb_corr, "corr.tabular", row.names = FALSE, quote = FALSE, sep = "\t", dec = ".", fileEncoding = "UTF-8") | |
289 | |
290 if (all(is.na(tb_vif))) { | |
291 tb_vif <- NULL | |
292 cat("Vif couldn't be calculated, selected data isn't correlated") | |
293 }else{ | |
294 write.table(tb_vif, "vif.tabular", row.names = FALSE, quote = FALSE, sep = "\t", dec = ".", fileEncoding = "UTF-8") | |
295 } | |
296 } |