comparison graph_link_var.r @ 0:c7dd4706f982 draft

"planemo upload for repository https://github.com/Marie59/Data_explo_tools commit 2f883743403105d9cac6d267496d985100da3958"
author ecology
date Tue, 27 Jul 2021 16:55:49 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:c7dd4706f982
1 #Rscript
2
3 ################################################
4 ## Link between variables and themselves ##
5 ################################################
6
7 #####Packages : ggplot2
8 # Cowplot
9 # Car
10 # faraway
11 # dplyr
12 # GGally
13 # FactoMiner
14 # factoextra
15 # ggcorrplot
16
17 #####Load arguments
18
19 args <- commandArgs(trailingOnly = TRUE)
20
21 if (length(args) == 0) {
22 stop("This tool needs at least one argument")
23 }else{
24 table <- args[1]
25 hr <- args[2]
26 colli <- as.logical(args[3])
27 vif <- as.logical(args[4])
28 pca <- as.logical(args[5])
29 interr <- as.logical(args[6])
30 auto <- as.logical(args[7])
31 spe <- as.numeric(args[8])
32 col <- as.numeric(strsplit(args[9], ",")[[1]])
33 var <- as.numeric(args[10])
34 var2 <- as.numeric(args[11])
35 var4 <- as.numeric(args[12])
36 }
37
38 if (hr == "false") {
39 hr <- FALSE
40 }else{
41 hr <- TRUE
42 }
43
44 if (length(col) == 1) {
45 stop("Please select two or more numerical columns")
46 }
47
48 #####Import data
49 data <- read.table(table, sep = "\t", dec = ".", header = hr, fill = TRUE, encoding = "UTF-8")
50 if (vif | pca) {
51 data_active <- data[col]
52 #Define the active individuals and the active variables for the PCA
53 }
54
55 if (colli | interr) {
56 colspe <- colnames(data)[spe]
57 }
58
59 if (colli) {
60 data_num <- data[col]
61 data_num$species <- data[, spe]
62 data_num <- data_num[grep("^$", data_num$spe, invert = TRUE), ]
63 }
64
65 if (interr | auto) {
66 colvar <- colnames(data)[var]
67 }
68
69 if (interr) {
70 colvar2 <- colnames(data)[var2]
71 colvar4 <- colnames(data)[var4]
72 }
73
74 #####Your analysis
75
76 ####Independence of the observations####
77
78 acf_tb <- function(data, var) {
79 obj <- acf(data[, var], na.action = na.pass)
80 return(obj)
81 }
82
83 acf_df <- function(data, var) {
84
85 tb <- data.frame(acf = acf_tb(data, var)$acf, lag = acf_tb(data, var)$lag)
86
87 return(tb) # Lag: intervalle temporel entre mesures, fréquence à laquelle on mesure l'auto corrélation.
88 # ACF: indépendance temporelle
89 }
90
91 autocorr <- function(var1, var2) {
92 cat("\nACF\n", var2$acf, file = "acf.txt", fill = 1, append = TRUE)
93 graph <- ggplot2::ggplot() +
94 ggplot2::geom_bar(ggplot2::aes(x = var2$lag, y = var2$acf), stat = "identity", position = "identity", fill = "midnightblue") +
95 ggplot2::geom_hline(mapping = ggplot2::aes(yintercept = qnorm((1 + 0.95) / 2) / sqrt(var1$n.used)),
96 linetype = "dashed") + # calcul interval de confiance à 95% sans correction du bruit blanc.
97 ggplot2::geom_hline(mapping = ggplot2::aes(yintercept = -qnorm((1 + 0.95) / 2) / sqrt(var1$n.used)), linetype = "dashed") + ggplot2::labs(title = "Autocorrelation") + ggplot2::xlab("lag") + ggplot2::ylab("acf")
98 ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5))
99
100 ggplot2::ggsave("autocorrelation.png", graph)
101 }
102
103 ####Interractions####
104
105 graph <- function(data, var1, var2, var3) {
106 graph <- ggplot2::ggplot(data, ggplot2::aes_string(x = var1, y = var2, group = var3, color = var3)) +
107 ggplot2::geom_point() +
108 ggplot2::geom_smooth(method = lm, se = FALSE) +
109 ggplot2::theme(plot.title = ggplot2::element_text(color = "black", size = 12, face = "bold"))
110
111 return(graph)
112 }
113
114 # Put multiple panels
115 interraction <- function(data, var1, var2, var3, var4) {
116 cat("\nSpecies\n", spe, file = "Species.txt", fill = 1, append = TRUE)
117 if (mult1) {
118 for (spe in unique(data[, var3])) {
119 data_cut <- data[data[, var3] == spe, ]
120 mult_graph <- graph(data_cut, var1, var2, var3) + ggplot2::facet_grid(cols = ggplot2::vars(data_cut[, var4]), scales = "free") +
121 cowplot::background_grid(major = "xy", minor = "none") +
122 cowplot::panel_border() + ggplot2::ggtitle("Interactions")
123
124 ggplot2::ggsave(paste("interaction_of_", spe, ".png"), mult_graph, width = 10, height = 7)
125 }
126 }else{
127 mult_graph <- graph(data, var1, var2, var3) + ggplot2::facet_grid(rows = ggplot2::vars(data[, var3]), cols = ggplot2::vars(data[, var4]), scales = "free") +
128 cowplot::background_grid(major = "xy", minor = "none") +
129 cowplot::panel_border() + ggplot2::ggtitle("Interactions")
130
131 ggplot2::ggsave("interraction.png", mult_graph)
132 }
133 }
134
135 ####Collinearity among covariates####
136 # Create the plots
137
138 coli <- function(data, var) {
139 if (mult2) {
140 cat("\nThere is not enough data on these species they appear too few times in the tabular-file\n", file = "Data.txt", fill = 1, append = TRUE)
141 for (spe in unique(data$species)) {
142 nb_spe <- sum(data$species == spe)
143 if (nb_spe <= 2) {
144 cat(spe, file = "Data.txt", fill = 1, append = TRUE)
145 }else{
146 data_cut <- data[data$species == spe, ]
147 nb <- ncol(data_cut)
148 data_num <- data_cut[, -nb]
149 graph <- GGally::ggpairs(data_num, ggplot2::aes(color = data_cut$species),
150 lower = list(continuous = "points"), axisLabels = "internal")
151
152 ggplot2::ggsave(paste0("collinarity_of_", spe, ".png"), graph, width = 20, height = 15)
153 }
154 }
155
156 }else{
157 nb <- ncol(data)
158 data_cut <- data[, -nb]
159 graph <- GGally::ggpairs(data_cut, ggplot2::aes(color = data[, var]),
160 lower = list(continuous = "points"), axisLabels = "internal") +
161 ggplot2::scale_colour_manual(values = c("#00AFBB", "#E7B800", "#FC4E07")) +
162 ggplot2::scale_fill_manual(values = c("#00AFBB", "#E7B800", "#FC4E07"))
163
164 ggplot2::ggsave("collinarity.png", graph)
165 }
166 }
167
168 ####PCA method####
169
170 plot_pca <- function(data) {
171 #Correlation circle
172 graph_corr <- factoextra::fviz_pca_var(active_data(data), col.var = "cos2",
173 gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
174 repel = TRUE #Avoid text overlap
175 )
176 ggplot2::ggsave("Pca_circle.png", graph_corr)
177 }
178
179 plot_qual <- function(data) {
180 #PCA results for variables
181 var <- factoextra::get_pca_var(active_data(data))
182
183 #representation quality
184 graph_quality <- ggcorrplot::ggcorrplot(var$cos2[!apply(var$cos2, 1, anyNA), ], method = "circle",
185 ggtheme = ggplot2::theme_gray,
186 colors = c("#00AFBB", "#E7B800", "#FC4E07"))
187
188 ggplot2::ggsave("Pca_quality.png", graph_quality)
189 }
190
191 #### Variance inflation factor ####
192
193 myvif <- function(mod) {
194 v <- vcov(mod)
195 assign <- attributes(model.matrix(mod))$assign
196 if (names(coefficients(mod)[1]) == "(Intercept)") {
197 v <- v[-1, -1]
198 assign <- assign[-1]
199 } else warning("No intercept: vifs may not be sensible.")
200 terms <- labels(terms(mod))
201 n_terms <- length(terms)
202 if (n_terms < 2) stop("The model contains fewer than 2 terms")
203 if (length(assign) > dim(v)[1]) {
204 diag(tmp_cor) <- 0
205 if (any(tmp_cor == 1.0)) {
206 return("Sample size is too small, 100% collinearity is present")
207 } else {
208 return("Sample size is too small")
209 }
210 }
211 r <- cov2cor(v)
212 detr <- det(r)
213 result <- matrix(0, n_terms, 3)
214 rownames(result) <- terms
215 colnames(result) <- c("GVIF", "Df", "GVIF^(1/2Df)")
216 for (term in 1:n_terms) {
217 subs <- which(assign == term)
218 result[term, 1] <- det(as.matrix(r[subs, subs])) * det(as.matrix(r[-subs, -subs])) / detr
219 result[term, 2] <- length(subs)
220 }
221 if (all(result[, 2] == 1)) {
222 result <- data.frame(GVIF = result[, 1])
223 } else {
224 result[, 3] <- result[, 1] ^ (1 / (2 * result[, 2]))
225 }
226 invisible(result)
227 }
228 corvif1 <- function(dataz) {
229 dataz <- as.data.frame(dataz)
230 #correlation part
231 tmp_cor <- cor(dataz, use = "complete.obs")
232 return(tmp_cor)
233 }
234 corvif2 <- function(dataz) {
235 dataz <- as.data.frame(dataz)
236 #vif part
237 form <- formula(paste("fooy ~ ", paste(strsplit(names(dataz), " "), collapse = " + ")))
238 dataz <- data.frame(fooy = 1, dataz)
239 lm_mod <- lm(form, dataz)
240
241 return(myvif(lm_mod))
242 }
243
244 #Autocorrelation
245 if (auto) {
246 obj1 <- acf_tb(data, var = colvar)
247 obj2 <- acf_df(data, var = colvar)
248 autocorr(var1 = obj1, var2 = obj2)
249 }
250
251 if (interr) {
252 #Interractions
253 mult1 <- ifelse(length(unique(data[, colspe])) <= 6, FALSE, TRUE)
254 interraction(data, var1 = colvar, var2 = colvar2, var3 = colspe, var4 = colvar4)
255 }
256
257 #Collinearity
258 if (colli) {
259 mult2 <- ifelse(length(unique(data[, spe])) < 3, FALSE, TRUE)
260 coli(data = data_num, var = spe)
261 }
262
263 #PCA
264 if (pca) {
265 active_data <- function(data) {
266 #Calcul of PCA for the active data
267 res_pca <- FactoMineR::PCA(data, graph = FALSE)
268
269 return(res_pca)
270 }
271
272 #eigenvalue
273 eig_val <- capture.output(factoextra::get_eigenvalue(active_data(data_active)))
274
275 cat("\nwrite table with eigenvalue. \n--> \"", paste(eig_val, "\"\n", sep = ""), file = "valeurs.txt", sep = "", append = TRUE)
276
277 plot_pca(data_active)
278 plot_qual(data_active)
279 }
280
281 #VIF
282 if (vif) {
283 #Compute 2 tables#
284 tb_corr <- as.data.frame(corvif1(dataz = data_active))
285 tb_corr <- cbind(x = rownames(tb_corr), tb_corr)
286 tb_vif <- corvif2(dataz = data_active)
287
288 write.table(tb_corr, "corr.tabular", row.names = FALSE, quote = FALSE, sep = "\t", dec = ".", fileEncoding = "UTF-8")
289
290 if (all(is.na(tb_vif))) {
291 tb_vif <- NULL
292 cat("Vif couldn't be calculated, selected data isn't correlated")
293 }else{
294 write.table(tb_vif, "vif.tabular", row.names = FALSE, quote = FALSE, sep = "\t", dec = ".", fileEncoding = "UTF-8")
295 }
296 }