Mercurial > repos > ecology > tool_anonymization
comparison graph_link_var.r @ 0:726a387cfdc2 draft default tip
"planemo upload for repository https://github.com/Marie59/Data_explo_tools commit 60627aba07951226c8fd6bb3115be4bd118edd4e"
| author | ecology | 
|---|---|
| date | Fri, 13 Aug 2021 18:17:11 +0000 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:726a387cfdc2 | 
|---|---|
| 1 #Rscript | |
| 2 | |
| 3 ################################################ | |
| 4 ## Link between variables and themselves ## | |
| 5 ################################################ | |
| 6 | |
| 7 #####Packages : ggplot2 | |
| 8 # Cowplot | |
| 9 # Car | |
| 10 # faraway | |
| 11 # dplyr | |
| 12 # GGally | |
| 13 # FactoMiner | |
| 14 # factoextra | |
| 15 # ggcorrplot | |
| 16 | |
| 17 #####Load arguments | |
| 18 | |
| 19 args <- commandArgs(trailingOnly = TRUE) | |
| 20 | |
| 21 if (length(args) == 0) { | |
| 22 stop("This tool needs at least one argument") | |
| 23 }else{ | |
| 24 table <- args[1] | |
| 25 hr <- args[2] | |
| 26 colli <- as.logical(args[3]) | |
| 27 vif <- as.logical(args[4]) | |
| 28 pca <- as.logical(args[5]) | |
| 29 interr <- as.logical(args[6]) | |
| 30 auto <- as.logical(args[7]) | |
| 31 spe <- as.numeric(args[8]) | |
| 32 col <- as.numeric(strsplit(args[9], ",")[[1]]) | |
| 33 var <- as.numeric(args[10]) | |
| 34 var2 <- as.numeric(args[11]) | |
| 35 var4 <- as.numeric(args[12]) | |
| 36 } | |
| 37 | |
| 38 if (hr == "false") { | |
| 39 hr <- FALSE | |
| 40 }else{ | |
| 41 hr <- TRUE | |
| 42 } | |
| 43 | |
| 44 if (length(col) == 1) { | |
| 45 stop("Please select two or more numerical columns") | |
| 46 } | |
| 47 | |
| 48 #####Import data | |
| 49 data <- read.table(table, sep = "\t", dec = ".", header = hr, fill = TRUE, encoding = "UTF-8") | |
| 50 if (vif | pca) { | |
| 51 data_active <- data[col] | |
| 52 #Define the active individuals and the active variables for the PCA | |
| 53 } | |
| 54 | |
| 55 if (colli | interr) { | |
| 56 colspe <- colnames(data)[spe] | |
| 57 } | |
| 58 | |
| 59 if (colli) { | |
| 60 data_num <- data[col] | |
| 61 data_num$species <- data[, spe] | |
| 62 data_num <- data_num[grep("^$", data_num$spe, invert = TRUE), ] | |
| 63 } | |
| 64 | |
| 65 if (interr | auto) { | |
| 66 colvar <- colnames(data)[var] | |
| 67 } | |
| 68 | |
| 69 if (interr) { | |
| 70 colvar2 <- colnames(data)[var2] | |
| 71 colvar4 <- colnames(data)[var4] | |
| 72 } | |
| 73 | |
| 74 #####Your analysis | |
| 75 | |
| 76 ####Independence of the observations#### | |
| 77 | |
| 78 acf_tb <- function(data, var) { | |
| 79 obj <- acf(data[, var], na.action = na.pass) | |
| 80 return(obj) | |
| 81 } | |
| 82 | |
| 83 acf_df <- function(data, var) { | |
| 84 | |
| 85 tb <- data.frame(acf = acf_tb(data, var)$acf, lag = acf_tb(data, var)$lag) | |
| 86 | |
| 87 return(tb) # Lag: intervalle temporel entre mesures, fréquence à laquelle on mesure l'auto corrélation. | |
| 88 # ACF: indépendance temporelle | |
| 89 } | |
| 90 | |
| 91 autocorr <- function(var1, var2) { | |
| 92 cat("\nACF\n", var2$acf, file = "acf.txt", fill = 1, append = TRUE) | |
| 93 graph <- ggplot2::ggplot() + | |
| 94 ggplot2::geom_bar(ggplot2::aes(x = var2$lag, y = var2$acf), stat = "identity", position = "identity", fill = "midnightblue") + | |
| 95 ggplot2::geom_hline(mapping = ggplot2::aes(yintercept = qnorm((1 + 0.95) / 2) / sqrt(var1$n.used)), | |
| 96 linetype = "dashed") + # calcul interval de confiance à 95% sans correction du bruit blanc. | |
| 97 ggplot2::geom_hline(mapping = ggplot2::aes(yintercept = -qnorm((1 + 0.95) / 2) / sqrt(var1$n.used)), linetype = "dashed") + ggplot2::labs(title = "Autocorrelation") + ggplot2::xlab("lag") + ggplot2::ylab("acf") | |
| 98 ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5)) | |
| 99 | |
| 100 ggplot2::ggsave("autocorrelation.png", graph) | |
| 101 } | |
| 102 | |
| 103 ####Interractions#### | |
| 104 | |
| 105 graph <- function(data, var1, var2, var3) { | |
| 106 graph <- ggplot2::ggplot(data, ggplot2::aes_string(x = var1, y = var2, group = var3, color = var3)) + | |
| 107 ggplot2::geom_point() + | |
| 108 ggplot2::geom_smooth(method = lm, se = FALSE) + | |
| 109 ggplot2::theme(plot.title = ggplot2::element_text(color = "black", size = 12, face = "bold")) | |
| 110 | |
| 111 return(graph) | |
| 112 } | |
| 113 | |
| 114 # Put multiple panels | |
| 115 interraction <- function(data, var1, var2, var3, var4) { | |
| 116 cat("\nSpecies\n", spe, file = "Species.txt", fill = 1, append = TRUE) | |
| 117 if (mult1) { | |
| 118 for (spe in unique(data[, var3])) { | |
| 119 data_cut <- data[data[, var3] == spe, ] | |
| 120 mult_graph <- graph(data_cut, var1, var2, var3) + ggplot2::facet_grid(cols = ggplot2::vars(data_cut[, var4]), scales = "free") + | |
| 121 cowplot::background_grid(major = "xy", minor = "none") + | |
| 122 cowplot::panel_border() + ggplot2::ggtitle("Interactions") | |
| 123 | |
| 124 ggplot2::ggsave(paste("interaction_of_", spe, ".png"), mult_graph, width = 10, height = 7) | |
| 125 } | |
| 126 }else{ | |
| 127 mult_graph <- graph(data, var1, var2, var3) + ggplot2::facet_grid(rows = ggplot2::vars(data[, var3]), cols = ggplot2::vars(data[, var4]), scales = "free") + | |
| 128 cowplot::background_grid(major = "xy", minor = "none") + | |
| 129 cowplot::panel_border() + ggplot2::ggtitle("Interactions") | |
| 130 | |
| 131 ggplot2::ggsave("interraction.png", mult_graph) | |
| 132 } | |
| 133 } | |
| 134 | |
| 135 ####Collinearity among covariates#### | |
| 136 # Create the plots | |
| 137 | |
| 138 coli <- function(data, var) { | |
| 139 if (mult2) { | |
| 140 cat("\nThere is not enough data on these species they appear too few times in the tabular-file\n", file = "Data.txt", fill = 1, append = TRUE) | |
| 141 for (spe in unique(data$species)) { | |
| 142 nb_spe <- sum(data$species == spe) | |
| 143 if (nb_spe <= 2) { | |
| 144 cat(spe, file = "Data.txt", fill = 1, append = TRUE) | |
| 145 }else{ | |
| 146 data_cut <- data[data$species == spe, ] | |
| 147 nb <- ncol(data_cut) | |
| 148 data_num <- data_cut[, -nb] | |
| 149 graph <- GGally::ggpairs(data_num, ggplot2::aes(color = data_cut$species), | |
| 150 lower = list(continuous = "points"), axisLabels = "internal") | |
| 151 | |
| 152 ggplot2::ggsave(paste0("collinarity_of_", spe, ".png"), graph, width = 20, height = 15) | |
| 153 } | |
| 154 } | |
| 155 | |
| 156 }else{ | |
| 157 nb <- ncol(data) | |
| 158 data_cut <- data[, -nb] | |
| 159 graph <- GGally::ggpairs(data_cut, ggplot2::aes(color = data[, var]), | |
| 160 lower = list(continuous = "points"), axisLabels = "internal") + | |
| 161 ggplot2::scale_colour_manual(values = c("#00AFBB", "#E7B800", "#FC4E07")) + | |
| 162 ggplot2::scale_fill_manual(values = c("#00AFBB", "#E7B800", "#FC4E07")) | |
| 163 | |
| 164 ggplot2::ggsave("collinarity.png", graph) | |
| 165 } | |
| 166 } | |
| 167 | |
| 168 ####PCA method#### | |
| 169 | |
| 170 plot_pca <- function(data) { | |
| 171 #Correlation circle | |
| 172 graph_corr <- factoextra::fviz_pca_var(active_data(data), col.var = "cos2", | |
| 173 gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), | |
| 174 repel = TRUE #Avoid text overlap | |
| 175 ) | |
| 176 ggplot2::ggsave("Pca_circle.png", graph_corr) | |
| 177 } | |
| 178 | |
| 179 plot_qual <- function(data) { | |
| 180 #PCA results for variables | |
| 181 var <- factoextra::get_pca_var(active_data(data)) | |
| 182 | |
| 183 #representation quality | |
| 184 graph_quality <- ggcorrplot::ggcorrplot(var$cos2[!apply(var$cos2, 1, anyNA), ], method = "circle", | |
| 185 ggtheme = ggplot2::theme_gray, | |
| 186 colors = c("#00AFBB", "#E7B800", "#FC4E07")) | |
| 187 | |
| 188 ggplot2::ggsave("Pca_quality.png", graph_quality) | |
| 189 } | |
| 190 | |
| 191 #### Variance inflation factor #### | |
| 192 | |
| 193 myvif <- function(mod) { | |
| 194 v <- vcov(mod) | |
| 195 assign <- attributes(model.matrix(mod))$assign | |
| 196 if (names(coefficients(mod)[1]) == "(Intercept)") { | |
| 197 v <- v[-1, -1] | |
| 198 assign <- assign[-1] | |
| 199 } else warning("No intercept: vifs may not be sensible.") | |
| 200 terms <- labels(terms(mod)) | |
| 201 n_terms <- length(terms) | |
| 202 if (n_terms < 2) stop("The model contains fewer than 2 terms") | |
| 203 if (length(assign) > dim(v)[1]) { | |
| 204 diag(tmp_cor) <- 0 | |
| 205 if (any(tmp_cor == 1.0)) { | |
| 206 return("Sample size is too small, 100% collinearity is present") | |
| 207 } else { | |
| 208 return("Sample size is too small") | |
| 209 } | |
| 210 } | |
| 211 r <- cov2cor(v) | |
| 212 detr <- det(r) | |
| 213 result <- matrix(0, n_terms, 3) | |
| 214 rownames(result) <- terms | |
| 215 colnames(result) <- c("GVIF", "Df", "GVIF^(1/2Df)") | |
| 216 for (term in 1:n_terms) { | |
| 217 subs <- which(assign == term) | |
| 218 result[term, 1] <- det(as.matrix(r[subs, subs])) * det(as.matrix(r[-subs, -subs])) / detr | |
| 219 result[term, 2] <- length(subs) | |
| 220 } | |
| 221 if (all(result[, 2] == 1)) { | |
| 222 result <- data.frame(GVIF = result[, 1]) | |
| 223 } else { | |
| 224 result[, 3] <- result[, 1] ^ (1 / (2 * result[, 2])) | |
| 225 } | |
| 226 invisible(result) | |
| 227 } | |
| 228 corvif1 <- function(dataz) { | |
| 229 dataz <- as.data.frame(dataz) | |
| 230 #correlation part | |
| 231 tmp_cor <- cor(dataz, use = "complete.obs") | |
| 232 return(tmp_cor) | |
| 233 } | |
| 234 corvif2 <- function(dataz) { | |
| 235 dataz <- as.data.frame(dataz) | |
| 236 #vif part | |
| 237 form <- formula(paste("fooy ~ ", paste(strsplit(names(dataz), " "), collapse = " + "))) | |
| 238 dataz <- data.frame(fooy = 1, dataz) | |
| 239 lm_mod <- lm(form, dataz) | |
| 240 | |
| 241 return(myvif(lm_mod)) | |
| 242 } | |
| 243 | |
| 244 #Autocorrelation | |
| 245 if (auto) { | |
| 246 obj1 <- acf_tb(data, var = colvar) | |
| 247 obj2 <- acf_df(data, var = colvar) | |
| 248 autocorr(var1 = obj1, var2 = obj2) | |
| 249 } | |
| 250 | |
| 251 if (interr) { | |
| 252 #Interractions | |
| 253 mult1 <- ifelse(length(unique(data[, colspe])) <= 6, FALSE, TRUE) | |
| 254 interraction(data, var1 = colvar, var2 = colvar2, var3 = colspe, var4 = colvar4) | |
| 255 } | |
| 256 | |
| 257 #Collinearity | |
| 258 if (colli) { | |
| 259 mult2 <- ifelse(length(unique(data[, spe])) < 3, FALSE, TRUE) | |
| 260 coli(data = data_num, var = spe) | |
| 261 } | |
| 262 | |
| 263 #PCA | |
| 264 if (pca) { | |
| 265 active_data <- function(data) { | |
| 266 #Calcul of PCA for the active data | |
| 267 res_pca <- FactoMineR::PCA(data, graph = FALSE) | |
| 268 | |
| 269 return(res_pca) | |
| 270 } | |
| 271 | |
| 272 #eigenvalue | |
| 273 eig_val <- capture.output(factoextra::get_eigenvalue(active_data(data_active))) | |
| 274 | |
| 275 cat("\nwrite table with eigenvalue. \n--> \"", paste(eig_val, "\"\n", sep = ""), file = "valeurs.txt", sep = "", append = TRUE) | |
| 276 | |
| 277 plot_pca(data_active) | |
| 278 plot_qual(data_active) | |
| 279 } | |
| 280 | |
| 281 #VIF | |
| 282 if (vif) { | |
| 283 #Compute 2 tables# | |
| 284 tb_corr <- as.data.frame(corvif1(dataz = data_active)) | |
| 285 tb_corr <- cbind(x = rownames(tb_corr), tb_corr) | |
| 286 tb_vif <- corvif2(dataz = data_active) | |
| 287 | |
| 288 write.table(tb_corr, "corr.tabular", row.names = FALSE, quote = FALSE, sep = "\t", dec = ".", fileEncoding = "UTF-8") | |
| 289 | |
| 290 if (all(is.na(tb_vif))) { | |
| 291 tb_vif <- NULL | |
| 292 cat("Vif couldn't be calculated, selected data isn't correlated") | |
| 293 }else{ | |
| 294 write.table(tb_vif, "vif.tabular", row.names = FALSE, quote = FALSE, sep = "\t", dec = ".", fileEncoding = "UTF-8") | |
| 295 } | |
| 296 } | 
