diff graph_link_var.r @ 0:c7dd4706f982 draft

"planemo upload for repository https://github.com/Marie59/Data_explo_tools commit 2f883743403105d9cac6d267496d985100da3958"
author ecology
date Tue, 27 Jul 2021 16:55:49 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/graph_link_var.r	Tue Jul 27 16:55:49 2021 +0000
@@ -0,0 +1,296 @@
+#Rscript
+
+################################################
+##    Link between variables and themselves   ##
+################################################
+
+#####Packages : ggplot2
+#               Cowplot
+#               Car
+#               faraway
+#               dplyr
+#               GGally
+#               FactoMiner
+#               factoextra
+#               ggcorrplot
+
+#####Load arguments
+
+args <- commandArgs(trailingOnly = TRUE)
+
+if (length(args) == 0) {
+    stop("This tool needs at least one argument")
+}else{
+    table <- args[1]
+    hr <- args[2]
+    colli <- as.logical(args[3])
+    vif <- as.logical(args[4])
+    pca <- as.logical(args[5])
+    interr <- as.logical(args[6])
+    auto <- as.logical(args[7])
+    spe <- as.numeric(args[8])
+    col <- as.numeric(strsplit(args[9], ",")[[1]])
+    var <- as.numeric(args[10])
+    var2 <- as.numeric(args[11])
+    var4 <- as.numeric(args[12])
+}
+
+if (hr == "false") {
+  hr <- FALSE
+}else{
+  hr <- TRUE
+}
+
+if (length(col) == 1) {
+stop("Please select two or more numerical columns")
+}
+
+#####Import data
+data <- read.table(table, sep = "\t", dec = ".", header = hr, fill = TRUE, encoding = "UTF-8")
+if (vif | pca) {
+data_active <- data[col]
+#Define the active individuals and the active variables for the PCA
+}
+
+if (colli | interr) {
+colspe <- colnames(data)[spe]
+}
+
+if (colli) {
+data_num <- data[col]
+data_num$species <- data[, spe]
+data_num <- data_num[grep("^$", data_num$spe, invert = TRUE), ]
+}
+
+if (interr | auto) {
+colvar <- colnames(data)[var]
+}
+
+if (interr) {
+colvar2 <- colnames(data)[var2]
+colvar4 <- colnames(data)[var4]
+}
+
+#####Your analysis
+
+####Independence of the observations####
+
+acf_tb <- function(data, var) {
+obj <- acf(data[, var], na.action = na.pass)
+  return(obj)
+}
+
+acf_df <- function(data, var) {
+
+tb <- data.frame(acf = acf_tb(data, var)$acf, lag = acf_tb(data, var)$lag)
+
+  return(tb) # Lag: intervalle temporel entre mesures, fréquence à laquelle on mesure l'auto corrélation.
+# ACF: indépendance temporelle
+}
+
+autocorr <- function(var1, var2) {
+  cat("\nACF\n", var2$acf, file = "acf.txt", fill = 1, append = TRUE)
+  graph <- ggplot2::ggplot() +
+  ggplot2::geom_bar(ggplot2::aes(x = var2$lag, y = var2$acf), stat = "identity", position = "identity", fill = "midnightblue") +
+  ggplot2::geom_hline(mapping = ggplot2::aes(yintercept = qnorm((1 + 0.95) / 2) / sqrt(var1$n.used)),
+             linetype = "dashed") + # calcul interval de confiance à 95% sans correction du bruit blanc.
+  ggplot2::geom_hline(mapping = ggplot2::aes(yintercept = -qnorm((1 + 0.95) / 2) / sqrt(var1$n.used)), linetype = "dashed") + ggplot2::labs(title = "Autocorrelation") + ggplot2::xlab("lag") + ggplot2::ylab("acf")
+  ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5))
+
+  ggplot2::ggsave("autocorrelation.png", graph)
+}
+
+####Interractions####
+
+graph <- function(data, var1, var2, var3) {
+  graph <- ggplot2::ggplot(data, ggplot2::aes_string(x = var1, y = var2, group = var3, color = var3)) +
+  ggplot2::geom_point() +
+  ggplot2::geom_smooth(method = lm, se = FALSE) +
+  ggplot2::theme(plot.title = ggplot2::element_text(color = "black", size = 12, face = "bold"))
+
+return(graph)
+}
+
+# Put multiple panels
+interraction <- function(data, var1, var2, var3, var4) {
+  cat("\nSpecies\n", spe, file = "Species.txt", fill = 1, append = TRUE)
+  if (mult1) {
+      for (spe in unique(data[, var3])) {
+      data_cut <- data[data[, var3] == spe, ]
+      mult_graph <- graph(data_cut, var1, var2, var3) + ggplot2::facet_grid(cols = ggplot2::vars(data_cut[, var4]), scales = "free") +
+      cowplot::background_grid(major = "xy", minor = "none") +
+      cowplot::panel_border() + ggplot2::ggtitle("Interactions")
+
+      ggplot2::ggsave(paste("interaction_of_", spe, ".png"), mult_graph, width = 10, height = 7)
+      }
+    }else{
+    mult_graph <- graph(data, var1, var2, var3) + ggplot2::facet_grid(rows = ggplot2::vars(data[, var3]), cols = ggplot2::vars(data[, var4]), scales = "free") +
+    cowplot::background_grid(major = "xy", minor = "none") +
+    cowplot::panel_border() + ggplot2::ggtitle("Interactions")
+
+    ggplot2::ggsave("interraction.png", mult_graph)
+  }
+}
+
+####Collinearity among covariates####
+# Create the plots
+
+coli <- function(data, var) {
+  if (mult2) {
+    cat("\nThere is not enough data on these species they appear too few times in the tabular-file\n", file = "Data.txt", fill = 1, append = TRUE)
+    for (spe in unique(data$species)) {
+      nb_spe <- sum(data$species == spe)
+      if (nb_spe <= 2) {
+      cat(spe, file = "Data.txt", fill = 1, append = TRUE)
+      }else{
+      data_cut <- data[data$species == spe, ]
+      nb <- ncol(data_cut)
+      data_num <- data_cut[, -nb]
+      graph <- GGally::ggpairs(data_num, ggplot2::aes(color = data_cut$species),
+      lower = list(continuous = "points"), axisLabels = "internal")
+
+      ggplot2::ggsave(paste0("collinarity_of_", spe, ".png"), graph, width = 20, height = 15)
+      }
+  }
+
+  }else{
+    nb <- ncol(data)
+    data_cut <- data[, -nb]
+    graph <- GGally::ggpairs(data_cut, ggplot2::aes(color = data[, var]),
+    lower = list(continuous = "points"), axisLabels = "internal") +
+  ggplot2::scale_colour_manual(values = c("#00AFBB", "#E7B800", "#FC4E07")) +
+  ggplot2::scale_fill_manual(values = c("#00AFBB", "#E7B800", "#FC4E07"))
+
+  ggplot2::ggsave("collinarity.png", graph)
+  }
+}
+
+####PCA method####
+
+plot_pca <- function(data) {
+  #Correlation circle
+  graph_corr <- factoextra::fviz_pca_var(active_data(data), col.var = "cos2",
+                           gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
+                           repel = TRUE #Avoid text overlap
+                           )
+  ggplot2::ggsave("Pca_circle.png", graph_corr)
+}
+
+plot_qual <- function(data) {
+  #PCA results for variables
+  var <- factoextra::get_pca_var(active_data(data))
+
+  #representation quality
+  graph_quality <- ggcorrplot::ggcorrplot(var$cos2[!apply(var$cos2, 1, anyNA), ], method = "circle",
+  ggtheme = ggplot2::theme_gray,
+  colors = c("#00AFBB", "#E7B800", "#FC4E07"))
+
+  ggplot2::ggsave("Pca_quality.png", graph_quality)
+}
+
+#### Variance inflation factor ####
+
+myvif <- function(mod) {
+  v <- vcov(mod)
+  assign <- attributes(model.matrix(mod))$assign
+  if (names(coefficients(mod)[1]) == "(Intercept)") {
+    v <- v[-1, -1]
+    assign <- assign[-1]
+  } else warning("No intercept: vifs may not be sensible.")
+  terms <- labels(terms(mod))
+  n_terms <- length(terms)
+  if (n_terms < 2) stop("The model contains fewer than 2 terms")
+  if (length(assign) > dim(v)[1]) {
+     diag(tmp_cor) <- 0
+     if (any(tmp_cor == 1.0)) {
+        return("Sample size is too small, 100% collinearity is present")
+     } else {
+        return("Sample size is too small")
+     }
+  }
+  r <- cov2cor(v)
+  detr <- det(r)
+  result <- matrix(0, n_terms, 3)
+  rownames(result) <- terms
+  colnames(result) <- c("GVIF", "Df", "GVIF^(1/2Df)")
+  for (term in 1:n_terms) {
+    subs <- which(assign == term)
+    result[term, 1] <- det(as.matrix(r[subs, subs])) * det(as.matrix(r[-subs, -subs])) / detr
+    result[term, 2] <- length(subs)
+  }
+  if (all(result[, 2] == 1)) {
+    result <- data.frame(GVIF = result[, 1])
+  } else {
+    result[, 3] <- result[, 1] ^ (1 / (2 * result[, 2]))
+  }
+  invisible(result)
+}
+corvif1 <- function(dataz) {
+    dataz <- as.data.frame(dataz)
+    #correlation part
+    tmp_cor <- cor(dataz, use = "complete.obs")
+    return(tmp_cor)
+}
+corvif2 <- function(dataz) {
+    dataz <- as.data.frame(dataz)
+    #vif part
+    form    <- formula(paste("fooy ~ ", paste(strsplit(names(dataz), " "), collapse = " + ")))
+    dataz   <- data.frame(fooy = 1, dataz)
+    lm_mod  <- lm(form, dataz)
+
+    return(myvif(lm_mod))
+}
+
+#Autocorrelation
+if (auto) {
+obj1 <- acf_tb(data, var = colvar)
+obj2 <- acf_df(data, var = colvar)
+autocorr(var1 = obj1, var2 = obj2)
+}
+
+if (interr) {
+#Interractions
+mult1 <- ifelse(length(unique(data[, colspe])) <= 6, FALSE, TRUE)
+interraction(data, var1 = colvar, var2 = colvar2, var3 = colspe, var4 = colvar4)
+}
+
+#Collinearity
+if (colli) {
+mult2 <- ifelse(length(unique(data[, spe])) < 3, FALSE, TRUE)
+coli(data = data_num, var = spe)
+}
+
+#PCA
+if (pca) {
+active_data <- function(data) {
+  #Calcul of PCA for the active data
+  res_pca <- FactoMineR::PCA(data, graph = FALSE)
+
+return(res_pca)
+}
+
+#eigenvalue
+eig_val <- capture.output(factoextra::get_eigenvalue(active_data(data_active)))
+
+cat("\nwrite table with eigenvalue. \n--> \"", paste(eig_val, "\"\n", sep = ""), file = "valeurs.txt", sep = "", append = TRUE)
+
+plot_pca(data_active)
+plot_qual(data_active)
+}
+
+#VIF
+if (vif) {
+#Compute 2 tables#
+tb_corr <- as.data.frame(corvif1(dataz = data_active))
+tb_corr <- cbind(x = rownames(tb_corr), tb_corr)
+tb_vif <- corvif2(dataz = data_active)
+
+write.table(tb_corr, "corr.tabular", row.names = FALSE, quote = FALSE, sep = "\t", dec = ".", fileEncoding = "UTF-8")
+
+if (all(is.na(tb_vif))) {
+  tb_vif <- NULL
+  cat("Vif couldn't be calculated, selected data isn't correlated")
+}else{
+write.table(tb_vif, "vif.tabular", row.names = FALSE, quote = FALSE, sep = "\t", dec = ".", fileEncoding = "UTF-8")
+}
+}