view graph_link_var.r @ 1:3df8937fd6fd draft default tip

"planemo upload for repository https://github.com/Marie59/Data_explo_tools commit 60627aba07951226c8fd6bb3115be4bd118edd4e"
author ecology
date Fri, 13 Aug 2021 18:16:46 +0000
parents 9f679060051a
children
line wrap: on
line source

#Rscript

################################################
##    Link between variables and themselves   ##
################################################

#####Packages : ggplot2
#               Cowplot
#               Car
#               faraway
#               dplyr
#               GGally
#               FactoMiner
#               factoextra
#               ggcorrplot

#####Load arguments

args <- commandArgs(trailingOnly = TRUE)

if (length(args) == 0) {
    stop("This tool needs at least one argument")
}else{
    table <- args[1]
    hr <- args[2]
    colli <- as.logical(args[3])
    vif <- as.logical(args[4])
    pca <- as.logical(args[5])
    interr <- as.logical(args[6])
    auto <- as.logical(args[7])
    spe <- as.numeric(args[8])
    col <- as.numeric(strsplit(args[9], ",")[[1]])
    var <- as.numeric(args[10])
    var2 <- as.numeric(args[11])
    var4 <- as.numeric(args[12])
}

if (hr == "false") {
  hr <- FALSE
}else{
  hr <- TRUE
}

if (length(col) == 1) {
stop("Please select two or more numerical columns")
}

#####Import data
data <- read.table(table, sep = "\t", dec = ".", header = hr, fill = TRUE, encoding = "UTF-8")
if (vif | pca) {
data_active <- data[col]
#Define the active individuals and the active variables for the PCA
}

if (colli | interr) {
colspe <- colnames(data)[spe]
}

if (colli) {
data_num <- data[col]
data_num$species <- data[, spe]
data_num <- data_num[grep("^$", data_num$spe, invert = TRUE), ]
}

if (interr | auto) {
colvar <- colnames(data)[var]
}

if (interr) {
colvar2 <- colnames(data)[var2]
colvar4 <- colnames(data)[var4]
}

#####Your analysis

####Independence of the observations####

acf_tb <- function(data, var) {
obj <- acf(data[, var], na.action = na.pass)
  return(obj)
}

acf_df <- function(data, var) {

tb <- data.frame(acf = acf_tb(data, var)$acf, lag = acf_tb(data, var)$lag)

  return(tb) # Lag: intervalle temporel entre mesures, fréquence à laquelle on mesure l'auto corrélation.
# ACF: indépendance temporelle
}

autocorr <- function(var1, var2) {
  cat("\nACF\n", var2$acf, file = "acf.txt", fill = 1, append = TRUE)
  graph <- ggplot2::ggplot() +
  ggplot2::geom_bar(ggplot2::aes(x = var2$lag, y = var2$acf), stat = "identity", position = "identity", fill = "midnightblue") +
  ggplot2::geom_hline(mapping = ggplot2::aes(yintercept = qnorm((1 + 0.95) / 2) / sqrt(var1$n.used)),
             linetype = "dashed") + # calcul interval de confiance à 95% sans correction du bruit blanc.
  ggplot2::geom_hline(mapping = ggplot2::aes(yintercept = -qnorm((1 + 0.95) / 2) / sqrt(var1$n.used)), linetype = "dashed") + ggplot2::labs(title = "Autocorrelation") + ggplot2::xlab("lag") + ggplot2::ylab("acf")
  ggplot2::theme(plot.title = ggplot2::element_text(hjust = 0.5))

  ggplot2::ggsave("autocorrelation.png", graph)
}

####Interractions####

graph <- function(data, var1, var2, var3) {
  graph <- ggplot2::ggplot(data, ggplot2::aes_string(x = var1, y = var2, group = var3, color = var3)) +
  ggplot2::geom_point() +
  ggplot2::geom_smooth(method = lm, se = FALSE) +
  ggplot2::theme(plot.title = ggplot2::element_text(color = "black", size = 12, face = "bold"))

return(graph)
}

# Put multiple panels
interraction <- function(data, var1, var2, var3, var4) {
  cat("\nSpecies\n", spe, file = "Species.txt", fill = 1, append = TRUE)
  if (mult1) {
      for (spe in unique(data[, var3])) {
      data_cut <- data[data[, var3] == spe, ]
      mult_graph <- graph(data_cut, var1, var2, var3) + ggplot2::facet_grid(cols = ggplot2::vars(data_cut[, var4]), scales = "free") +
      cowplot::background_grid(major = "xy", minor = "none") +
      cowplot::panel_border() + ggplot2::ggtitle("Interactions")

      ggplot2::ggsave(paste("interaction_of_", spe, ".png"), mult_graph, width = 10, height = 7)
      }
    }else{
    mult_graph <- graph(data, var1, var2, var3) + ggplot2::facet_grid(rows = ggplot2::vars(data[, var3]), cols = ggplot2::vars(data[, var4]), scales = "free") +
    cowplot::background_grid(major = "xy", minor = "none") +
    cowplot::panel_border() + ggplot2::ggtitle("Interactions")

    ggplot2::ggsave("interraction.png", mult_graph)
  }
}

####Collinearity among covariates####
# Create the plots

coli <- function(data, var) {
  if (mult2) {
    cat("\nThere is not enough data on these species they appear too few times in the tabular-file\n", file = "Data.txt", fill = 1, append = TRUE)
    for (spe in unique(data$species)) {
      nb_spe <- sum(data$species == spe)
      if (nb_spe <= 2) {
      cat(spe, file = "Data.txt", fill = 1, append = TRUE)
      }else{
      data_cut <- data[data$species == spe, ]
      nb <- ncol(data_cut)
      data_num <- data_cut[, -nb]
      graph <- GGally::ggpairs(data_num, ggplot2::aes(color = data_cut$species),
      lower = list(continuous = "points"), axisLabels = "internal")

      ggplot2::ggsave(paste0("collinarity_of_", spe, ".png"), graph, width = 20, height = 15)
      }
  }

  }else{
    nb <- ncol(data)
    data_cut <- data[, -nb]
    graph <- GGally::ggpairs(data_cut, ggplot2::aes(color = data[, var]),
    lower = list(continuous = "points"), axisLabels = "internal") +
  ggplot2::scale_colour_manual(values = c("#00AFBB", "#E7B800", "#FC4E07")) +
  ggplot2::scale_fill_manual(values = c("#00AFBB", "#E7B800", "#FC4E07"))

  ggplot2::ggsave("collinarity.png", graph)
  }
}

####PCA method####

plot_pca <- function(data) {
  #Correlation circle
  graph_corr <- factoextra::fviz_pca_var(active_data(data), col.var = "cos2",
                           gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
                           repel = TRUE #Avoid text overlap
                           )
  ggplot2::ggsave("Pca_circle.png", graph_corr)
}

plot_qual <- function(data) {
  #PCA results for variables
  var <- factoextra::get_pca_var(active_data(data))

  #representation quality
  graph_quality <- ggcorrplot::ggcorrplot(var$cos2[!apply(var$cos2, 1, anyNA), ], method = "circle",
  ggtheme = ggplot2::theme_gray,
  colors = c("#00AFBB", "#E7B800", "#FC4E07"))

  ggplot2::ggsave("Pca_quality.png", graph_quality)
}

#### Variance inflation factor ####

myvif <- function(mod) {
  v <- vcov(mod)
  assign <- attributes(model.matrix(mod))$assign
  if (names(coefficients(mod)[1]) == "(Intercept)") {
    v <- v[-1, -1]
    assign <- assign[-1]
  } else warning("No intercept: vifs may not be sensible.")
  terms <- labels(terms(mod))
  n_terms <- length(terms)
  if (n_terms < 2) stop("The model contains fewer than 2 terms")
  if (length(assign) > dim(v)[1]) {
     diag(tmp_cor) <- 0
     if (any(tmp_cor == 1.0)) {
        return("Sample size is too small, 100% collinearity is present")
     } else {
        return("Sample size is too small")
     }
  }
  r <- cov2cor(v)
  detr <- det(r)
  result <- matrix(0, n_terms, 3)
  rownames(result) <- terms
  colnames(result) <- c("GVIF", "Df", "GVIF^(1/2Df)")
  for (term in 1:n_terms) {
    subs <- which(assign == term)
    result[term, 1] <- det(as.matrix(r[subs, subs])) * det(as.matrix(r[-subs, -subs])) / detr
    result[term, 2] <- length(subs)
  }
  if (all(result[, 2] == 1)) {
    result <- data.frame(GVIF = result[, 1])
  } else {
    result[, 3] <- result[, 1] ^ (1 / (2 * result[, 2]))
  }
  invisible(result)
}
corvif1 <- function(dataz) {
    dataz <- as.data.frame(dataz)
    #correlation part
    tmp_cor <- cor(dataz, use = "complete.obs")
    return(tmp_cor)
}
corvif2 <- function(dataz) {
    dataz <- as.data.frame(dataz)
    #vif part
    form    <- formula(paste("fooy ~ ", paste(strsplit(names(dataz), " "), collapse = " + ")))
    dataz   <- data.frame(fooy = 1, dataz)
    lm_mod  <- lm(form, dataz)

    return(myvif(lm_mod))
}

#Autocorrelation
if (auto) {
obj1 <- acf_tb(data, var = colvar)
obj2 <- acf_df(data, var = colvar)
autocorr(var1 = obj1, var2 = obj2)
}

if (interr) {
#Interractions
mult1 <- ifelse(length(unique(data[, colspe])) <= 6, FALSE, TRUE)
interraction(data, var1 = colvar, var2 = colvar2, var3 = colspe, var4 = colvar4)
}

#Collinearity
if (colli) {
mult2 <- ifelse(length(unique(data[, spe])) < 3, FALSE, TRUE)
coli(data = data_num, var = spe)
}

#PCA
if (pca) {
active_data <- function(data) {
  #Calcul of PCA for the active data
  res_pca <- FactoMineR::PCA(data, graph = FALSE)

return(res_pca)
}

#eigenvalue
eig_val <- capture.output(factoextra::get_eigenvalue(active_data(data_active)))

cat("\nwrite table with eigenvalue. \n--> \"", paste(eig_val, "\"\n", sep = ""), file = "valeurs.txt", sep = "", append = TRUE)

plot_pca(data_active)
plot_qual(data_active)
}

#VIF
if (vif) {
#Compute 2 tables#
tb_corr <- as.data.frame(corvif1(dataz = data_active))
tb_corr <- cbind(x = rownames(tb_corr), tb_corr)
tb_vif <- corvif2(dataz = data_active)

write.table(tb_corr, "corr.tabular", row.names = FALSE, quote = FALSE, sep = "\t", dec = ".", fileEncoding = "UTF-8")

if (all(is.na(tb_vif))) {
  tb_vif <- NULL
  cat("Vif couldn't be calculated, selected data isn't correlated")
}else{
write.table(tb_vif, "vif.tabular", row.names = FALSE, quote = FALSE, sep = "\t", dec = ".", fileEncoding = "UTF-8")
}
}