diff aldex2.R @ 0:f4d0bd4b4d6d draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/aldex2 commit b99f09cf03f075a6881d192b0f1233233289fa60
author iuc
date Wed, 29 Jun 2022 07:36:45 +0000
parents
children 75214276e2b7
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/aldex2.R	Wed Jun 29 07:36:45 2022 +0000
@@ -0,0 +1,150 @@
+#!/usr/bin/env Rscript
+
+suppressPackageStartupMessages(library("ALDEx2"))
+suppressPackageStartupMessages(library("data.table"))
+suppressPackageStartupMessages(library("qgraph"))
+suppressPackageStartupMessages(library("optparse"))
+
+option_list <- list(
+    make_option(c("--aldex_test"), action = "store", dest = "aldex_test", default = NULL, help = "Indicates which analysis to perform"),
+    make_option(c("--analysis_type"), action = "store", dest = "analysis_type", help = "Indicates which analysis to perform"),
+    make_option(c("--cutoff_effect"), action = "store", dest = "cutoff_effect", type = "integer", default = NULL, help = "Effect size cutoff for plotting"),
+    make_option(c("--cutoff_pval"), action = "store", dest = "cutoff_pval", type = "double", default = NULL, help = "Benjamini-Hochberg fdr cutoff"),
+    make_option(c("--denom"), action = "store", dest = "denom", help = "Indicates which features to retain as the denominator for the Geometric Mean calculation"),
+    make_option(c("--effect"), action = "store", dest = "effect", default = "false", help = "Calculate abundances and effect sizes"),
+    make_option(c("--feature_name"), action = "store", dest = "feature_name", default = NULL, help = "Name of the feature from the input data to be plotted"),
+    make_option(c("--group_names"), action = "store", dest = "group_names", help = "Group names vector"),
+    make_option(c("--group_nums"), action = "store", dest = "group_nums", default = NULL, help = "Group number for continuous numeric vector"),
+    make_option(c("--hist_plot"), action = "store", dest = "hist_plot", default = "false", help = "Indicates whether to plot a histogram of p-values for the first Dirichlet Monte Carlo instance"),
+    make_option(c("--include_sample_summary"), action = "store", dest = "include_sample_summary", default = "false", help = "Include median clr values for each sample"),
+    make_option(c("--iterate"), action = "store", dest = "iterate", default = "false", help = "Indicates whether to iteratively perform a test"),
+    make_option(c("--num_cols"), action = "store", dest = "num_cols", help = "Number of columns in group vector"),
+    make_option(c("--num_cols_in_groups"), action = "store", dest = "num_cols_in_groups", default = NULL, help = "Number of columns in each group dewfining the continuous numeric vector"),
+    make_option(c("--num_mc_samples"), action = "store", dest = "num_mc_samples", type = "integer", help = "Number of Monte Carlo samples"),
+    make_option(c("--output"), action = "store", dest = "output", help = "output file"),
+    make_option(c("--paired_test"), action = "store", dest = "paired_test", default = "false", help = "Indicates whether to do paired-sample tests"),
+    make_option(c("--plot_test"), action = "store", dest = "plot_test", default = NULL, help = "The method of calculating significance"),
+    make_option(c("--plot_type"), action = "store", dest = "plot_type", default = NULL, help = "The type of plot to be produced"),
+    make_option(c("--reads"), action = "store", dest = "reads", help = "Input reads table"),
+    make_option(c("--xlab"), action = "store", dest = "xlab", default = NULL, help = "x lable for the plot"),
+    make_option(c("--ylab"), action = "store", dest = "ylab", default = NULL, help = "y lable for the plot")
+)
+
+parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
+args <- parse_args(parser, positional_arguments = TRUE)
+opt <- args$options
+
+get_boolean_value <- function(val) {
+    if (val == "true") {
+        return(TRUE)
+    } else {
+        return(FALSE)
+    }
+}
+
+# Read the input reads file into a data frame.
+reads_df <- read.table(file = opt$reads, header = TRUE, sep = "\t", row.names = 1, dec = ".", as.is = FALSE)
+
+# Split the group_names and num_cols into lists of strings.
+group_names_str <- as.character(opt$group_names)
+group_names_list <- strsplit(group_names_str, ",")[[1]]
+num_cols_str <- as.character(opt$num_cols)
+num_cols_list <- strsplit(num_cols_str, ",")[[1]]
+# Construct conditions vector.
+conditions_vector <- c()
+for (i in seq_along(num_cols_list)) {
+    num_cols <- as.integer(num_cols_list[i])
+    group_name <- group_names_list[i]
+    for (j in 1:num_cols) {
+        conditions_vector <- cbind(conditions_vector, group_name)
+    }
+}
+# The conditions_vector is now a matrix,
+# so coerce it back to a vector.
+conditions_vector <- as.vector(conditions_vector)
+
+# Convert boolean values to boolean.
+effect <- get_boolean_value(opt$effect)
+include_sample_summary <- get_boolean_value(opt$include_sample_summary)
+iterate <- get_boolean_value(opt$iterate)
+
+if (opt$analysis_type == "aldex") {
+    aldex_obj <- aldex(reads = reads_df,
+                       conditions_vector,
+                       mc.samples = opt$num_mc_samples,
+                       test = opt$aldex_test,
+                       effect = effect,
+                       include.sample.summary = include_sample_summary,
+                       denom = opt$denom,
+                       iterate = iterate)
+} else {
+    # Generate Monte Carlo samples of the Dirichlet distribution for each sample. Convert each
+    # instance using a log-ratio transform. This is the input for all further analyses.
+    aldex_clr_obj <- aldex.clr(reads_df, conditions_vector, mc.samples = opt$num_mc_samples, denom = opt$denom)
+
+    if (opt$analysis_type == "aldex_corr") {
+        if (!is.null(opt$cont_var)) {
+            # Read the input cont_var vector.
+            cont_var <- as.numeric(read.table(file = opt$cont_var, header = FALSE, sep = "\t"))
+        }
+
+        # Split the group_names and num_cols into lists of strings.
+        group_nums_str <- as.character(opt$group_nums)
+        group_nums_list <- strsplit(group_nums_str, ",")[[1]]
+        num_cols_in_groups_str <- as.character(opt$num_cols_in_groups)
+        num_cols_in_groups_list <- strsplit(num_cols_in_groups_str, ",")[[1]]
+        # Construct continuous numeric vector.
+        cont_var_vector <- c()
+        for (i in seq_along(num_cols_in_groups_list)) {
+            num_cols_in_group <- as.integer(num_cols_in_groups_list[i])
+            group_num <- group_nums_list[i]
+            for (j in 1:num_cols_in_group) {
+                cont_var_vector <- cbind(cont_var_vector, group_num)
+            }
+        }
+        # The cont_var_vector is now a matrix,
+        # so coerce it back to a vector.
+        cont_var_vector <- as.numeric(as.vector(cont_var_vector))
+
+        aldex_obj <- aldex.corr(aldex_clr_obj, cont.var = cont_var_vector)
+    } else if (opt$analysis_type == "aldex_effect") {
+        aldex_obj <- aldex.effect(aldex_clr_obj, include_sample_summary)
+    } else if (opt$analysis_type == "aldex_expected_distance") {
+        dist <- aldex.expectedDistance(aldex_clr_obj)
+        png(filename = opt$output)
+        qgraph(dist, layout = "spring", vsize = 1)
+        dev.off()
+    } else if (opt$analysis_type == "aldex_kw") {
+        aldex_obj <- aldex.kw(aldex_clr_obj)
+    } else if (opt$analysis_type == "aldex_plot") {
+        aldex_obj <- aldex(reads = reads_df,
+                           conditions_vector,
+                           mc.samples = opt$num_mc_samples,
+                           test = opt$aldex_test,
+                           effect = effect,
+                           include.sample.summary = include_sample_summary,
+                           denom = opt$denom,
+                           iterate = iterate)
+        png(filename = opt$output)
+        aldex.plot(x = aldex_obj,
+                   type = opt$plot_type,
+                   test = opt$plot_test,
+                   cutoff.pval = opt$cutoff_pval,
+                   cutoff.effect = opt$cutoff_effect,
+                   xlab = opt$xlab,
+                   ylab = opt$ylab)
+        dev.off()
+    } else if (opt$analysis_type == "aldex_plot_feature") {
+        png(filename = opt$output)
+        aldex.plotFeature(aldex_clr_obj, opt$feature_name)
+        dev.off()
+    } else if (opt$analysis_type == "aldex_ttest") {
+        paired_test <- get_boolean_value(opt$paired_test)
+        hist_plot <- get_boolean_value(opt$hist_plot)
+        aldex_obj <- aldex.ttest(aldex_clr_obj, paired.test = paired_test, hist.plot = hist_plot)
+    }
+}
+if ((opt$analysis_type != "aldex_expected_distance") && (opt$analysis_type != "aldex_plot") && (opt$analysis_type != "aldex_plot_feature")) {
+    # Output the ALDEx object.
+    write.table(aldex_obj, file = opt$output, append = FALSE, sep = "\t", dec = ".", row.names = FALSE, col.names = TRUE)
+}