changeset 6:f9543efabd85 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 26f87cc62468c9c329b33246af4f14e2659856f9
author iuc
date Fri, 10 Jan 2025 14:59:00 +0000
parents e8f58a93ff19
children
files phyloseq_plot_bar.R
diffstat 1 files changed, 88 insertions(+), 11 deletions(-) [+]
line wrap: on
line diff
--- a/phyloseq_plot_bar.R	Tue Jan 07 17:58:14 2025 +0000
+++ b/phyloseq_plot_bar.R	Fri Jan 10 14:59:00 2025 +0000
@@ -26,6 +26,34 @@
     make_option(c("--output"),
         action = "store", dest = "output",
         help = "Output file (PDF)"
+    ),
+    make_option(c("--topX"),
+        action = "store", dest = "topX", default = NULL,
+        help = "Show only the top X taxa based on abundance (e.g., '10') (optional)"
+    ),
+    make_option(c("--keepOthers"),
+        action = "store_true", dest = "keepOthers", default = FALSE,
+        help = "Keep taxa not in topX and label them as 'Others' (optional)"
+    ),
+    make_option(c("--keepNonAssigned"),
+        action = "store_true", dest = "keepNonAssigned", default = FALSE,
+        help = "Keep taxa labeled as 'Not Assigned' (optional)"
+    ),
+    make_option(c("--normalize"),
+        action = "store_true", dest = "normalize", default = FALSE,
+        help = "Normalize abundances to sum to 100% (optional)"
+    ),
+    make_option(c("--width"),
+        action = "store", dest = "width", default = 10,
+        type = "numeric", help = "Width of the output plot in inches"
+    ),
+    make_option(c("--height"),
+        action = "store", dest = "height", default = 8,
+        type = "numeric", help = "Height of the output plot in inches"
+    ),
+    make_option(c("--device"),
+        action = "store", dest = "device", default = "pdf",
+        help = "Output format (e.g., 'pdf', 'png', 'jpeg')"
     )
 )
 
@@ -38,9 +66,6 @@
 if (is.null(opt$input) || opt$input == "") {
     stop("Error: Input file is required.")
 }
-if (is.null(opt$x) || opt$x == "") {
-    stop("Error: X-axis variable is required.")
-}
 if (is.null(opt$output) || opt$output == "") {
     stop("Error: Output file is required.")
 }
@@ -49,14 +74,63 @@
 print(paste("Trying to read:", opt$input))
 physeq <- readRDS(opt$input)
 
+# Normalize to relative abundances if requested
+if (opt$normalize) {
+    print("Normalizing abundances to sum to 100%...")
+    physeq <- transform_sample_counts(physeq, function(x) 100 * x / sum(x))
+}
+
+if (opt$keepNonAssigned) {
+    # Add synthetic "Not Assigned" for missing/NA taxa
+    tax_table(physeq) <- apply(tax_table(physeq), c(1, 2), function(x) ifelse(is.na(x) | x == "", "Not Assigned", x))
+}
 # Check if the 'x' and 'fill' variables are valid
 sample_vars <- colnames(sample_data(physeq))
-if (!opt$x %in% sample_vars) {
-    stop(paste("Error: X-axis variable", opt$x, "does not exist in the sample data."))
+
+# If topX is provided, filter the phyloseq object to show only top X taxa
+if (!is.null(opt$topX) && opt$topX != "") {
+    topX <- as.numeric(opt$topX)
+    if (is.na(topX) || topX <= 0) {
+        stop("Error: topX should be a positive number.")
+    }
+
+    # Aggregate the data at the selected rank (e.g., Phylum)
+    tax_rank <- opt$fill # Adjust as necessary
+    physeq_agg <- tax_glom(physeq, taxrank = tax_rank)
+
+    # Get the abundance of each taxon at the selected rank
+    taxa_abundance <- taxa_sums(physeq_agg)
+
+    # Summarize the abundance at each taxonomic rank (grouping by taxonomic name)
+    tax_table_agg <- tax_table(physeq_agg)
+    taxa_abundance_by_rank <- tapply(taxa_abundance, tax_table_agg[, tax_rank], sum)
+
+    # Identify the top X taxa by summed abundance
+    top_taxa <- names(sort(taxa_abundance_by_rank, decreasing = TRUE))[1:topX]
+
+    print("Only plotting taxa in TopX taxa group:")
+    print(top_taxa)
+
+    # Get the OTUs corresponding to the top taxa
+    otus_in_top_taxa <- rownames(tax_table_agg)[tax_table_agg[, tax_rank] %in% top_taxa]
+
+    if (opt$keepOthers) {
+        # Label taxa not in top_taxa as "Others"
+        tax_table(physeq_agg)[, tax_rank][!rownames(tax_table(physeq_agg)) %in% otus_in_top_taxa] <- "Others"
+        physeq <- physeq_agg
+    } else {
+        # Subset the phyloseq object to keep only the top X taxa
+        physeq_filtered <- prune_taxa(otus_in_top_taxa, physeq_agg)
+        physeq <- physeq_filtered
+    }
 }
 
 # Generate bar plot
-p <- plot_bar(physeq, x = opt$x, fill = opt$fill)
+if (!is.null(opt$x) && opt$x != "") {
+    p <- plot_bar(physeq, x = opt$x, fill = opt$fill)
+} else {
+    p <- plot_bar(physeq, fill = opt$fill) # If no x is provided, don't include x
+}
 
 # Only facet if the facet variable is provided and exists in the sample data
 if (!is.null(opt$facet) && opt$facet != "") {
@@ -67,8 +141,11 @@
     }
 }
 
-# Save to output file using PDF device
-print(paste("Saving plot to:", opt$output))
-pdf(file = opt$output, width = 10, height = 8)
-print(p)
-dev.off()
+# Save to output file
+ggsave(
+    filename = opt$output,
+    plot = p,
+    width = opt$width,
+    height = opt$height,
+    device = opt$device
+)