Mercurial > repos > iuc > phyloseq_plot_richness
comparison phyloseq_plot_bar.R @ 6:a20bc31f2821 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 26f87cc62468c9c329b33246af4f14e2659856f9
| author | iuc |
|---|---|
| date | Fri, 10 Jan 2025 14:58:27 +0000 |
| parents | 1a6c2cc92c6e |
| children | 4faa9c663b38 |
comparison
equal
deleted
inserted
replaced
| 5:ba9c14a1064e | 6:a20bc31f2821 |
|---|---|
| 24 help = "Facet by variable (optional)" | 24 help = "Facet by variable (optional)" |
| 25 ), | 25 ), |
| 26 make_option(c("--output"), | 26 make_option(c("--output"), |
| 27 action = "store", dest = "output", | 27 action = "store", dest = "output", |
| 28 help = "Output file (PDF)" | 28 help = "Output file (PDF)" |
| 29 ), | |
| 30 make_option(c("--topX"), | |
| 31 action = "store", dest = "topX", default = NULL, | |
| 32 help = "Show only the top X taxa based on abundance (e.g., '10') (optional)" | |
| 33 ), | |
| 34 make_option(c("--keepOthers"), | |
| 35 action = "store_true", dest = "keepOthers", default = FALSE, | |
| 36 help = "Keep taxa not in topX and label them as 'Others' (optional)" | |
| 37 ), | |
| 38 make_option(c("--keepNonAssigned"), | |
| 39 action = "store_true", dest = "keepNonAssigned", default = FALSE, | |
| 40 help = "Keep taxa labeled as 'Not Assigned' (optional)" | |
| 41 ), | |
| 42 make_option(c("--normalize"), | |
| 43 action = "store_true", dest = "normalize", default = FALSE, | |
| 44 help = "Normalize abundances to sum to 100% (optional)" | |
| 45 ), | |
| 46 make_option(c("--width"), | |
| 47 action = "store", dest = "width", default = 10, | |
| 48 type = "numeric", help = "Width of the output plot in inches" | |
| 49 ), | |
| 50 make_option(c("--height"), | |
| 51 action = "store", dest = "height", default = 8, | |
| 52 type = "numeric", help = "Height of the output plot in inches" | |
| 53 ), | |
| 54 make_option(c("--device"), | |
| 55 action = "store", dest = "device", default = "pdf", | |
| 56 help = "Output format (e.g., 'pdf', 'png', 'jpeg')" | |
| 29 ) | 57 ) |
| 30 ) | 58 ) |
| 31 | 59 |
| 32 # Parse arguments | 60 # Parse arguments |
| 33 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) | 61 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) |
| 36 | 64 |
| 37 # Validate required options | 65 # Validate required options |
| 38 if (is.null(opt$input) || opt$input == "") { | 66 if (is.null(opt$input) || opt$input == "") { |
| 39 stop("Error: Input file is required.") | 67 stop("Error: Input file is required.") |
| 40 } | 68 } |
| 41 if (is.null(opt$x) || opt$x == "") { | |
| 42 stop("Error: X-axis variable is required.") | |
| 43 } | |
| 44 if (is.null(opt$output) || opt$output == "") { | 69 if (is.null(opt$output) || opt$output == "") { |
| 45 stop("Error: Output file is required.") | 70 stop("Error: Output file is required.") |
| 46 } | 71 } |
| 47 | 72 |
| 48 # Load phyloseq object | 73 # Load phyloseq object |
| 49 print(paste("Trying to read:", opt$input)) | 74 print(paste("Trying to read:", opt$input)) |
| 50 physeq <- readRDS(opt$input) | 75 physeq <- readRDS(opt$input) |
| 51 | 76 |
| 77 # Normalize to relative abundances if requested | |
| 78 if (opt$normalize) { | |
| 79 print("Normalizing abundances to sum to 100%...") | |
| 80 physeq <- transform_sample_counts(physeq, function(x) 100 * x / sum(x)) | |
| 81 } | |
| 82 | |
| 83 if (opt$keepNonAssigned) { | |
| 84 # Add synthetic "Not Assigned" for missing/NA taxa | |
| 85 tax_table(physeq) <- apply(tax_table(physeq), c(1, 2), function(x) ifelse(is.na(x) | x == "", "Not Assigned", x)) | |
| 86 } | |
| 52 # Check if the 'x' and 'fill' variables are valid | 87 # Check if the 'x' and 'fill' variables are valid |
| 53 sample_vars <- colnames(sample_data(physeq)) | 88 sample_vars <- colnames(sample_data(physeq)) |
| 54 if (!opt$x %in% sample_vars) { | 89 |
| 55 stop(paste("Error: X-axis variable", opt$x, "does not exist in the sample data.")) | 90 # If topX is provided, filter the phyloseq object to show only top X taxa |
| 91 if (!is.null(opt$topX) && opt$topX != "") { | |
| 92 topX <- as.numeric(opt$topX) | |
| 93 if (is.na(topX) || topX <= 0) { | |
| 94 stop("Error: topX should be a positive number.") | |
| 95 } | |
| 96 | |
| 97 # Aggregate the data at the selected rank (e.g., Phylum) | |
| 98 tax_rank <- opt$fill # Adjust as necessary | |
| 99 physeq_agg <- tax_glom(physeq, taxrank = tax_rank) | |
| 100 | |
| 101 # Get the abundance of each taxon at the selected rank | |
| 102 taxa_abundance <- taxa_sums(physeq_agg) | |
| 103 | |
| 104 # Summarize the abundance at each taxonomic rank (grouping by taxonomic name) | |
| 105 tax_table_agg <- tax_table(physeq_agg) | |
| 106 taxa_abundance_by_rank <- tapply(taxa_abundance, tax_table_agg[, tax_rank], sum) | |
| 107 | |
| 108 # Identify the top X taxa by summed abundance | |
| 109 top_taxa <- names(sort(taxa_abundance_by_rank, decreasing = TRUE))[1:topX] | |
| 110 | |
| 111 print("Only plotting taxa in TopX taxa group:") | |
| 112 print(top_taxa) | |
| 113 | |
| 114 # Get the OTUs corresponding to the top taxa | |
| 115 otus_in_top_taxa <- rownames(tax_table_agg)[tax_table_agg[, tax_rank] %in% top_taxa] | |
| 116 | |
| 117 if (opt$keepOthers) { | |
| 118 # Label taxa not in top_taxa as "Others" | |
| 119 tax_table(physeq_agg)[, tax_rank][!rownames(tax_table(physeq_agg)) %in% otus_in_top_taxa] <- "Others" | |
| 120 physeq <- physeq_agg | |
| 121 } else { | |
| 122 # Subset the phyloseq object to keep only the top X taxa | |
| 123 physeq_filtered <- prune_taxa(otus_in_top_taxa, physeq_agg) | |
| 124 physeq <- physeq_filtered | |
| 125 } | |
| 56 } | 126 } |
| 57 | 127 |
| 58 # Generate bar plot | 128 # Generate bar plot |
| 59 p <- plot_bar(physeq, x = opt$x, fill = opt$fill) | 129 if (!is.null(opt$x) && opt$x != "") { |
| 130 p <- plot_bar(physeq, x = opt$x, fill = opt$fill) | |
| 131 } else { | |
| 132 p <- plot_bar(physeq, fill = opt$fill) # If no x is provided, don't include x | |
| 133 } | |
| 60 | 134 |
| 61 # Only facet if the facet variable is provided and exists in the sample data | 135 # Only facet if the facet variable is provided and exists in the sample data |
| 62 if (!is.null(opt$facet) && opt$facet != "") { | 136 if (!is.null(opt$facet) && opt$facet != "") { |
| 63 if (opt$facet %in% sample_vars) { | 137 if (opt$facet %in% sample_vars) { |
| 64 p <- p + facet_wrap(as.formula(paste("~", opt$facet))) | 138 p <- p + facet_wrap(as.formula(paste("~", opt$facet))) |
| 65 } else { | 139 } else { |
| 66 warning(paste("Facet variable", opt$facet, "does not exist in the sample data. Faceting will be skipped.")) | 140 warning(paste("Facet variable", opt$facet, "does not exist in the sample data. Faceting will be skipped.")) |
| 67 } | 141 } |
| 68 } | 142 } |
| 69 | 143 |
| 70 # Save to output file using PDF device | 144 # Save to output file |
| 71 print(paste("Saving plot to:", opt$output)) | 145 ggsave( |
| 72 pdf(file = opt$output, width = 10, height = 8) | 146 filename = opt$output, |
| 73 print(p) | 147 plot = p, |
| 74 dev.off() | 148 width = opt$width, |
| 149 height = opt$height, | |
| 150 device = opt$device | |
| 151 ) |
