Mercurial > repos > iuc > phyloseq_plot_richness
comparison phyloseq_plot_bar.R @ 9:10a7732528b2 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit a5ae2f86b2955290a4c81ab234f02cc51020f970
| author | iuc |
|---|---|
| date | Thu, 13 Mar 2025 09:48:48 +0000 |
| parents | d6cbeb48294d |
| children |
comparison
equal
deleted
inserted
replaced
| 8:d6cbeb48294d | 9:10a7732528b2 |
|---|---|
| 2 | 2 |
| 3 # Load libraries | 3 # Load libraries |
| 4 suppressPackageStartupMessages(library("optparse")) | 4 suppressPackageStartupMessages(library("optparse")) |
| 5 suppressPackageStartupMessages(library("phyloseq")) | 5 suppressPackageStartupMessages(library("phyloseq")) |
| 6 suppressPackageStartupMessages(library("ggplot2")) | 6 suppressPackageStartupMessages(library("ggplot2")) |
| 7 suppressPackageStartupMessages(library("dplyr")) | |
| 7 | 8 |
| 8 # Define options | 9 # Define options |
| 9 option_list <- list( | 10 option_list <- list( |
| 10 make_option(c("--input"), | 11 make_option(c("--input"), |
| 11 action = "store", dest = "input", | 12 action = "store", dest = "input", |
| 15 action = "store", dest = "x", | 16 action = "store", dest = "x", |
| 16 help = "Variable for x-axis (e.g., 'Sample', 'Phylum')" | 17 help = "Variable for x-axis (e.g., 'Sample', 'Phylum')" |
| 17 ), | 18 ), |
| 18 make_option(c("--fill"), | 19 make_option(c("--fill"), |
| 19 action = "store", dest = "fill", default = NULL, | 20 action = "store", dest = "fill", default = NULL, |
| 20 help = "Variable for fill color (e.g., 'Genus', 'Order') (optional)" | 21 help = "Variable for fill color (e.g., 'Genus', 'Order'). Use 'ASV' as argument to show each OTU/ASV." |
| 21 ), | 22 ), |
| 22 make_option(c("--facet"), | 23 make_option(c("--facet"), |
| 23 action = "store", dest = "facet", default = NULL, | 24 action = "store", dest = "facet", default = NULL, |
| 24 help = "Facet by variable (optional)" | 25 help = "Facet by variable (optional)" |
| 25 ), | 26 ), |
| 41 ), | 42 ), |
| 42 make_option(c("--normalize"), | 43 make_option(c("--normalize"), |
| 43 action = "store_true", dest = "normalize", default = FALSE, | 44 action = "store_true", dest = "normalize", default = FALSE, |
| 44 help = "Normalize abundances to sum to 100% (optional)" | 45 help = "Normalize abundances to sum to 100% (optional)" |
| 45 ), | 46 ), |
| 47 make_option(c("--normalize_x"), | |
| 48 action = "store_true", dest = "normalize_x", default = FALSE, | |
| 49 help = "Normalize x groups to sum up to 100%" | |
| 50 ), | |
| 46 make_option(c("--width"), | 51 make_option(c("--width"), |
| 47 action = "store", dest = "width", default = 10, | 52 action = "store", dest = "width", default = 10, |
| 48 type = "numeric", help = "Width of the output plot in inches" | 53 type = "numeric", help = "Width of the output plot in inches" |
| 49 ), | 54 ), |
| 50 make_option(c("--height"), | 55 make_option(c("--height"), |
| 51 action = "store", dest = "height", default = 8, | 56 action = "store", dest = "height", default = 8, |
| 52 type = "numeric", help = "Height of the output plot in inches" | 57 type = "numeric", help = "Height of the output plot in inches" |
| 53 ), | 58 ), |
| 54 make_option(c("--device"), | 59 make_option(c("--device"), |
| 55 action = "store", dest = "device", default = "pdf", | 60 action = "store", dest = "device", default = "pdf", |
| 56 help = "Output format (e.g., 'pdf', 'png', 'jpeg')" | 61 help = "Output format (e.g., 'pdf', 'png', 'jpg')" |
| 57 ), | 62 ), |
| 58 make_option(c("--nolines"), | 63 make_option(c("--nolines"), |
| 59 type = "logical", default = FALSE, | 64 action = "store_true", dest = "nolines", default = FALSE, |
| 60 help = "Remove borders (lines) around bars (TRUE/FALSE)" | 65 help = "Remove borders (lines) around bars (TRUE/FALSE)" |
| 61 ) | 66 ) |
| 62 ) | 67 ) |
| 68 | |
| 63 | 69 |
| 64 # Parse arguments | 70 # Parse arguments |
| 65 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) | 71 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) |
| 66 args <- parse_args(parser, positional_arguments = TRUE) | 72 args <- parse_args(parser, positional_arguments = TRUE) |
| 67 opt <- args$options | 73 opt <- args$options |
| 68 | 74 |
| 75 | |
| 69 # Validate required options | 76 # Validate required options |
| 70 if (is.null(opt$input) || opt$input == "") { | 77 if (is.null(opt$input) || opt$input == "") { |
| 71 stop("Error: Input file is required.") | 78 stop("Error: Input file is required.") |
| 72 } | 79 } |
| 73 if (is.null(opt$output) || opt$output == "") { | 80 if (is.null(opt$output) || opt$output == "") { |
| 74 stop("Error: Output file is required.") | 81 stop("Error: Output file is required.") |
| 75 } | 82 } |
| 76 | 83 |
| 84 if (is.null(opt$fill) || opt$fill == "") { | |
| 85 print(paste("No fill chosen using ASV")) | |
| 86 opt$fill <- "ASV" | |
| 87 } | |
| 88 | |
| 77 # Load phyloseq object | 89 # Load phyloseq object |
| 78 print(paste("Trying to read:", opt$input)) | 90 print(paste("Trying to read:", opt$input)) |
| 79 physeq <- readRDS(opt$input) | 91 physeq <- readRDS(opt$input) |
| 92 | |
| 93 ## Allow to use OTU as tax group | |
| 94 # Extract rownames (taxids) from the tax_table and add them as a new column | |
| 95 taxids <- rownames(tax_table(physeq)) | |
| 96 | |
| 97 # Get the number of columns in the tax_table | |
| 98 num_columns <- ncol(tax_table(physeq)) | |
| 99 | |
| 100 # Add the taxids as a new last column in the tax_table | |
| 101 tax_table(physeq) <- cbind(tax_table(physeq), taxid = taxids) | |
| 102 | |
| 103 # Rename the last column to 'ASV' / OTU does conflict with phyloseq logic | |
| 104 colnames(tax_table(physeq))[num_columns + 1] <- "ASV" | |
| 80 | 105 |
| 81 # Normalize to relative abundances if requested | 106 # Normalize to relative abundances if requested |
| 82 if (opt$normalize) { | 107 if (opt$normalize) { |
| 83 print("Normalizing abundances to sum to 100%...") | 108 print("Normalizing abundances to sum to 100%...") |
| 84 physeq <- transform_sample_counts(physeq, function(x) 100 * x / sum(x)) | 109 physeq <- transform_sample_counts(physeq, function(x) 100 * x / sum(x)) |
| 85 } | 110 } |
| 86 | 111 |
| 87 # Debug: Check available taxonomic ranks | 112 # Debug: Check available taxonomic ranks |
| 113 | |
| 114 tax_ranks <- colnames(tax_table(physeq)) | |
| 115 sample_vars <- colnames(sample_data(physeq)) | |
| 116 | |
| 88 print("Available taxonomic ranks:") | 117 print("Available taxonomic ranks:") |
| 89 print(colnames(tax_table(physeq))) | 118 print(tax_ranks) |
| 119 | |
| 120 print("Available metadata:") | |
| 121 print(sample_vars) | |
| 90 | 122 |
| 91 # Handle missing or unassigned taxa for all ranks | 123 # Handle missing or unassigned taxa for all ranks |
| 92 if (opt$keepNonAssigned) { | 124 if (opt$keepNonAssigned) { |
| 93 # Replace NA or empty values with 'Not Assigned' for all ranks | 125 # Replace NA or empty values with 'Not Assigned' for all ranks |
| 94 tax_ranks <- colnames(tax_table(physeq)) | |
| 95 | 126 |
| 96 for (rank in tax_ranks) { | 127 for (rank in tax_ranks) { |
| 97 if (rank %in% colnames(tax_table(physeq))) { | 128 if (rank %in% tax_ranks) { |
| 98 # replace NA or empty values with 'Not Assigned' | 129 # replace NA or empty values with 'Not Assigned' |
| 99 tax_table(physeq)[, rank][is.na(tax_table(physeq)[, rank])] <- "Not Assigned" | 130 tax_table(physeq)[, rank][is.na(tax_table(physeq)[, rank])] <- "Not Assigned" |
| 100 } | 131 } |
| 101 } | 132 } |
| 102 } | 133 } |
| 122 print("Top taxa:") | 153 print("Top taxa:") |
| 123 print(top_taxa) | 154 print(top_taxa) |
| 124 | 155 |
| 125 otus_in_top_taxa <- rownames(tax_table_agg)[tax_table_agg[, tax_rank] %in% top_taxa] | 156 otus_in_top_taxa <- rownames(tax_table_agg)[tax_table_agg[, tax_rank] %in% top_taxa] |
| 126 | 157 |
| 158 # Group non-top OTUs as 'Others' if requested | |
| 127 if (opt$keepOthers) { | 159 if (opt$keepOthers) { |
| 160 # Update the tax_table to assign 'Others' to non-top taxa | |
| 128 tax_table(physeq_agg)[, tax_rank][!rownames(tax_table_agg) %in% otus_in_top_taxa] <- "Others" | 161 tax_table(physeq_agg)[, tax_rank][!rownames(tax_table_agg) %in% otus_in_top_taxa] <- "Others" |
| 129 physeq <- physeq_agg | 162 physeq <- physeq_agg |
| 130 } else { | 163 } else { |
| 131 physeq <- prune_taxa(otus_in_top_taxa, physeq_agg) | 164 physeq <- prune_taxa(otus_in_top_taxa, physeq_agg) |
| 132 } | 165 } |
| 133 } | 166 } |
| 134 | 167 |
| 168 | |
| 169 # normalize x groups if needed | |
| 170 if (opt$x %in% sample_vars) { | |
| 171 if (opt$normalize_x && !is.null(opt$x) && opt$x != "") { | |
| 172 physeq_agg <- merge_samples(physeq, opt$x) | |
| 173 | |
| 174 physeq <- transform_sample_counts(physeq_agg, function(x) (x / sum(x) * 100)) | |
| 175 opt$x <- NULL # set to Null since we do not need x for downstream now | |
| 176 opt$facet <- NULL # set to Null since facetting does not work with normalize x | |
| 177 warning(paste("normalize x does not work with facetting")) | |
| 178 } | |
| 179 } else { | |
| 180 warning(paste("x", opt$x, "not found in sample data. Skipping normalize_x.")) | |
| 181 } | |
| 182 | |
| 183 | |
| 184 # Check if the facet variable is valid and exists | |
| 185 facet_var <- NULL | |
| 186 if (!is.null(opt$facet) && opt$facet != "") { | |
| 187 if (opt$facet %in% sample_vars || opt$facet %in% tax_ranks) { | |
| 188 facet_var <- opt$facet # Store facet variable for later | |
| 189 } else { | |
| 190 warning(paste("Facet variable", opt$facet, "not found in sample data or tax ranks. Skipping faceting.")) | |
| 191 } | |
| 192 } | |
| 193 | |
| 194 # Determine if faceting is needed | |
| 195 facet_formula <- if (!is.null(facet_var)) as.formula(paste("~", facet_var)) else NULL | |
| 196 | |
| 197 # Define color based on the `nolines` option | |
| 198 plot_color <- ifelse(opt$nolines, NA, "black") | |
| 199 | |
| 135 # Generate bar plot | 200 # Generate bar plot |
| 136 if (!is.null(opt$x) && opt$x != "") { | 201 if (!is.null(opt$x) && opt$x != "") { |
| 137 p <- plot_bar(physeq, x = opt$x, fill = opt$fill) + | 202 p <- plot_bar(physeq, |
| 138 geom_bar(aes(fill = !!sym(opt$fill)), | 203 x = opt$x, |
| 139 stat = "identity", position = "stack", | 204 fill = opt$fill |
| 140 color = ifelse(opt$nolines, NA, "black") | 205 ) + facet_wrap(facet_formula, scales = "free_x") + |
| 206 geom_bar( | |
| 207 stat = "identity", | |
| 208 position = "stack", | |
| 209 aes(fill = !!sym(opt$fill)), | |
| 210 color = plot_color | |
| 141 ) | 211 ) |
| 142 } else { | 212 } else { |
| 143 p <- plot_bar(physeq, fill = opt$fill) + | 213 p <- plot_bar(physeq, |
| 144 geom_bar(aes(fill = !!sym(opt$fill)), | 214 fill = opt$fill |
| 145 stat = "identity", position = "stack", | 215 ) + facet_wrap(facet_formula, scales = "free_x") + |
| 146 color = ifelse(opt$nolines, NA, "black") | 216 geom_bar( |
| 217 stat = "identity", | |
| 218 position = "stack", | |
| 219 aes(fill = !!sym(opt$fill)), | |
| 220 color = plot_color | |
| 147 ) | 221 ) |
| 148 } | 222 } |
| 149 | 223 |
| 150 # Optional: Add faceting if specified | 224 |
| 151 if (!is.null(opt$facet) && opt$facet != "") { | 225 # Reorder fill levels to ensure "Not Assigned" and "Others" are at the bottom if they exist |
| 152 sample_vars <- colnames(sample_data(physeq)) | 226 fill_values <- unique(p$data[[opt$fill]]) # Get unique fill values |
| 153 if (opt$facet %in% sample_vars) { | 227 new_levels <- setdiff(fill_values, c("Not Assigned", "Others")) # Exclude "Not Assigned" and "Others" |
| 154 p <- p + facet_wrap(as.formula(paste("~", opt$facet))) | 228 |
| 155 } else { | 229 if ("Not Assigned" %in% fill_values) { |
| 156 warning(paste("Facet variable", opt$facet, "not found in sample data. Skipping faceting.")) | 230 new_levels <- c("Not Assigned", new_levels) # Place "Not Assigned" at the bottom if it exists |
| 157 } | 231 } |
| 158 } | 232 if ("Others" %in% fill_values) { |
| 233 new_levels <- c("Others", new_levels) # Place "Others" at the bottom if it exists | |
| 234 } | |
| 235 | |
| 236 # Apply the new levels to the fill variable in the plot data | |
| 237 p$data[[opt$fill]] <- factor(p$data[[opt$fill]], levels = new_levels) | |
| 159 | 238 |
| 160 # Save to output file | 239 # Save to output file |
| 161 ggsave( | 240 ggsave( |
| 162 filename = opt$output, | 241 filename = opt$output, |
| 163 plot = p, | 242 plot = p, |
