comparison phyloseq_plot_bar.R @ 8:bbd9d1022144 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d6888da7aba38b97f6cb827355f2de436565684a
author iuc
date Tue, 04 Feb 2025 14:39:35 +0000
parents eab0b21652c4
children
comparison
equal deleted inserted replaced
7:eab0b21652c4 8:bbd9d1022144
52 type = "numeric", help = "Height of the output plot in inches" 52 type = "numeric", help = "Height of the output plot in inches"
53 ), 53 ),
54 make_option(c("--device"), 54 make_option(c("--device"),
55 action = "store", dest = "device", default = "pdf", 55 action = "store", dest = "device", default = "pdf",
56 help = "Output format (e.g., 'pdf', 'png', 'jpeg')" 56 help = "Output format (e.g., 'pdf', 'png', 'jpeg')"
57 ),
58 make_option(c("--nolines"),
59 type = "logical", default = FALSE,
60 help = "Remove borders (lines) around bars (TRUE/FALSE)"
57 ) 61 )
58 ) 62 )
59 63
60 # Parse arguments 64 # Parse arguments
61 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) 65 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
78 if (opt$normalize) { 82 if (opt$normalize) {
79 print("Normalizing abundances to sum to 100%...") 83 print("Normalizing abundances to sum to 100%...")
80 physeq <- transform_sample_counts(physeq, function(x) 100 * x / sum(x)) 84 physeq <- transform_sample_counts(physeq, function(x) 100 * x / sum(x))
81 } 85 }
82 86
87 # Debug: Check available taxonomic ranks
88 print("Available taxonomic ranks:")
89 print(colnames(tax_table(physeq)))
90
91 # Handle missing or unassigned taxa for all ranks
83 if (opt$keepNonAssigned) { 92 if (opt$keepNonAssigned) {
84 # Add synthetic "Not Assigned" for missing/NA taxa 93 # Replace NA or empty values with 'Not Assigned' for all ranks
85 tax_table(physeq) <- apply(tax_table(physeq), c(1, 2), function(x) ifelse(is.na(x) | x == "", "Not Assigned", x)) 94 tax_ranks <- colnames(tax_table(physeq))
95
96 for (rank in tax_ranks) {
97 if (rank %in% colnames(tax_table(physeq))) {
98 # replace NA or empty values with 'Not Assigned'
99 tax_table(physeq)[, rank][is.na(tax_table(physeq)[, rank])] <- "Not Assigned"
100 }
101 }
86 } 102 }
87 # Check if the 'x' and 'fill' variables are valid
88 sample_vars <- colnames(sample_data(physeq))
89 103
90 # If topX is provided, filter the phyloseq object to show only top X taxa 104 # Filter to top X taxa if requested
91 if (!is.null(opt$topX) && opt$topX != "") { 105 if (!is.null(opt$topX) && opt$topX != "") {
92 topX <- as.numeric(opt$topX) 106 topX <- as.numeric(opt$topX)
93 if (is.na(topX) || topX <= 0) { 107 if (is.na(topX) || topX <= 0) {
94 stop("Error: topX should be a positive number.") 108 stop("Error: topX should be a positive number.")
95 } 109 }
96 110
97 # Aggregate the data at the selected rank (e.g., Phylum) 111 tax_rank <- opt$fill
98 tax_rank <- opt$fill # Adjust as necessary 112 if (!tax_rank %in% colnames(tax_table(physeq))) {
113 stop(paste("Error: Tax rank", tax_rank, "not found in tax_table."))
114 }
115
99 physeq_agg <- tax_glom(physeq, taxrank = tax_rank) 116 physeq_agg <- tax_glom(physeq, taxrank = tax_rank)
100
101 # Get the abundance of each taxon at the selected rank
102 taxa_abundance <- taxa_sums(physeq_agg) 117 taxa_abundance <- taxa_sums(physeq_agg)
103
104 # Summarize the abundance at each taxonomic rank (grouping by taxonomic name)
105 tax_table_agg <- tax_table(physeq_agg) 118 tax_table_agg <- tax_table(physeq_agg)
106 taxa_abundance_by_rank <- tapply(taxa_abundance, tax_table_agg[, tax_rank], sum) 119 taxa_abundance_by_rank <- tapply(taxa_abundance, tax_table_agg[, tax_rank], sum)
107
108 # Identify the top X taxa by summed abundance
109 top_taxa <- names(sort(taxa_abundance_by_rank, decreasing = TRUE))[1:topX] 120 top_taxa <- names(sort(taxa_abundance_by_rank, decreasing = TRUE))[1:topX]
110 121
111 print("Only plotting taxa in TopX taxa group:") 122 print("Top taxa:")
112 print(top_taxa) 123 print(top_taxa)
113 124
114 # Get the OTUs corresponding to the top taxa
115 otus_in_top_taxa <- rownames(tax_table_agg)[tax_table_agg[, tax_rank] %in% top_taxa] 125 otus_in_top_taxa <- rownames(tax_table_agg)[tax_table_agg[, tax_rank] %in% top_taxa]
116 126
117 if (opt$keepOthers) { 127 if (opt$keepOthers) {
118 # Label taxa not in top_taxa as "Others" 128 tax_table(physeq_agg)[, tax_rank][!rownames(tax_table_agg) %in% otus_in_top_taxa] <- "Others"
119 tax_table(physeq_agg)[, tax_rank][!rownames(tax_table(physeq_agg)) %in% otus_in_top_taxa] <- "Others"
120 physeq <- physeq_agg 129 physeq <- physeq_agg
121 } else { 130 } else {
122 # Subset the phyloseq object to keep only the top X taxa 131 physeq <- prune_taxa(otus_in_top_taxa, physeq_agg)
123 physeq_filtered <- prune_taxa(otus_in_top_taxa, physeq_agg)
124 physeq <- physeq_filtered
125 } 132 }
126 } 133 }
127 134
128 # Generate bar plot 135 # Generate bar plot
129 if (!is.null(opt$x) && opt$x != "") { 136 if (!is.null(opt$x) && opt$x != "") {
130 p <- plot_bar(physeq, x = opt$x, fill = opt$fill) + 137 p <- plot_bar(physeq, x = opt$x, fill = opt$fill) +
131 geom_bar(aes(color = NULL, fill = !!sym(opt$fill)), stat = "identity", position = "stack") 138 geom_bar(aes(fill = !!sym(opt$fill)),
139 stat = "identity", position = "stack",
140 color = ifelse(opt$nolines, NA, "black")
141 )
132 } else { 142 } else {
133 p <- plot_bar(physeq, fill = opt$fill) + 143 p <- plot_bar(physeq, fill = opt$fill) +
134 geom_bar(aes(color = NULL, fill = !!sym(opt$fill)), stat = "identity", position = "stack") 144 geom_bar(aes(fill = !!sym(opt$fill)),
145 stat = "identity", position = "stack",
146 color = ifelse(opt$nolines, NA, "black")
147 )
135 } 148 }
136 149
137 # Only facet if the facet variable is provided and exists in the sample data 150 # Optional: Add faceting if specified
138 if (!is.null(opt$facet) && opt$facet != "") { 151 if (!is.null(opt$facet) && opt$facet != "") {
152 sample_vars <- colnames(sample_data(physeq))
139 if (opt$facet %in% sample_vars) { 153 if (opt$facet %in% sample_vars) {
140 p <- p + facet_wrap(as.formula(paste("~", opt$facet))) 154 p <- p + facet_wrap(as.formula(paste("~", opt$facet)))
141 } else { 155 } else {
142 warning(paste("Facet variable", opt$facet, "does not exist in the sample data. Faceting will be skipped.")) 156 warning(paste("Facet variable", opt$facet, "not found in sample data. Skipping faceting."))
143 } 157 }
144 } 158 }
145
146 159
147 # Save to output file 160 # Save to output file
148 ggsave( 161 ggsave(
149 filename = opt$output, 162 filename = opt$output,
150 plot = p, 163 plot = p,