Mercurial > repos > iuc > phyloseq_plot_richness
comparison phyloseq_plot_bar.R @ 8:d6cbeb48294d draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d6888da7aba38b97f6cb827355f2de436565684a
| author | iuc |
|---|---|
| date | Tue, 04 Feb 2025 14:39:26 +0000 |
| parents | 4faa9c663b38 |
| children | 10a7732528b2 |
comparison
equal
deleted
inserted
replaced
| 7:4faa9c663b38 | 8:d6cbeb48294d |
|---|---|
| 52 type = "numeric", help = "Height of the output plot in inches" | 52 type = "numeric", help = "Height of the output plot in inches" |
| 53 ), | 53 ), |
| 54 make_option(c("--device"), | 54 make_option(c("--device"), |
| 55 action = "store", dest = "device", default = "pdf", | 55 action = "store", dest = "device", default = "pdf", |
| 56 help = "Output format (e.g., 'pdf', 'png', 'jpeg')" | 56 help = "Output format (e.g., 'pdf', 'png', 'jpeg')" |
| 57 ), | |
| 58 make_option(c("--nolines"), | |
| 59 type = "logical", default = FALSE, | |
| 60 help = "Remove borders (lines) around bars (TRUE/FALSE)" | |
| 57 ) | 61 ) |
| 58 ) | 62 ) |
| 59 | 63 |
| 60 # Parse arguments | 64 # Parse arguments |
| 61 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) | 65 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list) |
| 78 if (opt$normalize) { | 82 if (opt$normalize) { |
| 79 print("Normalizing abundances to sum to 100%...") | 83 print("Normalizing abundances to sum to 100%...") |
| 80 physeq <- transform_sample_counts(physeq, function(x) 100 * x / sum(x)) | 84 physeq <- transform_sample_counts(physeq, function(x) 100 * x / sum(x)) |
| 81 } | 85 } |
| 82 | 86 |
| 87 # Debug: Check available taxonomic ranks | |
| 88 print("Available taxonomic ranks:") | |
| 89 print(colnames(tax_table(physeq))) | |
| 90 | |
| 91 # Handle missing or unassigned taxa for all ranks | |
| 83 if (opt$keepNonAssigned) { | 92 if (opt$keepNonAssigned) { |
| 84 # Add synthetic "Not Assigned" for missing/NA taxa | 93 # Replace NA or empty values with 'Not Assigned' for all ranks |
| 85 tax_table(physeq) <- apply(tax_table(physeq), c(1, 2), function(x) ifelse(is.na(x) | x == "", "Not Assigned", x)) | 94 tax_ranks <- colnames(tax_table(physeq)) |
| 95 | |
| 96 for (rank in tax_ranks) { | |
| 97 if (rank %in% colnames(tax_table(physeq))) { | |
| 98 # replace NA or empty values with 'Not Assigned' | |
| 99 tax_table(physeq)[, rank][is.na(tax_table(physeq)[, rank])] <- "Not Assigned" | |
| 100 } | |
| 101 } | |
| 86 } | 102 } |
| 87 # Check if the 'x' and 'fill' variables are valid | |
| 88 sample_vars <- colnames(sample_data(physeq)) | |
| 89 | 103 |
| 90 # If topX is provided, filter the phyloseq object to show only top X taxa | 104 # Filter to top X taxa if requested |
| 91 if (!is.null(opt$topX) && opt$topX != "") { | 105 if (!is.null(opt$topX) && opt$topX != "") { |
| 92 topX <- as.numeric(opt$topX) | 106 topX <- as.numeric(opt$topX) |
| 93 if (is.na(topX) || topX <= 0) { | 107 if (is.na(topX) || topX <= 0) { |
| 94 stop("Error: topX should be a positive number.") | 108 stop("Error: topX should be a positive number.") |
| 95 } | 109 } |
| 96 | 110 |
| 97 # Aggregate the data at the selected rank (e.g., Phylum) | 111 tax_rank <- opt$fill |
| 98 tax_rank <- opt$fill # Adjust as necessary | 112 if (!tax_rank %in% colnames(tax_table(physeq))) { |
| 113 stop(paste("Error: Tax rank", tax_rank, "not found in tax_table.")) | |
| 114 } | |
| 115 | |
| 99 physeq_agg <- tax_glom(physeq, taxrank = tax_rank) | 116 physeq_agg <- tax_glom(physeq, taxrank = tax_rank) |
| 100 | |
| 101 # Get the abundance of each taxon at the selected rank | |
| 102 taxa_abundance <- taxa_sums(physeq_agg) | 117 taxa_abundance <- taxa_sums(physeq_agg) |
| 103 | |
| 104 # Summarize the abundance at each taxonomic rank (grouping by taxonomic name) | |
| 105 tax_table_agg <- tax_table(physeq_agg) | 118 tax_table_agg <- tax_table(physeq_agg) |
| 106 taxa_abundance_by_rank <- tapply(taxa_abundance, tax_table_agg[, tax_rank], sum) | 119 taxa_abundance_by_rank <- tapply(taxa_abundance, tax_table_agg[, tax_rank], sum) |
| 107 | |
| 108 # Identify the top X taxa by summed abundance | |
| 109 top_taxa <- names(sort(taxa_abundance_by_rank, decreasing = TRUE))[1:topX] | 120 top_taxa <- names(sort(taxa_abundance_by_rank, decreasing = TRUE))[1:topX] |
| 110 | 121 |
| 111 print("Only plotting taxa in TopX taxa group:") | 122 print("Top taxa:") |
| 112 print(top_taxa) | 123 print(top_taxa) |
| 113 | 124 |
| 114 # Get the OTUs corresponding to the top taxa | |
| 115 otus_in_top_taxa <- rownames(tax_table_agg)[tax_table_agg[, tax_rank] %in% top_taxa] | 125 otus_in_top_taxa <- rownames(tax_table_agg)[tax_table_agg[, tax_rank] %in% top_taxa] |
| 116 | 126 |
| 117 if (opt$keepOthers) { | 127 if (opt$keepOthers) { |
| 118 # Label taxa not in top_taxa as "Others" | 128 tax_table(physeq_agg)[, tax_rank][!rownames(tax_table_agg) %in% otus_in_top_taxa] <- "Others" |
| 119 tax_table(physeq_agg)[, tax_rank][!rownames(tax_table(physeq_agg)) %in% otus_in_top_taxa] <- "Others" | |
| 120 physeq <- physeq_agg | 129 physeq <- physeq_agg |
| 121 } else { | 130 } else { |
| 122 # Subset the phyloseq object to keep only the top X taxa | 131 physeq <- prune_taxa(otus_in_top_taxa, physeq_agg) |
| 123 physeq_filtered <- prune_taxa(otus_in_top_taxa, physeq_agg) | |
| 124 physeq <- physeq_filtered | |
| 125 } | 132 } |
| 126 } | 133 } |
| 127 | 134 |
| 128 # Generate bar plot | 135 # Generate bar plot |
| 129 if (!is.null(opt$x) && opt$x != "") { | 136 if (!is.null(opt$x) && opt$x != "") { |
| 130 p <- plot_bar(physeq, x = opt$x, fill = opt$fill) + | 137 p <- plot_bar(physeq, x = opt$x, fill = opt$fill) + |
| 131 geom_bar(aes(color = NULL, fill = !!sym(opt$fill)), stat = "identity", position = "stack") | 138 geom_bar(aes(fill = !!sym(opt$fill)), |
| 139 stat = "identity", position = "stack", | |
| 140 color = ifelse(opt$nolines, NA, "black") | |
| 141 ) | |
| 132 } else { | 142 } else { |
| 133 p <- plot_bar(physeq, fill = opt$fill) + | 143 p <- plot_bar(physeq, fill = opt$fill) + |
| 134 geom_bar(aes(color = NULL, fill = !!sym(opt$fill)), stat = "identity", position = "stack") | 144 geom_bar(aes(fill = !!sym(opt$fill)), |
| 145 stat = "identity", position = "stack", | |
| 146 color = ifelse(opt$nolines, NA, "black") | |
| 147 ) | |
| 135 } | 148 } |
| 136 | 149 |
| 137 # Only facet if the facet variable is provided and exists in the sample data | 150 # Optional: Add faceting if specified |
| 138 if (!is.null(opt$facet) && opt$facet != "") { | 151 if (!is.null(opt$facet) && opt$facet != "") { |
| 152 sample_vars <- colnames(sample_data(physeq)) | |
| 139 if (opt$facet %in% sample_vars) { | 153 if (opt$facet %in% sample_vars) { |
| 140 p <- p + facet_wrap(as.formula(paste("~", opt$facet))) | 154 p <- p + facet_wrap(as.formula(paste("~", opt$facet))) |
| 141 } else { | 155 } else { |
| 142 warning(paste("Facet variable", opt$facet, "does not exist in the sample data. Faceting will be skipped.")) | 156 warning(paste("Facet variable", opt$facet, "not found in sample data. Skipping faceting.")) |
| 143 } | 157 } |
| 144 } | 158 } |
| 145 | |
| 146 | 159 |
| 147 # Save to output file | 160 # Save to output file |
| 148 ggsave( | 161 ggsave( |
| 149 filename = opt$output, | 162 filename = opt$output, |
| 150 plot = p, | 163 plot = p, |
