Mercurial > repos > iuc > phyloseq_plot_ordination
changeset 10:cee4982a717b draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
author | iuc |
---|---|
date | Fri, 04 Apr 2025 10:16:43 +0000 |
parents | 32f892954ef6 |
children | |
files | phyloseq_tax_glom.R test-data/SRR1770594.phyloseq test-data/tax_glom_output0.tabular test-data/tax_glom_output1.tabular test-data/tax_glom_output2.tabular test-data/tax_glom_output3.tabular test-data/tax_glom_output4.tabular test-data/tax_glom_output5.tabular |
diffstat | 8 files changed, 153 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
# phyloseq_tax_glom.R
#
# Agglomerate the taxa of a phyloseq object at one taxonomic rank using
# phyloseq::tax_glom() and write the taxonomy together with the per-sample
# counts to "physeq_<rank>_table.tsv" in the current working directory.
#
# Command-line options:
#   -i / --input          Path to an RDS file containing the phyloseq object.
#   -r / --rank           Taxonomic rank to agglomerate at.
#   --exclude_otu_ids     Leave out the leading "OTU ID" column.
#   --single_rank         Keep only the rank column and the numeric count
#                         columns, summed per rank value. Only allowed
#                         together with --exclude_otu_ids.
#   --exclude_na_values   Forwarded to tax_glom() as its NArm argument.

suppressPackageStartupMessages(library("phyloseq"))
suppressPackageStartupMessages(library("dplyr"))
suppressPackageStartupMessages(library("optparse"))

# Define command-line options
option_list <- list(
  make_option(
    c("-i", "--input"),
    type = "character",
    help = "Path to the phyloseq RDS file",
    metavar = "FILE"
  ),
  make_option(
    c("-r", "--rank"),
    type = "character",
    help = "Taxonomic rank for aggregation"
  ),
  make_option(
    "--exclude_otu_ids",
    action = "store_true",
    default = FALSE,
    help = "Exclude OTU IDs from output"
  ),
  make_option(
    "--single_rank",
    action = "store_true",
    default = FALSE,
    help = "Only output the specified rank column"
  ),
  make_option(
    "--exclude_na_values",
    action = "store_true",
    default = FALSE,
    help = "Exclude NA values during tax_glom"
  )
)

# Parse arguments
opt <- parse_args(OptionParser(option_list = option_list))

# Validate arguments
if (is.null(opt$input) || is.null(opt$rank)) {
  stop("Error: --input and --rank are required arguments.")
}

if (opt$single_rank && !opt$exclude_otu_ids) {
  stop("Error: --single_rank can only be used if --exclude_otu_ids is also specified.")
}

# Load the phyloseq object
physeq <- readRDS(opt$input)

# Fail early with a clear message if the RDS file holds something else.
if (!inherits(physeq, "phyloseq")) {
  stop("Error: --input does not contain a phyloseq object.")
}

# Print available taxonomic ranks
available_ranks <- rank_names(physeq)
cat("Available taxonomic ranks:\n")
print(available_ranks)

# Print original number of OTUs
cat("Original number of OTUs:", ntaxa(physeq), "\n")

# Check the requested rank after the ranks were printed, so the log shows
# the valid choices next to the error.
if (!(opt$rank %in% available_ranks)) {
  stop(
    "Error: rank '", opt$rank, "' is not available. Choose one of: ",
    paste(available_ranks, collapse = ", ")
  )
}

# Perform tax_glom
physeq_agg <- tax_glom(
  physeq,
  taxrank = opt$rank,
  NArm = opt$exclude_na_values
)

# Print new number of taxa after agglomeration
cat(
  "Number of taxa after agglomeration at", opt$rank, "level:",
  ntaxa(physeq_agg), "\n"
)

# Extract the taxonomy table after agglomeration
tax_df <- as.data.frame(tax_table(physeq_agg))

# Convert taxonomic columns to character to preserve NA values
tax_df[] <- lapply(tax_df, as.character)

# Extract the OTU abundance table with taxa as rows. Objects stored with
# samples as rows must be transposed first, otherwise the counts cannot be
# bound column-wise to the taxonomy (one row per taxon).
otu_counts <- otu_table(physeq_agg)
if (!taxa_are_rows(otu_counts)) {
  otu_counts <- t(otu_counts)
}
otu_df <- as.data.frame(otu_counts)

# Align the count rows to the taxonomy rows by taxon name instead of
# relying on both tables being in the same order.
row_idx <- match(rownames(tax_df), rownames(otu_df))
if (anyNA(row_idx)) {
  stop("Error: taxa in the taxonomy table are missing from the OTU table.")
}
otu_df <- otu_df[row_idx, , drop = FALSE]

# Add OTU ID column unless excluded
if (!opt$exclude_otu_ids) {
  tax_df <- cbind("OTU ID" = rownames(tax_df), tax_df)
}

# Append the abundance columns to the taxonomic information
result <- cbind(tax_df, otu_df)

if (opt$single_rank) {
  # Keep only the specified taxonomic rank column and numeric count columns,
  # then sum the counts for each value of that rank.
  result <- result %>%
    select(all_of(opt$rank), where(is.numeric)) %>%
    group_by(across(all_of(opt$rank))) %>%
    summarise(across(where(is.numeric), sum), .groups = "drop")
}

# Save the output as a TSV file
output_file <- paste0("physeq_", opt$rank, "_table.tsv")
write.table(
  result,
  file = output_file,
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tax_glom_output0.tabular Fri Apr 04 10:16:43 2025 +0000 @@ -0,0 +1,18 @@ +OTU ID Superkingdom Kingdom Phylum Class Order Family Genus sa1 +55939 sk__Archaea k__ p__Thaumarchaeota c__Nitrososphaeria NA NA NA 1 +220170 sk__Archaea k__ p__Thaumarchaeota c__Nitrososphaeria o__Nitrososphaerales f__Nitrososphaeraceae NA 1 +107861 sk__Bacteria k__ p__Acidobacteria c__Acidobacteriia o__Bryobacterales f__Bryobacteraceae g__Bryobacter 2 +196447 sk__Bacteria k__ p__Acidobacteria c__Thermoanaerobaculia o__Thermoanaerobaculales f__Thermoanaerobaculaceae NA 1 +23596 sk__Bacteria k__ p__Actinobacteria NA NA NA NA 1 +126258 sk__Bacteria k__ p__Actinobacteria c__Acidimicrobiia NA NA NA 2 +105940 sk__Bacteria k__ p__Actinobacteria c__Actinobacteria NA NA NA 1 +8251 sk__Bacteria k__ p__Actinobacteria c__Actinobacteria o__Micromonosporales f__Micromonosporaceae NA 2 +58206 sk__Bacteria k__ p__Actinobacteria c__Rubrobacteria o__Gaiellales NA NA 1 +127789 sk__Bacteria k__ p__Bacteroidetes c__Chitinophagia o__Chitinophagales NA NA 1 +173872 sk__Bacteria k__ p__Bacteroidetes c__Chitinophagia o__Chitinophagales f__Chitinophagaceae g__Flavisolibacter 1 +176861 sk__Bacteria k__ p__Bacteroidetes c__Cytophagia o__Cytophagales NA NA 1 +166810 sk__Bacteria k__ p__Bacteroidetes c__Cytophagia o__Cytophagales f__Microscillaceae NA 1 +93849 sk__Bacteria k__ p__Cyanobacteria NA NA NA NA 1 +111794 sk__Bacteria k__ p__Proteobacteria c__Alphaproteobacteria o__Rhizobiales f__Bradyrhizobiaceae NA 1 +83155 sk__Bacteria k__ p__Verrucomicrobia c__Verrucomicrobiae NA NA NA 2 +125725 sk__Eukaryota k__ p__ c__Bigyra o__Amphifilida f__ g__Sorodiplophrys 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tax_glom_output1.tabular Fri Apr 04 10:16:43 2025 +0000 @@ -0,0 +1,4 @@ +OTU ID Superkingdom Kingdom Phylum Class Order Family Genus sa1 +107861 sk__Bacteria k__ p__Acidobacteria c__Acidobacteriia o__Bryobacterales f__Bryobacteraceae g__Bryobacter 2 +173872 sk__Bacteria k__ p__Bacteroidetes c__Chitinophagia o__Chitinophagales f__Chitinophagaceae g__Flavisolibacter 1 +125725 sk__Eukaryota k__ p__ c__Bigyra o__Amphifilida f__ g__Sorodiplophrys 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tax_glom_output2.tabular Fri Apr 04 10:16:43 2025 +0000 @@ -0,0 +1,4 @@ +Superkingdom Kingdom Phylum Class Order Family Genus sa1 +sk__Bacteria k__ p__Acidobacteria c__Acidobacteriia o__Bryobacterales f__Bryobacteraceae g__Bryobacter 2 +sk__Bacteria k__ p__Bacteroidetes c__Chitinophagia o__Chitinophagales f__Chitinophagaceae g__Flavisolibacter 1 +sk__Eukaryota k__ p__ c__Bigyra o__Amphifilida f__ g__Sorodiplophrys 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tax_glom_output3.tabular Fri Apr 04 10:16:43 2025 +0000 @@ -0,0 +1,4 @@ +Genus sa1 +g__Bryobacter 2 +g__Flavisolibacter 1 +g__Sorodiplophrys 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tax_glom_output4.tabular Fri Apr 04 10:16:43 2025 +0000 @@ -0,0 +1,25 @@ +OTU ID Kingdom Phylum Class Order Family Genus SRR14190457 SRR14190458 +ASV1 Bacteria Firmicutes Bacilli Lactobacillales Lactobacillaceae NA 914 534 +ASV2 Bacteria Bacteroidota Bacteroidia Bacteroidales Bacteroidaceae NA 488 215 +ASV3 Bacteria Actinobacteriota Actinobacteria Bifidobacteriales Bifidobacteriaceae NA 199 157 +ASV5 Bacteria Firmicutes Clostridia Lachnospirales Lachnospiraceae NA 531 419 +ASV9 Bacteria Firmicutes Bacilli Bacillales Bacillaceae NA 88 26 +ASV20 Bacteria Firmicutes Bacilli Erysipelotrichales Erysipelotrichaceae NA 101 99 +ASV28 Bacteria Firmicutes Clostridia Oscillospirales Butyricicoccaceae NA 55 47 +ASV29 Bacteria Proteobacteria Gammaproteobacteria Burkholderiales Sutterellaceae NA 54 47 +ASV31 Bacteria Firmicutes Negativicutes Veillonellales-Selenomonadales Veillonellaceae NA 119 83 +ASV34 Bacteria Bacteroidota Bacteroidia Bacteroidales Porphyromonadaceae NA 51 53 +ASV36 Bacteria Firmicutes Negativicutes Acidaminococcales Acidaminococcaceae NA 47 54 +ASV41 Bacteria Firmicutes Bacilli Mycoplasmatales Mycoplasmataceae NA 42 58 +ASV43 Bacteria Firmicutes Bacilli Lactobacillales Streptococcaceae NA 40 60 +ASV45 Bacteria Firmicutes Bacilli Staphylococcales Staphylococcaceae NA 38 62 +ASV47 Bacteria Bacteroidota Bacteroidia Bacteroidales Prevotellaceae NA 32 66 +ASV51 Bacteria Firmicutes Clostridia Oscillospirales Oscillospiraceae NA 28 71 +ASV52 Bacteria Firmicutes Clostridia Oscillospirales Ruminococcaceae NA 28 71 +ASV55 Bacteria Actinobacteriota Actinobacteria Corynebacteriales Corynebacteriaceae NA 26 86 +ASV59 Bacteria Proteobacteria Gammaproteobacteria Pseudomonadales Pseudomonadaceae NA 22 104 +ASV60 Bacteria Proteobacteria Gammaproteobacteria Enterobacterales Enterobacteriaceae NA 503 607 +ASV61 Bacteria Firmicutes Clostridia Peptostreptococcales-Tissierellales Peptostreptococcaceae NA 22 128 +ASV62 
Bacteria Actinobacteriota Actinobacteria Propionibacteriales Propionibacteriaceae NA 16 129 +ASV63 Bacteria Bacteroidota Bacteroidia Bacteroidales Muribaculaceae NA 15 136 +ASV64 Bacteria Bacteroidota Bacteroidia Chitinophagales Chitinophagaceae NA 11 178
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tax_glom_output5.tabular Fri Apr 04 10:16:43 2025 +0000 @@ -0,0 +1,25 @@ +Family SRR14190457 SRR14190458 +Acidaminococcaceae 47 54 +Bacillaceae 88 26 +Bacteroidaceae 488 215 +Bifidobacteriaceae 199 157 +Butyricicoccaceae 55 47 +Chitinophagaceae 11 178 +Corynebacteriaceae 26 86 +Enterobacteriaceae 503 607 +Erysipelotrichaceae 101 99 +Lachnospiraceae 531 419 +Lactobacillaceae 914 534 +Muribaculaceae 15 136 +Mycoplasmataceae 42 58 +Oscillospiraceae 28 71 +Peptostreptococcaceae 22 128 +Porphyromonadaceae 51 53 +Prevotellaceae 32 66 +Propionibacteriaceae 16 129 +Pseudomonadaceae 22 104 +Ruminococcaceae 28 71 +Staphylococcaceae 38 62 +Streptococcaceae 40 60 +Sutterellaceae 54 47 +Veillonellaceae 119 83