changeset 10:cee4982a717b draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
author iuc
date Fri, 04 Apr 2025 10:16:43 +0000
parents 32f892954ef6
children
files phyloseq_tax_glom.R test-data/SRR1770594.phyloseq test-data/tax_glom_output0.tabular test-data/tax_glom_output1.tabular test-data/tax_glom_output2.tabular test-data/tax_glom_output3.tabular test-data/tax_glom_output4.tabular test-data/tax_glom_output5.tabular
diffstat 8 files changed, 153 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/phyloseq_tax_glom.R	Fri Apr 04 10:16:43 2025 +0000
@@ -0,0 +1,73 @@
+suppressPackageStartupMessages(library("phyloseq"))
+suppressPackageStartupMessages(library("dplyr"))
+suppressPackageStartupMessages(library("optparse"))
+
+# Command-line interface: input phyloseq RDS, target rank, and three output-shaping flags
+option_list <- list(
+    make_option(c("-i", "--input"), type = "character", help = "Path to the phyloseq RDS file", metavar = "FILE"),
+    make_option(c("-r", "--rank"), type = "character", help = "Taxonomic rank for aggregation"),
+    make_option("--exclude_otu_ids", action = "store_true", default = FALSE, help = "Exclude OTU IDs from output"),
+    make_option("--single_rank", action = "store_true", default = FALSE, help = "Only output the specified rank column"),
+    make_option("--exclude_na_values", action = "store_true", default = FALSE, help = "Exclude NA values during tax_glom")
+)
+
+# Parse arguments
+opt <- parse_args(OptionParser(option_list = option_list))
+
+# Validate arguments: the input file and the rank are both mandatory
+if (is.null(opt$input) || is.null(opt$rank)) {
+    stop("Error: --input and --rank are required arguments.")
+}
+
+if (opt$single_rank && !opt$exclude_otu_ids) {
+    stop("Error: --single_rank can only be used if --exclude_otu_ids is also specified.")
+}
+
+# Load the phyloseq object
+physeq <- readRDS(opt$input)
+
+# Report the available ranks and the taxon count before agglomeration
+cat("Available taxonomic ranks:\n")
+print(rank_names(physeq))
+cat("Original number of OTUs:", ntaxa(physeq), "\n")
+
+# Perform tax_glom at the requested rank; NArm follows --exclude_na_values
+physeq_agg <- tax_glom(physeq, taxrank = opt$rank, NArm = opt$exclude_na_values)
+
+# Print new number of taxa after agglomeration
+cat("Number of taxa after agglomeration at", opt$rank, "level:", ntaxa(physeq_agg), "\n")
+
+# Extract the taxonomy table after agglomeration
+tax_table_agg <- as.data.frame(tax_table(physeq_agg))
+
+# Convert taxonomic columns to character to preserve NA values
+tax_table_agg[] <- lapply(tax_table_agg, as.character)
+
+# Add OTU ID column unless excluded
+if (!opt$exclude_otu_ids) {
+    tax_table_agg <- cbind("OTU ID" = rownames(tax_table_agg), tax_table_agg)
+}
+
+# Extract abundances with one row per taxon; a samples-by-taxa table would not line up with the taxonomy rows
+otu_counts <- otu_table(physeq_agg)
+if (!taxa_are_rows(otu_counts)) {
+    otu_counts <- t(otu_counts)
+}
+otu_table_agg <- as.data.frame(otu_counts)
+
+# Append the per-sample abundance columns to the taxonomy columns
+tax_table_agg <- cbind(tax_table_agg, otu_table_agg)
+
+if (opt$single_rank) {
+    # Keep only the specified taxonomic rank column and numeric count columns
+    tax_table_agg <- tax_table_agg %>% select(all_of(opt$rank), where(is.numeric))
+
+    # Group by taxonomic rank and sum the counts
+    tax_table_agg <- tax_table_agg %>%
+        group_by(across(all_of(opt$rank))) %>%
+        summarise(across(where(is.numeric), sum), .groups = "drop")
+}
+
+# Save the output as a TSV file named after the rank, in the current working directory
+output_file <- paste0("physeq_", opt$rank, "_table.tsv")
+write.table(tax_table_agg, file = output_file, sep = "\t", quote = FALSE, row.names = FALSE, col.names = TRUE)
Binary file test-data/SRR1770594.phyloseq has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tax_glom_output0.tabular	Fri Apr 04 10:16:43 2025 +0000
@@ -0,0 +1,18 @@
+OTU ID	Superkingdom	Kingdom	Phylum	Class	Order	Family	Genus	sa1
+55939	sk__Archaea	k__	p__Thaumarchaeota	c__Nitrososphaeria	NA	NA	NA	1
+220170	sk__Archaea	k__	p__Thaumarchaeota	c__Nitrososphaeria	o__Nitrososphaerales	f__Nitrososphaeraceae	NA	1
+107861	sk__Bacteria	k__	p__Acidobacteria	c__Acidobacteriia	o__Bryobacterales	f__Bryobacteraceae	g__Bryobacter	2
+196447	sk__Bacteria	k__	p__Acidobacteria	c__Thermoanaerobaculia	o__Thermoanaerobaculales	f__Thermoanaerobaculaceae	NA	1
+23596	sk__Bacteria	k__	p__Actinobacteria	NA	NA	NA	NA	1
+126258	sk__Bacteria	k__	p__Actinobacteria	c__Acidimicrobiia	NA	NA	NA	2
+105940	sk__Bacteria	k__	p__Actinobacteria	c__Actinobacteria	NA	NA	NA	1
+8251	sk__Bacteria	k__	p__Actinobacteria	c__Actinobacteria	o__Micromonosporales	f__Micromonosporaceae	NA	2
+58206	sk__Bacteria	k__	p__Actinobacteria	c__Rubrobacteria	o__Gaiellales	NA	NA	1
+127789	sk__Bacteria	k__	p__Bacteroidetes	c__Chitinophagia	o__Chitinophagales	NA	NA	1
+173872	sk__Bacteria	k__	p__Bacteroidetes	c__Chitinophagia	o__Chitinophagales	f__Chitinophagaceae	g__Flavisolibacter	1
+176861	sk__Bacteria	k__	p__Bacteroidetes	c__Cytophagia	o__Cytophagales	NA	NA	1
+166810	sk__Bacteria	k__	p__Bacteroidetes	c__Cytophagia	o__Cytophagales	f__Microscillaceae	NA	1
+93849	sk__Bacteria	k__	p__Cyanobacteria	NA	NA	NA	NA	1
+111794	sk__Bacteria	k__	p__Proteobacteria	c__Alphaproteobacteria	o__Rhizobiales	f__Bradyrhizobiaceae	NA	1
+83155	sk__Bacteria	k__	p__Verrucomicrobia	c__Verrucomicrobiae	NA	NA	NA	2
+125725	sk__Eukaryota	k__	p__	c__Bigyra	o__Amphifilida	f__	g__Sorodiplophrys	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tax_glom_output1.tabular	Fri Apr 04 10:16:43 2025 +0000
@@ -0,0 +1,4 @@
+OTU ID	Superkingdom	Kingdom	Phylum	Class	Order	Family	Genus	sa1
+107861	sk__Bacteria	k__	p__Acidobacteria	c__Acidobacteriia	o__Bryobacterales	f__Bryobacteraceae	g__Bryobacter	2
+173872	sk__Bacteria	k__	p__Bacteroidetes	c__Chitinophagia	o__Chitinophagales	f__Chitinophagaceae	g__Flavisolibacter	1
+125725	sk__Eukaryota	k__	p__	c__Bigyra	o__Amphifilida	f__	g__Sorodiplophrys	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tax_glom_output2.tabular	Fri Apr 04 10:16:43 2025 +0000
@@ -0,0 +1,4 @@
+Superkingdom	Kingdom	Phylum	Class	Order	Family	Genus	sa1
+sk__Bacteria	k__	p__Acidobacteria	c__Acidobacteriia	o__Bryobacterales	f__Bryobacteraceae	g__Bryobacter	2
+sk__Bacteria	k__	p__Bacteroidetes	c__Chitinophagia	o__Chitinophagales	f__Chitinophagaceae	g__Flavisolibacter	1
+sk__Eukaryota	k__	p__	c__Bigyra	o__Amphifilida	f__	g__Sorodiplophrys	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tax_glom_output3.tabular	Fri Apr 04 10:16:43 2025 +0000
@@ -0,0 +1,4 @@
+Genus	sa1
+g__Bryobacter	2
+g__Flavisolibacter	1
+g__Sorodiplophrys	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tax_glom_output4.tabular	Fri Apr 04 10:16:43 2025 +0000
@@ -0,0 +1,25 @@
+OTU ID	Kingdom	Phylum	Class	Order	Family	Genus	SRR14190457	SRR14190458
+ASV1	Bacteria	Firmicutes	Bacilli	Lactobacillales	Lactobacillaceae	NA	914	534
+ASV2	Bacteria	Bacteroidota	Bacteroidia	Bacteroidales	Bacteroidaceae	NA	488	215
+ASV3	Bacteria	Actinobacteriota	Actinobacteria	Bifidobacteriales	Bifidobacteriaceae	NA	199	157
+ASV5	Bacteria	Firmicutes	Clostridia	Lachnospirales	Lachnospiraceae	NA	531	419
+ASV9	Bacteria	Firmicutes	Bacilli	Bacillales	Bacillaceae	NA	88	26
+ASV20	Bacteria	Firmicutes	Bacilli	Erysipelotrichales	Erysipelotrichaceae	NA	101	99
+ASV28	Bacteria	Firmicutes	Clostridia	Oscillospirales	Butyricicoccaceae	NA	55	47
+ASV29	Bacteria	Proteobacteria	Gammaproteobacteria	Burkholderiales	Sutterellaceae	NA	54	47
+ASV31	Bacteria	Firmicutes	Negativicutes	Veillonellales-Selenomonadales	Veillonellaceae	NA	119	83
+ASV34	Bacteria	Bacteroidota	Bacteroidia	Bacteroidales	Porphyromonadaceae	NA	51	53
+ASV36	Bacteria	Firmicutes	Negativicutes	Acidaminococcales	Acidaminococcaceae	NA	47	54
+ASV41	Bacteria	Firmicutes	Bacilli	Mycoplasmatales	Mycoplasmataceae	NA	42	58
+ASV43	Bacteria	Firmicutes	Bacilli	Lactobacillales	Streptococcaceae	NA	40	60
+ASV45	Bacteria	Firmicutes	Bacilli	Staphylococcales	Staphylococcaceae	NA	38	62
+ASV47	Bacteria	Bacteroidota	Bacteroidia	Bacteroidales	Prevotellaceae	NA	32	66
+ASV51	Bacteria	Firmicutes	Clostridia	Oscillospirales	Oscillospiraceae	NA	28	71
+ASV52	Bacteria	Firmicutes	Clostridia	Oscillospirales	Ruminococcaceae	NA	28	71
+ASV55	Bacteria	Actinobacteriota	Actinobacteria	Corynebacteriales	Corynebacteriaceae	NA	26	86
+ASV59	Bacteria	Proteobacteria	Gammaproteobacteria	Pseudomonadales	Pseudomonadaceae	NA	22	104
+ASV60	Bacteria	Proteobacteria	Gammaproteobacteria	Enterobacterales	Enterobacteriaceae	NA	503	607
+ASV61	Bacteria	Firmicutes	Clostridia	Peptostreptococcales-Tissierellales	Peptostreptococcaceae	NA	22	128
+ASV62	Bacteria	Actinobacteriota	Actinobacteria	Propionibacteriales	Propionibacteriaceae	NA	16	129
+ASV63	Bacteria	Bacteroidota	Bacteroidia	Bacteroidales	Muribaculaceae	NA	15	136
+ASV64	Bacteria	Bacteroidota	Bacteroidia	Chitinophagales	Chitinophagaceae	NA	11	178
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/tax_glom_output5.tabular	Fri Apr 04 10:16:43 2025 +0000
@@ -0,0 +1,25 @@
+Family	SRR14190457	SRR14190458
+Acidaminococcaceae	47	54
+Bacillaceae	88	26
+Bacteroidaceae	488	215
+Bifidobacteriaceae	199	157
+Butyricicoccaceae	55	47
+Chitinophagaceae	11	178
+Corynebacteriaceae	26	86
+Enterobacteriaceae	503	607
+Erysipelotrichaceae	101	99
+Lachnospiraceae	531	419
+Lactobacillaceae	914	534
+Muribaculaceae	15	136
+Mycoplasmataceae	42	58
+Oscillospiraceae	28	71
+Peptostreptococcaceae	22	128
+Porphyromonadaceae	51	53
+Prevotellaceae	32	66
+Propionibacteriaceae	16	129
+Pseudomonadaceae	22	104
+Ruminococcaceae	28	71
+Staphylococcaceae	38	62
+Streptococcaceae	40	60
+Sutterellaceae	54	47
+Veillonellaceae	119	83