Mercurial > repos > iuc > phyloseq_plot_ordination
annotate phyloseq_tax_glom.R @ 10:cee4982a717b draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
author | iuc |
---|---|
date | Fri, 04 Apr 2025 10:16:43 +0000 |
parents | |
children |
rev | line source |
---|---|
10
cee4982a717b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff
changeset
|
1 suppressPackageStartupMessages(library("phyloseq")) |
cee4982a717b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff
changeset
|
2 suppressPackageStartupMessages(library("dplyr")) |
cee4982a717b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff
changeset
|
3 suppressPackageStartupMessages(library("optparse")) |
cee4982a717b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff
changeset
|
4 |
cee4982a717b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff
changeset
|
# Command-line interface.
# Every entry declares one flag accepted by the script. The three boolean
# switches are "store_true" flags, so they stay FALSE unless given.
option_list <- list(
  make_option(
    c("-i", "--input"),
    type = "character",
    metavar = "FILE",
    help = "Path to the phyloseq RDS file"
  ),
  make_option(
    c("-r", "--rank"),
    type = "character",
    help = "Taxonomic rank for aggregation"
  ),
  make_option(
    "--exclude_otu_ids",
    default = FALSE,
    action = "store_true",
    help = "Exclude OTU IDs from output"
  ),
  make_option(
    "--single_rank",
    default = FALSE,
    action = "store_true",
    help = "Only output the specified rank column"
  ),
  make_option(
    "--exclude_na_values",
    default = FALSE,
    action = "store_true",
    help = "Exclude NA values during tax_glom"
  )
)
cee4982a717b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff
changeset
|
13 |
cee4982a717b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff
changeset
|
# Turn the command line into a named list of option values.
opt <- parse_args(
  OptionParser(option_list = option_list)
)

# Sanity checks on the parsed options.
# Neither --input nor --rank has a default, so each is NULL when omitted.
if (any(is.null(opt$input), is.null(opt$rank))) {
  stop("Error: --input and --rank are required arguments.")
}

# --single_rank is only accepted together with --exclude_otu_ids.
if (opt$single_rank) {
  if (!opt$exclude_otu_ids) {
    stop("Error: --single_rank can only be used if --exclude_otu_ids is also specified.")
  }
}
cee4982a717b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff
changeset
|
25 |
cee4982a717b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff
changeset
|
# Load the phyloseq object from the RDS file given via --input.
physeq <- readRDS(opt$input)

# readRDS() will happily return any R object. Fail early with a clear message
# if the file does not hold a phyloseq object, instead of letting a later
# phyloseq accessor fail with an opaque method-dispatch error.
if (!inherits(physeq, "phyloseq")) {
  stop(
    "Error: --input must be an RDS file containing a phyloseq object, got an object of class: ",
    paste(class(physeq), collapse = ", ")
  )
}

# Report the taxonomic ranks present in the object (written to stdout).
cat("Available taxonomic ranks:\n")
print(rank_names(physeq))

# Report the number of taxa before agglomeration.
cat("Original number of OTUs:", ntaxa(physeq), "\n")

# Agglomerate taxa at the requested rank. NArm is driven by
# --exclude_na_values, so by default NArm is FALSE.
physeq_agg <- tax_glom(
  physeq,
  taxrank = opt$rank,
  NArm = opt$exclude_na_values
)

# Report the number of taxa remaining after agglomeration.
cat(
  "Number of taxa after agglomeration at", opt$rank, "level:",
  ntaxa(physeq_agg), "\n"
)
cee4982a717b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff
changeset
|
41 |
cee4982a717b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff
changeset
|
# Build the combined output table: taxonomy columns followed by per-sample
# abundance columns, one row per agglomerated taxon.

# Taxonomy of the agglomerated taxa as a data frame.
tax_table_agg <- as.data.frame(tax_table(physeq_agg))

# Convert taxonomic columns to character to preserve NA values.
tax_table_agg[] <- lapply(tax_table_agg, as.character)

# Prepend the taxon identifiers (row names) as an "OTU ID" column unless the
# caller asked for them to be left out.
if (!opt$exclude_otu_ids) {
  tax_table_agg <- cbind("OTU ID" = rownames(tax_table_agg), tax_table_agg)
}

# Extract the abundance table. A phyloseq OTU table may be stored either
# taxa-by-sample or sample-by-taxa. The taxonomy data frame above has one row
# per taxon, so the abundances must be oriented taxa-by-sample before the two
# are bound column-wise. Otherwise cbind() fails on mismatched row counts, or
# silently pairs counts with the wrong taxonomy when the counts happen to match.
otu_table_agg <- otu_table(physeq_agg)
if (!taxa_are_rows(otu_table_agg)) {
  otu_table_agg <- t(otu_table_agg)
}
otu_table_agg <- as.data.frame(otu_table_agg)

# Append the abundance columns to the taxonomic information.
otu_table_agg <- cbind(tax_table_agg, otu_table_agg)

# Later steps read the combined table through `tax_table_agg`.
tax_table_agg <- otu_table_agg
cee4982a717b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff
changeset
|
60 |
cee4982a717b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff
changeset
|
# Optional collapse to a single rank column (--single_rank).
if (opt$single_rank) {
  tax_table_agg <- tax_table_agg %>%
    # Retain just the requested rank column plus the numeric count columns.
    select(all_of(opt$rank), where(is.numeric)) %>%
    # Rows sharing the same value at that rank are summed column by column.
    group_by(across(all_of(opt$rank))) %>%
    summarise(
      across(where(is.numeric), sum),
      .groups = "drop"
    )
}
cee4982a717b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff
changeset
|
70 |
cee4982a717b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit fa72d860839082a926004d8a97a03a3e27701333
iuc
parents:
diff
changeset
|
# Write the final table to a tab-separated file in the working directory.
# The file name embeds the rank that was used for agglomeration.
output_file <- paste0("physeq_", opt$rank, "_table.tsv")
write.table(
  tax_table_agg,
  file = output_file,
  sep = "\t",
  quote = FALSE,
  col.names = TRUE,
  row.names = FALSE
)