annotate phyloseq_from_dada2.R @ 2:fb7c4bbe8994 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
author iuc
date Sat, 16 Mar 2024 07:55:57 +0000
parents 1ff178d1757e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
1 #!/usr/bin/env Rscript
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
2
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
3 suppressPackageStartupMessages(library("optparse"))
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
4 suppressPackageStartupMessages(library("phyloseq"))
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
5 suppressPackageStartupMessages(library("tidyverse"))
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
6
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
7 option_list <- list(
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
8 make_option(c("--sequence_table"), action = "store", dest = "sequence_table", help = "Input sequence table"),
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
9 make_option(c("--taxonomy_table"), action = "store", dest = "taxonomy_table", help = "Input taxonomy table"),
2
fb7c4bbe8994 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
iuc
parents: 1
diff changeset
10 make_option(c("--sample_table"), action = "store", default = NULL, dest = "sample_table", help = "Input sample table"),
0
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
11 make_option(c("--output"), action = "store", dest = "output", help = "RDS output")
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
12 )
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
13
1
1ff178d1757e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 7df921baa7aa8680421b9440a1cd6eaab1a15ce2
iuc
parents: 0
diff changeset
14 parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
1ff178d1757e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 7df921baa7aa8680421b9440a1cd6eaab1a15ce2
iuc
parents: 0
diff changeset
15 args <- parse_args(parser, positional_arguments = TRUE)
1ff178d1757e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 7df921baa7aa8680421b9440a1cd6eaab1a15ce2
iuc
parents: 0
diff changeset
16 opt <- args$options
0
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
17 # The input sequence_table is an integer matrix
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
18 # stored as tabular (rows = ASVs, columns = samples).
2
fb7c4bbe8994 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
iuc
parents: 1
diff changeset
19 seq_table_numeric_matrix <- data.matrix(read.table(opt$sequence_table, header = TRUE, sep = "\t", row.names = 1, check.names = FALSE))
0
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
20 # The input taxonomy_table is a table containing
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
21 # the assigned taxonomies exceeding the minBoot
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
22 # level of bootstrapping confidence. Rows correspond
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
23 # to sequences, columns to taxonomic levels. NA
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
24 # indicates that the sequence was not consistently
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
25 # classified at that level at the minBoot threshold.
2
fb7c4bbe8994 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
iuc
parents: 1
diff changeset
26 tax_table_matrix <- as.matrix(read.table(opt$taxonomy_table, header = TRUE, sep = "\t", row.names = 1, check.names = FALSE))
0
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
27 # Construct a tax_table object. The rownames of
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
28 # tax_tab must match the OTU names (taxa_names)
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
29 # of the otu_table defined below.
1
1ff178d1757e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 7df921baa7aa8680421b9440a1cd6eaab1a15ce2
iuc
parents: 0
diff changeset
30 tax_tab <- tax_table(tax_table_matrix)
2
fb7c4bbe8994 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
iuc
parents: 1
diff changeset
31
0
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
32 # Construct an otu_table object.
1
1ff178d1757e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 7df921baa7aa8680421b9440a1cd6eaab1a15ce2
iuc
parents: 0
diff changeset
33 otu_tab <- otu_table(seq_table_numeric_matrix, taxa_are_rows = TRUE)
2
fb7c4bbe8994 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
iuc
parents: 1
diff changeset
34
0
92b82deaaed1 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit d1004c06207be773c278e12745aada276b63172e"
iuc
parents:
diff changeset
35 # Construct a phyloseq object.
1
1ff178d1757e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 7df921baa7aa8680421b9440a1cd6eaab1a15ce2
iuc
parents: 0
diff changeset
36 phyloseq_obj <- phyloseq(otu_tab, tax_tab)
2
fb7c4bbe8994 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
iuc
parents: 1
diff changeset
37 if (!is.null(opt$sample_table)) {
fb7c4bbe8994 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
iuc
parents: 1
diff changeset
38 sample_tab <- sample_data(
fb7c4bbe8994 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
iuc
parents: 1
diff changeset
39 read.table(opt$sample_table, header = TRUE, sep = "\t", row.names = 1, check.names = FALSE)
fb7c4bbe8994 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
iuc
parents: 1
diff changeset
40 )
fb7c4bbe8994 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
iuc
parents: 1
diff changeset
41 phyloseq_obj <- merge_phyloseq(phyloseq_obj, sample_tab)
fb7c4bbe8994 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
iuc
parents: 1
diff changeset
42 }
fb7c4bbe8994 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
iuc
parents: 1
diff changeset
43
fb7c4bbe8994 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
iuc
parents: 1
diff changeset
44 # use short names for our ASVs and save the ASV sequences
fb7c4bbe8994 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
iuc
parents: 1
diff changeset
45 # in the refseq slot of the phyloseq object as described in
fb7c4bbe8994 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
iuc
parents: 1
diff changeset
46 # https://benjjneb.github.io/dada2/tutorial.html
fb7c4bbe8994 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
iuc
parents: 1
diff changeset
47 dna <- Biostrings::DNAStringSet(taxa_names(phyloseq_obj))
fb7c4bbe8994 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
iuc
parents: 1
diff changeset
48 names(dna) <- taxa_names(phyloseq_obj)
fb7c4bbe8994 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
iuc
parents: 1
diff changeset
49 phyloseq_obj <- merge_phyloseq(phyloseq_obj, dna)
fb7c4bbe8994 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
iuc
parents: 1
diff changeset
50 taxa_names(phyloseq_obj) <- paste0("ASV", seq_len(ntaxa(phyloseq_obj)))
fb7c4bbe8994 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
iuc
parents: 1
diff changeset
51
fb7c4bbe8994 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
iuc
parents: 1
diff changeset
52 print(phyloseq_obj)
fb7c4bbe8994 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
iuc
parents: 1
diff changeset
53
fb7c4bbe8994 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 5ec9f9e81bb9a42dec5c331dd23215ca0b027b2b
iuc
parents: 1
diff changeset
54 # save R object to file
1
1ff178d1757e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/phyloseq commit 7df921baa7aa8680421b9440a1cd6eaab1a15ce2
iuc
parents: 0
diff changeset
55 saveRDS(phyloseq_obj, file = opt$output, compress = TRUE)