Mercurial > repos > iuc > dada2_makesequencetable
comparison test-data/gentest.R @ 6:8470de786d47 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit ea6c9c638e742c097b0ef294161eeea447c09e06
author | iuc |
---|---|
date | Fri, 30 Jun 2023 07:59:55 +0000 |
parents | bfd62e881837 |
children | 43c99742158b |
comparison legend: equal | deleted | inserted | replaced
5:e96bbb9e24ea | 6:8470de786d47 |
---|---|
1 library(dada2, quietly = T) | 1 library(dada2, quietly = TRUE) |
2 library(ggplot2, quietly = T) | 2 library(ggplot2, quietly = TRUE) |
3 | 3 |
4 sample_names <- c("F3D0_S188_L001", "F3D141_S207_L001") | 4 sample_names <- c("F3D0_S188_L001", "F3D141_S207_L001") |
5 fwd <- c("F3D0_S188_L001_R1_001.fastq.gz", "F3D141_S207_L001_R1_001.fastq.gz") | 5 fwd <- c("F3D0_S188_L001_R1_001.fastq.gz", "F3D141_S207_L001_R1_001.fastq.gz") |
6 rev <- c("F3D0_S188_L001_R2_001.fastq.gz", "F3D141_S207_L001_R2_001.fastq.gz") | 6 rev <- c("F3D0_S188_L001_R2_001.fastq.gz", "F3D141_S207_L001_R2_001.fastq.gz") |
7 | 7 |
10 | 10 |
11 print("filterAndTrim") | 11 print("filterAndTrim") |
12 | 12 |
13 for (i in seq_len(fwd)) { | 13 for (i in seq_len(fwd)) { |
14 ftout <- dada2::filterAndTrim(fwd[i], filt_fwd[i], rev[i], filt_rev[i]) | 14 ftout <- dada2::filterAndTrim(fwd[i], filt_fwd[i], rev[i], filt_rev[i]) |
15 b <- paste(strsplit(fwd[i], ".", fixed = T)[[1]][1], "tab", sep = ".") | 15 b <- paste(strsplit(fwd[i], ".", fixed = TRUE)[[1]][1], "tab", sep = ".") |
16 write.table(ftout, b, quote = F, sep = "\t", col.names = NA) | 16 write.table(ftout, b, quote = FALSE, sep = "\t", col.names = NA) |
17 } | 17 } |
18 | 18 |
19 # In the test only the 1st data set is used | 19 # In the test only the 1st data set is used |
20 t <- data.frame() | 20 t <- data.frame() |
21 t <- rbind(t, ftout[1, ]) | 21 t <- rbind(t, ftout[1, ]) |
22 colnames(t) <- colnames(ftout) | 22 colnames(t) <- colnames(ftout) |
23 rownames(t) <- rownames(ftout)[1] | 23 rownames(t) <- rownames(ftout)[1] |
24 write.table(t, "filterAndTrim.tab", quote = F, sep = "\t", col.names = NA) | 24 write.table(t, "filterAndTrim.tab", quote = FALSE, sep = "\t", col.names = NA) |
25 | 25 |
26 names(fwd) <- sample_names | 26 names(fwd) <- sample_names |
27 names(rev) <- sample_names | 27 names(rev) <- sample_names |
28 names(filt_fwd) <- sample_names | 28 names(filt_fwd) <- sample_names |
29 names(filt_rev) <- sample_names | 29 names(filt_rev) <- sample_names |
77 | 77 |
78 | 78 |
79 # make sequence table | 79 # make sequence table |
80 print("makeSequenceTable") | 80 print("makeSequenceTable") |
81 seqtab <- makeSequenceTable(merged) | 81 seqtab <- makeSequenceTable(merged) |
82 write.table(t(seqtab), file = "makeSequenceTable.tab", quote = F, sep = "\t", row.names = T, col.names = NA) | 82 write.table(t(seqtab), file = "makeSequenceTable.tab", quote = FALSE, sep = "\t", row.names = TRUE, col.names = NA) |
83 | 83 |
84 reads_per_seqlen <- tapply(colSums(seqtab), factor(nchar(getSequences(seqtab))), sum) | 84 reads_per_seqlen <- tapply(colSums(seqtab), factor(nchar(getSequences(seqtab))), sum) |
85 df <- data.frame(length = as.numeric(names(reads_per_seqlen)), count = reads_per_seqlen) | 85 df <- data.frame(length = as.numeric(names(reads_per_seqlen)), count = reads_per_seqlen) |
86 pdf("makeSequenceTable.pdf") | 86 pdf("makeSequenceTable.pdf") |
87 ggplot(data = df, aes(x = length, y = count)) + | 87 ggplot(data = df, aes(x = length, y = count)) + |
90 bequiet <- dev.off() | 90 bequiet <- dev.off() |
91 | 91 |
92 # remove bimera | 92 # remove bimera |
93 print("removeBimera") | 93 print("removeBimera") |
94 seqtab_nochim <- dada2::removeBimeraDenovo(seqtab) | 94 seqtab_nochim <- dada2::removeBimeraDenovo(seqtab) |
95 write.table(t(seqtab), file = "removeBimeraDenovo.tab", quote = F, sep = "\t", row.names = T, col.names = NA) | 95 write.table(t(seqtab), file = "removeBimeraDenovo.tab", quote = FALSE, sep = "\t", row.names = TRUE, col.names = NA) |
96 | 96 |
97 # assign taxonomy/species | 97 # assign taxonomy/species |
98 tl <- "Level1,Level2,Level3,Level4,Level5" | 98 tl <- "Level1,Level2,Level3,Level4,Level5" |
99 tl <- strsplit(tl, ",")[[1]] | 99 tl <- strsplit(tl, ",")[[1]] |
100 | 100 |
101 set.seed(42) | 101 set.seed(42) |
102 print("assignTaxonomyAndSpecies") | 102 print("assignTaxonomyAndSpecies") |
103 taxa <- dada2::assignTaxonomy(seqtab_nochim, "reference.fa.gz", outputBootstraps = T, taxLevels = tl, multithread = 1) | 103 taxa <- dada2::assignTaxonomy(seqtab_nochim, "reference.fa.gz", outputBootstraps = TRUE, taxLevels = tl, multithread = 1) |
104 | 104 |
105 taxa$tax <- dada2::addSpecies(taxa$tax, "reference_species.fa.gz") | 105 taxa$tax <- dada2::addSpecies(taxa$tax, "reference_species.fa.gz") |
106 write.table(taxa$tax, file = "assignTaxonomyAddspecies.tab", quote = F, sep = "\t", row.names = T, col.names = NA) | 106 write.table(taxa$tax, file = "assignTaxonomyAddspecies.tab", quote = FALSE, sep = "\t", row.names = TRUE, col.names = NA) |
107 | 107 |
108 write.table(taxa$boot, file = "assignTaxonomyAddspecies_boot.tab", quote = F, sep = "\t", row.names = T, col.names = NA) | 108 write.table(taxa$boot, file = "assignTaxonomyAddspecies_boot.tab", quote = FALSE, sep = "\t", row.names = TRUE, col.names = NA) |
109 | 109 |
110 | 110 |
111 ## Generate extra test data for parameter testing | 111 ## Generate extra test data for parameter testing |
112 print("alternatives") | 112 print("alternatives") |
113 dada2::filterAndTrim(fwd, c("filterAndTrim_single_F3D0_R1.fq.gz", "filterAndTrim_single_F3D141_R1.fq.gz"), rm.phix = T, orient.fwd = "TACGG") | 113 dada2::filterAndTrim(fwd, c("filterAndTrim_single_F3D0_R1.fq.gz", "filterAndTrim_single_F3D141_R1.fq.gz"), rm.phix = TRUE, orient.fwd = "TACGG") |
114 | 114 |
115 dada2::filterAndTrim(fwd, c("filterAndTrim_single_trimmers_F3D0_R1.fq.gz", "filterAndTrim_single_trimmers_F3D141_R1.fq.gz"), truncQ = 30, truncLen = 2, trimLeft = 150, trimRight = 2) | 115 dada2::filterAndTrim(fwd, c("filterAndTrim_single_trimmers_F3D0_R1.fq.gz", "filterAndTrim_single_trimmers_F3D141_R1.fq.gz"), truncQ = 30, truncLen = 2, trimLeft = 150, trimRight = 2) |
116 | 116 |
117 dada2::filterAndTrim(fwd, c("filterAndTrim_single_filters_F3D0_R1.fq.gz", "filterAndTrim_single_filters_F3D141_R1.fq.gz"), maxLen = 255, minLen = 60, maxN = 100, minQ = 13, maxEE = 1) | 117 dada2::filterAndTrim(fwd, c("filterAndTrim_single_filters_F3D0_R1.fq.gz", "filterAndTrim_single_filters_F3D141_R1.fq.gz"), maxLen = 255, minLen = 60, maxN = 100, minQ = 13, maxEE = 1) |
118 | 118 |
120 merged_nondef <- dada2::mergePairs(dada_fwd, filt_fwd, dada_rev, filt_rev, minOverlap = 8, maxMismatch = 1, justConcatenate = TRUE, trimOverhang = TRUE) | 120 merged_nondef <- dada2::mergePairs(dada_fwd, filt_fwd, dada_rev, filt_rev, minOverlap = 8, maxMismatch = 1, justConcatenate = TRUE, trimOverhang = TRUE) |
121 for (id in sample_names) { | 121 for (id in sample_names) { |
122 saveRDS(merged_nondef[[id]], file = paste("mergePairs_", id, "_nondefault.Rdata", sep = "")) | 122 saveRDS(merged_nondef[[id]], file = paste("mergePairs_", id, "_nondefault.Rdata", sep = "")) |
123 } | 123 } |
124 rb_dada_fwd <- dada2::removeBimeraDenovo(dada_fwd[["F3D0_S188_L001"]]) | 124 rb_dada_fwd <- dada2::removeBimeraDenovo(dada_fwd[["F3D0_S188_L001"]]) |
125 write.table(rb_dada_fwd, file = "removeBimeraDenovo_F3D0_dada_uniques.tab", quote = F, sep = "\t", row.names = T, col.names = F) | 125 write.table(rb_dada_fwd, file = "removeBimeraDenovo_F3D0_dada_uniques.tab", quote = FALSE, sep = "\t", row.names = TRUE, col.names = FALSE) |
126 | 126 |
127 rb_merged <- dada2::removeBimeraDenovo(merged, method = "pooled") | 127 rb_merged <- dada2::removeBimeraDenovo(merged, method = "pooled") |
128 saveRDS(rb_merged, file = "removeBimeraDenovo_F3D0_mergepairs.Rdata") | 128 saveRDS(rb_merged, file = "removeBimeraDenovo_F3D0_mergepairs.Rdata") |
129 | 129 |
130 # SeqCounts | 130 # SeqCounts |
132 sum(dada2::getUniques(x)) | 132 sum(dada2::getUniques(x)) |
133 } | 133 } |
134 | 134 |
135 print("seqCounts ft") | 135 print("seqCounts ft") |
136 samples <- list() | 136 samples <- list() |
137 samples[["F3D0_S188_L001_R1_001.tab"]] <- read.table("F3D0_S188_L001_R1_001.tab", header = T, sep = "\t", row.names = 1) | 137 samples[["F3D0_S188_L001_R1_001.tab"]] <- read.table("F3D0_S188_L001_R1_001.tab", header = TRUE, sep = "\t", row.names = 1) |
138 dname <- "filter" | 138 dname <- "filter" |
139 tdf <- samples[["F3D0_S188_L001_R1_001.tab"]] | 139 tdf <- samples[["F3D0_S188_L001_R1_001.tab"]] |
140 names(tdf) <- paste(dname, names(tdf)) | 140 names(tdf) <- paste(dname, names(tdf)) |
141 tdf <- cbind(data.frame(samples = names(samples)), tdf) | 141 tdf <- cbind(data.frame(samples = names(samples)), tdf) |
142 write.table(tdf, "seqCounts_filter.tab", quote = F, sep = "\t", row.names = F, col.names = T) | 142 write.table(tdf, "seqCounts_filter.tab", quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE) |
143 | 143 |
144 samples <- list() | 144 samples <- list() |
145 samples[["F3D0_S188_L001_R1_001.tab"]] <- read.table("F3D0_S188_L001_R1_001.tab", header = T, sep = "\t", row.names = 1) | 145 samples[["F3D0_S188_L001_R1_001.tab"]] <- read.table("F3D0_S188_L001_R1_001.tab", header = TRUE, sep = "\t", row.names = 1) |
146 samples[["F3D141_S207_L001_R1_001.tab"]] <- read.table("F3D141_S207_L001_R1_001.tab", header = T, sep = "\t", row.names = 1) | 146 samples[["F3D141_S207_L001_R1_001.tab"]] <- read.table("F3D141_S207_L001_R1_001.tab", header = TRUE, sep = "\t", row.names = 1) |
147 dname <- "filter" | 147 dname <- "filter" |
148 tdf <- samples[["F3D0_S188_L001_R1_001.tab"]] | 148 tdf <- samples[["F3D0_S188_L001_R1_001.tab"]] |
149 tdf <- rbind(tdf, samples[["F3D141_S207_L001_R1_001.tab"]]) | 149 tdf <- rbind(tdf, samples[["F3D141_S207_L001_R1_001.tab"]]) |
150 names(tdf) <- paste(dname, names(tdf)) | 150 names(tdf) <- paste(dname, names(tdf)) |
151 tdf <- cbind(data.frame(samples = names(samples)), tdf) | 151 tdf <- cbind(data.frame(samples = names(samples)), tdf) |
152 write.table(tdf, "seqCounts_filter_both.tab", quote = F, sep = "\t", row.names = F, col.names = T) | 152 write.table(tdf, "seqCounts_filter_both.tab", quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE) |
153 | 153 |
154 print("seqCounts dada") | 154 print("seqCounts dada") |
155 samples <- list() | 155 samples <- list() |
156 samples[["dada_F3D0_S188_L001_R1.Rdata"]] <- readRDS("dada_F3D0_S188_L001_R1.Rdata") | 156 samples[["dada_F3D0_S188_L001_R1.Rdata"]] <- readRDS("dada_F3D0_S188_L001_R1.Rdata") |
157 samples[["dada_F3D141_S207_L001_R1.Rdata"]] <- readRDS("dada_F3D141_S207_L001_R1.Rdata") | 157 samples[["dada_F3D141_S207_L001_R1.Rdata"]] <- readRDS("dada_F3D141_S207_L001_R1.Rdata") |
158 dname <- "dadaF" | 158 dname <- "dadaF" |
159 tdf <- data.frame(samples = names(samples)) | 159 tdf <- data.frame(samples = names(samples)) |
160 tdf[[dname]] <- sapply(samples, get_n) | 160 tdf[[dname]] <- sapply(samples, get_n) |
161 write.table(tdf, "seqCounts_dadaF.tab", quote = F, sep = "\t", row.names = F, col.names = T) | 161 write.table(tdf, "seqCounts_dadaF.tab", quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE) |
162 | 162 |
163 print("seqCounts mp") | 163 print("seqCounts mp") |
164 samples <- list() | 164 samples <- list() |
165 samples[["mergePairs_F3D0_S188_L001.Rdata"]] <- readRDS("mergePairs_F3D0_S188_L001.Rdata") | 165 samples[["mergePairs_F3D0_S188_L001.Rdata"]] <- readRDS("mergePairs_F3D0_S188_L001.Rdata") |
166 samples[["mergePairs_F3D141_S207_L001.Rdata"]] <- readRDS("mergePairs_F3D141_S207_L001.Rdata") | 166 samples[["mergePairs_F3D141_S207_L001.Rdata"]] <- readRDS("mergePairs_F3D141_S207_L001.Rdata") |
167 dname <- "merge" | 167 dname <- "merge" |
168 tdf <- data.frame(samples = names(samples)) | 168 tdf <- data.frame(samples = names(samples)) |
169 tdf[[dname]] <- sapply(samples, get_n) | 169 tdf[[dname]] <- sapply(samples, get_n) |
170 write.table(tdf, "seqCounts_merge.tab", quote = F, sep = "\t", row.names = F, col.names = T) | 170 write.table(tdf, "seqCounts_merge.tab", quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE) |
171 | 171 |
172 print("seqCounts st") | 172 print("seqCounts st") |
173 samples <- list() | 173 samples <- list() |
174 samples <- t(as.matrix(read.table("makeSequenceTable.tab", header = T, sep = "\t", row.names = 1))) | 174 samples <- t(as.matrix(read.table("makeSequenceTable.tab", header = TRUE, sep = "\t", row.names = 1))) |
175 dname <- "seqtab" | 175 dname <- "seqtab" |
176 tdf <- data.frame(samples = row.names(samples)) | 176 tdf <- data.frame(samples = row.names(samples)) |
177 tdf[[dname]] <- rowSums(samples) | 177 tdf[[dname]] <- rowSums(samples) |
178 write.table(tdf, "seqCounts_seqtab.tab", quote = F, sep = "\t", row.names = F, col.names = T) | 178 write.table(tdf, "seqCounts_seqtab.tab", quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE) |
179 | 179 |
180 print("seqCounts rb") | 180 print("seqCounts rb") |
181 samples <- list() | 181 samples <- list() |
182 samples <- t(as.matrix(read.table("removeBimeraDenovo.tab", header = T, sep = "\t", row.names = 1))) | 182 samples <- t(as.matrix(read.table("removeBimeraDenovo.tab", header = TRUE, sep = "\t", row.names = 1))) |
183 dname <- "nochim" | 183 dname <- "nochim" |
184 tdf <- data.frame(samples = row.names(samples)) | 184 tdf <- data.frame(samples = row.names(samples)) |
185 tdf[[dname]] <- rowSums(samples) | 185 tdf[[dname]] <- rowSums(samples) |
186 write.table(tdf, "seqCounts_nochim.tab", quote = F, sep = "\t", row.names = F, col.names = T) | 186 write.table(tdf, "seqCounts_nochim.tab", quote = FALSE, sep = "\t", row.names = FALSE, col.names = TRUE) |