comparison test-data/gentest.R @ 8:d751c4726e0f draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit 6248c33a4eacc8785cac321cd4603acf06fae0fd
author iuc
date Thu, 23 May 2024 08:52:42 +0000
parents 75403243703a
children
comparison
equal deleted inserted replaced
7:75403243703a 8:d751c4726e0f
9 filt_rev <- c("filterAndTrim_F3D0_R2.fq.gz", "filterAndTrim_F3D141_R2.fq.gz") 9 filt_rev <- c("filterAndTrim_F3D0_R2.fq.gz", "filterAndTrim_F3D141_R2.fq.gz")
10 10
11 print("filterAndTrim") 11 print("filterAndTrim")
12 12
13 for (i in seq_len(fwd)) { 13 for (i in seq_len(fwd)) {
14 ftout <- dada2::filterAndTrim(fwd[i], filt_fwd[i], rev[i], filt_rev[i]) 14 ftout <- dada2::filterAndTrim(fwd[i], filt_fwd[i], rev[i], filt_rev[i])
15 b <- paste(strsplit(fwd[i], ".", fixed = TRUE)[[1]][1], "tab", sep = ".") 15 b <- paste(strsplit(fwd[i], ".", fixed = TRUE)[[1]][1], "tab", sep = ".")
16 write.table(ftout, b, quote = FALSE, sep = "\t", col.names = NA) 16 write.table(ftout, b, quote = FALSE, sep = "\t", col.names = NA)
17 } 17 }
18 18
19 # In the test only the 1st data set is used 19 # In the test only the 1st data set is used
20 t <- data.frame() 20 t <- data.frame()
21 t <- rbind(t, ftout[1, ]) 21 t <- rbind(t, ftout[1, ])
62 # dada 62 # dada
63 print("dada") 63 print("dada")
64 dada_fwd <- dada2::dada(filt_fwd, err_fwd) 64 dada_fwd <- dada2::dada(filt_fwd, err_fwd)
65 dada_rev <- dada2::dada(filt_rev, err_rev) 65 dada_rev <- dada2::dada(filt_rev, err_rev)
66 for (id in sample_names) { 66 for (id in sample_names) {
67 saveRDS(dada_fwd[[id]], file = paste("dada_", id, "_R1.Rdata", sep = "")) 67 saveRDS(dada_fwd[[id]], file = paste("dada_", id, "_R1.Rdata", sep = ""))
68 saveRDS(dada_rev[[id]], file = paste("dada_", id, "_R2.Rdata", sep = "")) 68 saveRDS(dada_rev[[id]], file = paste("dada_", id, "_R2.Rdata", sep = ""))
69 } 69 }
70 70
71 # merge pairs 71 # merge pairs
72 print("mergePairs") 72 print("mergePairs")
73 merged <- dada2::mergePairs(dada_fwd, filt_fwd, dada_rev, filt_rev) 73 merged <- dada2::mergePairs(dada_fwd, filt_fwd, dada_rev, filt_rev)
74 for (id in sample_names) { 74 for (id in sample_names) {
75 saveRDS(merged[[id]], file = paste("mergePairs_", id, ".Rdata", sep = "")) 75 saveRDS(merged[[id]], file = paste("mergePairs_", id, ".Rdata", sep = ""))
76 } 76 }
77 77
78 78
79 # make sequence table 79 # make sequence table
80 print("makeSequenceTable") 80 print("makeSequenceTable")
83 83
84 reads_per_seqlen <- tapply(colSums(seqtab), factor(nchar(getSequences(seqtab))), sum) 84 reads_per_seqlen <- tapply(colSums(seqtab), factor(nchar(getSequences(seqtab))), sum)
85 df <- data.frame(length = as.numeric(names(reads_per_seqlen)), count = reads_per_seqlen) 85 df <- data.frame(length = as.numeric(names(reads_per_seqlen)), count = reads_per_seqlen)
86 pdf("makeSequenceTable.pdf") 86 pdf("makeSequenceTable.pdf")
87 ggplot(data = df, aes(x = length, y = count)) + 87 ggplot(data = df, aes(x = length, y = count)) +
88 geom_col() + 88 geom_col() +
89 theme_bw() 89 theme_bw()
90 bequiet <- dev.off() 90 bequiet <- dev.off()
91 91
92 # remove bimera 92 # remove bimera
93 print("removeBimera") 93 print("removeBimera")
94 seqtab_nochim <- dada2::removeBimeraDenovo(seqtab) 94 seqtab_nochim <- dada2::removeBimeraDenovo(seqtab)
117 dada2::filterAndTrim(fwd, c("filterAndTrim_single_filters_F3D0_R1.fq.gz", "filterAndTrim_single_filters_F3D141_R1.fq.gz"), maxLen = 255, minLen = 60, maxN = 100, minQ = 13, maxEE = 1) 117 dada2::filterAndTrim(fwd, c("filterAndTrim_single_filters_F3D0_R1.fq.gz", "filterAndTrim_single_filters_F3D141_R1.fq.gz"), maxLen = 255, minLen = 60, maxN = 100, minQ = 13, maxEE = 1)
118 118
119 119
120 merged_nondef <- dada2::mergePairs(dada_fwd, filt_fwd, dada_rev, filt_rev, minOverlap = 8, maxMismatch = 1, justConcatenate = TRUE, trimOverhang = TRUE) 120 merged_nondef <- dada2::mergePairs(dada_fwd, filt_fwd, dada_rev, filt_rev, minOverlap = 8, maxMismatch = 1, justConcatenate = TRUE, trimOverhang = TRUE)
121 for (id in sample_names) { 121 for (id in sample_names) {
122 saveRDS(merged_nondef[[id]], file = paste("mergePairs_", id, "_nondefault.Rdata", sep = "")) 122 saveRDS(merged_nondef[[id]], file = paste("mergePairs_", id, "_nondefault.Rdata", sep = ""))
123 } 123 }
124 rb_dada_fwd <- dada2::removeBimeraDenovo(dada_fwd[["F3D0_S188_L001"]]) 124 rb_dada_fwd <- dada2::removeBimeraDenovo(dada_fwd[["F3D0_S188_L001"]])
125 write.table(rb_dada_fwd, file = "removeBimeraDenovo_F3D0_dada_uniques.tab", quote = FALSE, sep = "\t", row.names = TRUE, col.names = FALSE) 125 write.table(rb_dada_fwd, file = "removeBimeraDenovo_F3D0_dada_uniques.tab", quote = FALSE, sep = "\t", row.names = TRUE, col.names = FALSE)
126 126
127 rb_merged <- dada2::removeBimeraDenovo(merged, method = "pooled") 127 rb_merged <- dada2::removeBimeraDenovo(merged, method = "pooled")
128 saveRDS(rb_merged, file = "removeBimeraDenovo_F3D0_mergepairs.Rdata") 128 saveRDS(rb_merged, file = "removeBimeraDenovo_F3D0_mergepairs.Rdata")
129 129
130 # SeqCounts 130 # SeqCounts
131 get_n <- function(x) { 131 get_n <- function(x) {
132 sum(dada2::getUniques(x)) 132 sum(dada2::getUniques(x))
133 } 133 }
134 134
135 print("seqCounts ft") 135 print("seqCounts ft")
136 samples <- list() 136 samples <- list()
137 samples[["F3D0_S188_L001_R1_001.tab"]] <- read.table("F3D0_S188_L001_R1_001.tab", header = TRUE, sep = "\t", row.names = 1) 137 samples[["F3D0_S188_L001_R1_001.tab"]] <- read.table("F3D0_S188_L001_R1_001.tab", header = TRUE, sep = "\t", row.names = 1)