comparison test-data/gentest.R @ 0:1937c2b4da7a draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
author iuc
date Fri, 08 Nov 2019 18:49:03 -0500
parents
children 5b752525bd6f
comparison
equal deleted inserted replaced
-1:000000000000 0:1937c2b4da7a
1 library(dada2, quietly=T)
2 library(ggplot2, quietly=T)
3
# Forward (R1) and reverse (R2) read files of the single test sample.
fwd <- c('F3D0_S188_L001_R1_001.fastq.gz')
rev <- c('F3D0_S188_L001_R2_001.fastq.gz')

sample.names <- c('F3D0_S188_L001')

names(fwd) <- sample.names
names(rev) <- sample.names


# Output files that receive the filtered forward / reverse reads.
filt.fwd <- c('filterAndTrim_F3D0_R1.fq.gz')
filt.rev <- c('filterAndTrim_F3D0_R2.fq.gz')

# Paired filtering; no filter arguments are passed, so package defaults apply.
ftout <- filterAndTrim(fwd, filt.fwd, rev, filt.rev)

# In the test no name can be given to the collection
rownames(ftout) <- c( 'Unnamed Collection' )
# FALSE is spelled out: the shorthand F is an ordinary variable that can be
# reassigned, TRUE/FALSE are reserved words.
write.table(ftout, "filterAndTrim_F3D0.tab", quote = FALSE, sep = "\t", col.names = NA)
21
# Quality profile of the forward reads.
# Only one file is plotted because Galaxy compares the PDF with sim_size.
quality.plot <- plotQualityProfile(fwd)
ggsave(
    filename = 'qualityProfile.pdf',
    plot = quality.plot,
    width = 20,
    height = 15,
    units = "cm"
)

# Complexity plot of the forward reads.
# Only one file is plotted because Galaxy compares the PDF with sim_size.
complexity.plot <- plotComplexity(fwd)
ggsave(
    filename = 'complexity.pdf',
    plot = complexity.plot,
    width = 20,
    height = 15,
    units = "cm"
)
31
32
# learn Errors
# One error model per read direction; each model is saved as an RDS file and
# its diagnostic plot as a PDF.

# Forward model, learned from the filtered forward reads.
err.fwd <- learnErrors(filt.fwd)
saveRDS(err.fwd, file='learnErrors_F3D0_R1.Rdata')
# Named err.plot.* rather than `plot` so that base::plot is not masked.
err.plot.fwd <- plotErrors(err.fwd)
ggsave('learnErrors_F3D0_R1.pdf', err.plot.fwd, width = 20,height = 15,units = c("cm"))

# Reverse model. It has to be learned from the filtered *reverse* reads
# (filt.rev); learning it from filt.fwd would only duplicate the forward
# model under the R2 file names.
err.rev <- learnErrors(filt.rev)
saveRDS(err.rev, file='learnErrors_F3D0_R2.Rdata')
err.plot.rev <- plotErrors(err.rev)
ggsave('learnErrors_F3D0_R2.pdf', err.plot.rev, width = 20,height = 15,units = c("cm"))
43
# dada
# Run dada() on each read direction with its matching error model and store
# every result as an RDS file.
dada.fwd <- dada(derep = filt.fwd, err = err.fwd)
saveRDS(object = dada.fwd, file = "dada_F3D0_R1.Rdata")

dada.rev <- dada(derep = filt.rev, err = err.rev)
saveRDS(object = dada.rev, file = "dada_F3D0_R2.Rdata")

# merge pairs
# Combine the forward and reverse results (default merge parameters).
merged <- mergePairs(
    dadaF = dada.fwd,
    derepF = filt.fwd,
    dadaR = dada.rev,
    derepR = filt.rev
)
saveRDS(object = merged, file = 'mergePairs_F3D0.Rdata')
53
# make sequence table
seqtab <- makeSequenceTable(merged)
# The table is written transposed; col.names = NA adds a blank header cell
# above the row-name column.
write.table(t(seqtab), file="makeSequenceTable_F3D0.tab", quote=FALSE, sep="\t", row.names = TRUE, col.names = NA)

# Sum the column totals of seqtab, grouped by the length of each sequence.
reads.per.seqlen <- tapply(colSums(seqtab), factor(nchar(getSequences(seqtab))), sum)
# Named seqlen.df rather than `df` so that stats::df is not masked.
seqlen.df <- data.frame(length=as.numeric(names(reads.per.seqlen)), count=reads.per.seqlen)
seqlen.plot <- ggplot(data=seqlen.df, aes(x=length, y=count)) +
    geom_col() +
    theme_bw()

pdf( 'makeSequenceTable_F3D0.pdf' )
# A ggplot object is only drawn when it is printed. Printing it explicitly
# keeps the PDF from being empty when this script is run through source(),
# where top-level values are not auto-printed.
print(seqlen.plot)
# invisible() suppresses the device number that dev.off() returns.
invisible(dev.off())
65
# remove bimera
seqtab.nochim <- removeBimeraDenovo(seqtab)
# Write the chimera-filtered table (seqtab.nochim), transposed. Writing
# seqtab here would export the unfiltered input under the
# removeBimeraDenovo file name.
write.table(t(seqtab.nochim), file="removeBimeraDenovo_F3D0.tab", quote=FALSE, sep="\t", row.names = TRUE, col.names = NA)
69
# assign taxonomy/species
# Taxonomic level names, given as one comma-separated string and split into
# a character vector.
tl <- 'Level1,Level2,Level3,Level4,Level5'
tl <- strsplit(tl, ",")[[1]]

# tl is passed directly instead of repeating the same five level names as a
# second literal that could drift out of sync.
taxa <- assignTaxonomy(seqtab.nochim, 'reference.fa', outputBootstraps = TRUE, taxLevels = tl)

# Species assignment is applied to the $tax element; $boot is left untouched.
taxa$tax <- addSpecies(taxa$tax, 'reference_species.fa')
write.table(taxa$tax, file = 'assignTaxonomyAddspecies_F3D0.tab', quote = FALSE, sep = "\t", row.names = TRUE, col.names = NA)

write.table(taxa$boot, file = 'assignTaxonomyAddspecies_F3D0_boot.tab', quote = FALSE, sep = "\t", row.names = TRUE, col.names = NA)
80
81
82
## Generate extra test data for parameter testing

# Single-end run (forward reads only) with phiX removal and read orientation.
# TRUE is spelled out: the shorthand T is an ordinary, reassignable variable.
filterAndTrim(fwd, c('filterAndTrim_single_F3D0_R1.fq.gz'), rm.phix = TRUE, orient.fwd = 'TACGG')

# Single-end run exercising the trimming/truncation parameters.
# NOTE(review): truncLen (2) is smaller than trimLeft (150) - confirm that
# this combination is intended and the two values are not swapped.
filterAndTrim(fwd, c('filterAndTrim_single_trimmers_F3D0_R1.fq.gz'), truncQ = 30, truncLen = 2, trimLeft = 150, trimRight = 2)

# Single-end run exercising the length / N / quality / expected-error filters.
filterAndTrim(fwd, c('filterAndTrim_single_filters_F3D0_R1.fq.gz'), maxLen = 255, minLen = 60, maxN = 100, minQ = 13, maxEE = 1)
90
91
# Merge the read pairs again, this time with non-default merge parameters.
merged_nondef <- mergePairs(dada.fwd, filt.fwd, dada.rev, filt.rev, minOverlap = 8, maxMismatch = 1, justConcatenate = TRUE, trimOverhang = TRUE)
saveRDS(merged_nondef, file='mergePairs_F3D0_nondefault.Rdata')

# Chimera removal applied directly to the forward dada() result; the output
# is written as a tab-separated table with row names and no header line.
# TRUE/FALSE are spelled out: T and F are ordinary, reassignable variables.
rb.dada.fwd <- removeBimeraDenovo(dada.fwd)
write.table(rb.dada.fwd, file = 'removeBimeraDenovo_F3D0_dada_uniques.tab', quote = FALSE, sep = "\t", row.names = TRUE, col.names = FALSE)

# Chimera removal applied to the merged pairs using the "pooled" method.
rb.merged <- removeBimeraDenovo(merged, method="pooled")
saveRDS(rb.merged, file='removeBimeraDenovo_F3D0_mergepairs.Rdata')