comparison dada2_dada.xml @ 0:ce4aec98949d draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dada2 commit f8b6b6e72914ad6bcca8423dfa03f59bde80992e"
author iuc
date Fri, 08 Nov 2019 18:48:34 -0500
parents
children 6b4ddc3b64bd
comparison
equal deleted inserted replaced
-1:000000000000 0:ce4aec98949d
1 <tool id="dada2_dada" name="dada2: dada" version="@DADA2_VERSION@+galaxy@WRAPPER_VERSION@" profile="19.09">
2 <description>Remove sequencing errors</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <expand macro="stdio"/>
8 <expand macro="version_command"/>
9 <command detect_errors="exit_code"><![CDATA[
10 #if $batch_cond.batch_select == "no"
11 mkdir output &&
12 #end if
13 Rscript '$dada2_script' \${GALAXY_SLOTS:-1}
14 ]]></command>
15 <configfiles>
16 <configfile name="dada2_script"><![CDATA[
17 library(ggplot2, quietly=TRUE)
18 library(dada2, quietly=TRUE)
19 ## the thread count is the first trailing argument (GALAXY_SLOTS, passed by the command above)
20 args <- commandArgs(trailingOnly = TRUE)
21 nthreads <- as.integer(args[1])
22 ## collect the input paths; in joint mode each path is named by its element identifier so that names(dada_result) gives the collection element names (assumes dada propagates input names to its result -- TODO confirm)
23 derep <- c()
24 #if $batch_cond.batch_select == "no"
25 #for $d in $batch_cond.derep:
26 derep <- c(derep, '$d.element_identifier' = '$d')
27 #end for
28 #else
29 derep <- c(derep, '$batch_cond.derep')
30 #end if
31 ## load the error rates given by the err parameter
32 err <- readRDS('$err')
33 ## batch mode always processes its single sample unpooled; joint mode maps the pool select value to TRUE, FALSE or 'pseudo'
34 #if $batch_cond.batch_select == "yes":
35 pool <- FALSE
36 #else
37 #if $batch_cond.pool == "TRUE"
38 pool <- TRUE
39 #else if $batch_cond.pool == "FALSE"
40 pool <- FALSE
41 #else
42 pool <- 'pseudo'
43 #end if
44 #end if
45
46 ## the Galaxy wrapper does not implement the arguments
47 ## - errorEstimationFunction = $errfoo,
48 ## - selfConsist = $selfconsist
49 ## since they are probably not relevant for the end user
50 dada_result <- dada(derep, err,
51 pool = pool, multithread = nthreads)
52 ## joint mode writes one file per sample into the output directory (several samples: keyed by names(dada_result); one sample: keyed by its element identifier); batch mode writes the single output dataset
53 #if $batch_cond.batch_select == "no":
54 #if len($batch_cond.derep) > 1:
55 for( id in names(dada_result) ){
56 saveRDS(dada_result[[id]], file=file.path("output" ,paste(id, "dada2_dada", sep=".")))
57 }
58 #else
59 #for $d in $batch_cond.derep:
60 saveRDS(dada_result, file=file.path("output" ,paste('$d.element_identifier', "dada2_dada", sep=".")))
61 #end for
62 #end if
63 #else
64 saveRDS(dada_result, file='$dada')
65 #end if
66 ]]></configfile>
67 </configfiles>
68 <inputs>
69 <conditional name="batch_cond">
70 <param name="batch_select" type="select" label="Process samples in batches" help="process samples jointly (default) or in independent jobs (see also below)">
71 <option value="no">no</option>
72 <option value="yes">yes</option>
73 </param>
74 <when value="yes">
75 <param argument="derep" type="data" format="fastq,fastq.gz" label="Reads" help="despite the parameter name the sequences don't need to be dereplicated "/>
76 </when>
77 <when value="no">
78 <param argument="derep" type="data" multiple="true" format="fastq,fastq.gz" label="Reads" help="despite the parameter name the sequences don't need to be dereplicated "/>
79 <param argument="pool" type="select" label="Pool samples">
80 <option value="FALSE">process samples individually</option>
81 <option value="TRUE">pool samples</option>
82 <option value="pseudo">pseudo pooling between individually processed samples</option>
83 </param>
84 </when>
85 </conditional>
86 <param argument="err" type="data" format="dada2_errorrates" label="Error rates"/>
87 <!-- errorEstimationFunction and selfConsist are intentionally not exposed (presumably not needed by end users); kept here for reference:
88 <expand macro="errorEstimationFunction"/>
89 <param name="selfconsist" type="boolean" checked="false" truevalue="TRUE" falsevalue="FALSE" label="Alternate between sample inference and error rate estimation until convergence"/>-->
90 </inputs>
91 <outputs>
92 <data name="dada" format="dada2_dada">
93 <filter>batch_cond['batch_select']=="yes"</filter>
94 </data>
95 <collection name="data_collection" type="list">
96 <discover_datasets pattern="(?P&lt;name&gt;.+)\.dada2_dada" format="dada2_dada" directory="output" />
97 <filter>batch_cond['batch_select']=="no"</filter>
98 </collection>
99 </outputs>
100 <tests>
101 <!-- joint processing (batch_select=no) with the default pool option; the result is a one-element list collection -->
102 <test>
103 <param name="batch_cond|batch_select" value="no"/>
104 <param name="batch_cond|derep" value="filterAndTrim_F3D0_R1.fq.gz" ftype="fastq.gz" />
105 <param name="err" value="learnErrors_F3D0_R1.Rdata" ftype="dada2_errorrates" />
106 <output_collection name="data_collection" type="list">
107 <element name="filterAndTrim_F3D0_R1.fq.gz" file="dada_F3D0_R1.Rdata" ftype="dada2_dada"/>
108 </output_collection>
109 </test>
110 <!-- batch processing (batch_select=yes); the result is the single output data set "dada" -->
111 <test>
112 <param name="batch_cond|batch_select" value="yes"/>
113 <param name="batch_cond|derep" value="filterAndTrim_F3D0_R1.fq.gz" ftype="fastq.gz" />
114 <param name="err" value="learnErrors_F3D0_R1.Rdata" ftype="dada2_errorrates" />
115 <output name="dada" value="dada_F3D0_R1.Rdata" ftype="dada2_dada" />
116 </test>
117 <!-- joint processing with a non-default pool option (pool=pseudo) on a single input -->
118 <test>
119 <param name="batch_cond|batch_select" value="no"/>
120 <param name="batch_cond|derep" value="filterAndTrim_F3D0_R1.fq.gz" ftype="fastq.gz" />
121 <param name="batch_cond|pool" value="pseudo"/>
122 <param name="err" value="learnErrors_F3D0_R1.Rdata" ftype="dada2_errorrates" />
123 <output_collection name="data_collection" type="list">
124 <element name="filterAndTrim_F3D0_R1.fq.gz" file="dada_F3D0_R1.Rdata" ftype="dada2_dada"/>
125 </output_collection>
126 </test>
127 </tests>
128 <help><![CDATA[
129 Description
130 ...........
131
132 The dada function takes as input amplicon sequencing reads and returns the inferred
133 composition of the sample (or samples). Put another way, dada removes all sequencing errors to
134 reveal the members of the sequenced community.
135
136 Usage
137 .....
138
139 **Input:**
140
141 - A number of fastq(.gz) files (given as collection or multiple data sets)
142 - A dada2_errorrates data set computed with learnErrors
143
144 You can decide to compute the data jointly or in batches.
145
146 - Jointly ("Process samples in batches"=no): A single Galaxy job is started that processes all fastq data sets jointly. You may choose between different pooling strategies: the dada job can process the samples individually, pooled, or pseudo-pooled.
147 - In batches ("Process samples in batches"=yes): A separate Galaxy job is started for each fastq data set. This is equivalent to joint processing and choosing to process samples individually.
148
149 While the single dada job (in case of joint processing) can use multiple cores on one compute node, batched processing distributes the work on a number of jobs (equal to the number of input fastq data sets) where each can use multiple cores. Hence, if you intend to or need to process the data sets individually, batched processing is more efficient -- in particular if Galaxy has access to a larger number of compute resources.
150
151 A typical use case for individual processing of the samples is large data sets for which the pooled strategy needs too much time or memory. Pseudo-pooling is recommended for those interested in detecting singleton ASVs in their samples.
152
153
154 **Output**: a data set of type dada2_dada (which is an RData file containing the output of dada2's dada function).
155
156 The output of this tool can serve as input for *dada2: mergePairs*, *dada2: removeBimeraDenovo*, and *dada2: makeSequenceTable*.
157
158 Details
159 .......
160
161 Briefly, dada implements a statistical test for the notion that a specific sequence was seen too many times to have been caused by amplicon errors from currently inferred sample sequences. Overly abundant sequences are used as the seeds of new partitions of sequencing reads, and the final set of partitions is taken to represent the denoised composition of the sample. A more detailed explanation of the algorithm is found in the dada2 publication (see below) and https://doi.org/10.1186/1471-2105-13-283. dada depends on a parametric error model of substitutions. Thus the quality of its sample inference is affected by the accuracy of the estimated error rates. All comparisons between sequences performed by dada depend on pairwise alignments. This step is the most computationally intensive part of the algorithm, and two alignment heuristics have been implemented in dada for speed: A kmer-distance screen and banded Needleman-Wunsch alignment.
162
163 @HELP_OVERVIEW@
164 ]]></help>
165 <expand macro="citations"/>
166 </tool>