comparison stacks_procrad.xml @ 0:6f9e8593e571 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks commit f3a59c91c231cc1582479109e776d05602b7f24d-dirty
author iuc
date Tue, 14 Jun 2016 14:07:06 -0400
parents
children 11642e18f2b0
comparison
equal deleted inserted replaced
-1:000000000000 0:6f9e8593e571
1 <tool id="stacks_procrad" name="Stacks: process radtags" version="@WRAPPER_VERSION@.1">
2 <description>the Stacks demultiplexing script</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <expand macro="stdio"/>
8 <command><![CDATA[
9 ## The datatype of the first read file decides both the symlink suffix and the -i value.
10 #set $single_end = $input_type.options_type_selector == "single"
11 #if $single_end:
12 #set $first_reads = $input_type.input_single
13 #else:
14 #set $first_reads = $input_type.inputs_paired1
15 #end if
16
17 #if $first_reads.is_of_type('fastqsanger'):
18 #set $suffix = ".fq"
19 #set $reads_format = "fastq"
20 #else:
21 #set $suffix = ".fq.gz"
22 #set $reads_format = "gzfastq"
23 #end if
24
25 ## Link the inputs under fixed names (R1/R2) in the working directory.
26 ln -s "$first_reads" R1$suffix &&
27 #if not $single_end:
28 ln -s "$input_type.inputs_paired2" R2$suffix &&
29 #end if
30
31 mkdir stacks_outputs
32
33 &&
34
35 process_radtags
36
37 #if $single_end:
38 -f R1$suffix
39 #else:
40 -1 R1$suffix
41 -2 R2$suffix
42 #end if
43
44 -i $reads_format
45 -b "$barcode"
46
47 ## Barcode location flag chosen in the input_type conditional.
48 $input_type.barcode_encoding
49
50 ## One enzyme uses -e; two enzymes use --renz_1 and --renz_2.
51 #if str($options_enzyme.options_enzyme_selector) == "1":
52 -e $options_enzyme.enzyme
53 #else:
54 --renz_1 $options_enzyme.enzyme --renz_2 $options_enzyme.enzyme2
55 #end if
56
57 -y $outype
58
59 $capture
60
61 ## -t is only emitted when a truncation length was entered.
62 #if str($options_advanced.truncate):
63 -t $options_advanced.truncate
64 #end if
65
66 -w $options_advanced.sliding
67
68 $options_advanced.remove
69
70 $options_advanced.discard
71 -s $options_advanced.score
72
73 $options_advanced.rescue
74
75 ## Results go to the directory scanned by the output collections.
76 -o stacks_outputs
77 ]]></command>
78
79 <inputs>
80 <conditional name="input_type">
81 <param name="options_type_selector" type="select" label="Single-end or paired-end reads files">
82 <option value="single" selected="True">Single-end files</option>
83 <option value="paired">Paired-end files</option>
84 </param>
85 <when value="single">
86 <param name="input_single" argument="-f" format="fastqsanger,fastq.gz" type="data" label="singles-end reads infile(s)" help="input files" />
87 <!-- With a single read file the barcode is either inline or in the FASTQ header -->
88 <param name="barcode_encoding" type="select" label="Barcode location">
89 <option value="--inline_null" selected="True">Barcode is inline with sequence</option>
90 <option value="--index_null">Barcode is provided in FASTQ header</option>
91 </param>
92 </when>
93 <when value="paired">
94 <param name="inputs_paired1" argument="-1" format="fastqsanger,fastq.gz" type="data" label="paired-end reads infile(s) 1" help="Files must have this syntax : name_R1_001.fastq" />
95 <param name="inputs_paired2" argument="-2" format="fastqsanger,fastq.gz" type="data" label="paired-end reads infile(s) 2" help="Files must have this syntax : name_R2_001.fastq" />
96 <!-- With two read files each read may carry its barcode inline or in the header, hence the extra combinations -->
97 <param name="barcode_encoding" type="select" label="Barcode location">
98 <option value="--inline_null" selected="True">Barcode is inline with sequence, only on the single-end read (read 1)</option>
99 <option value="--index_null">Barcode is provided in FASTQ header, only on the single-end read (read 1)</option>
100 <option value="--inline_inline">Barcode is inline with sequence, on both single and paired-end read (read 1 and 2)</option>
101 <option value="--index_index">Barcode is provided in FASTQ header, on both single and paired-end read (read 1 and 2)</option>
102 <option value="--inline_index">Barcode is inline with sequence on single-end read (read 1), and in FASTQ header for paired-end read (read 2)</option>
103 <option value="--index_inline">Barcode is provided in FASTQ header on single-end read (read 1), and is inline with sequence on paired-end read (read 2)</option>
104 </param>
105 </when>
106 </conditional>
107
108 <param name="barcode" argument="-b" type="data" format="tabular,txt" label="Barcode file" help="Barcode file" />
109 <!-- The selector value ("1" or "2") decides whether the command uses the single-enzyme or the two-enzyme arguments -->
110 <conditional name="options_enzyme">
111 <param name="options_enzyme_selector" type="select" label="Number of enzymes">
112 <option value="1">One</option>
113 <option value="2">Two</option>
114 </param>
115 <when value="1">
116 <param name="enzyme" type="select" label="Enzyme" argument="-e" help="provide the restriction enzyme used" >
117 <expand macro="enzymes"/>
118 </param>
119 </when>
120 <when value="2">
121 <param name="enzyme" type="select" label="Enzyme" argument="--renz_1" help="provide the restriction enzyme used" >
122 <expand macro="enzymes"/>
123 </param>
124 <param name="enzyme2" type="select" label="Second enzyme" argument="--renz_2" help="provide the second restriction enzyme used" >
125 <expand macro="enzymes"/>
126 </param>
127 </when>
128 </conditional>
129 <!-- When checked, -D is added to the command and the "discarded" output collection is produced -->
130 <param name="capture" type="boolean" checked="false" truevalue="-D" falsevalue="" argument="-D" label="Capture discarded reads to a file" />
131 <!-- The window size is a fraction of the read length, so it is bounded to the 0..1 range stated in its label -->
132 <section name="options_advanced" title="advanced options" expanded="False">
133 <param name="sliding" type="float" value="0.15" min="0" max="1" argument="-w" label="Set the size of the sliding window as a fraction of the read length, between 0 and 1 (default 0.15)" />
134 <param name="remove" type="boolean" checked="false" truevalue="-c" falsevalue="" argument="-c" label="Clean data, remove any read with an uncalled base" />
135 <param name="discard" type="boolean" checked="false" truevalue="-q" falsevalue="" argument="-q" label="Discard reads with low quality scores"/>
136 <param name="score" type="integer" value="10" argument="-s" label="Set the score limit. If the average score within the sliding window drops below this value, the read is discarded (default 10)" />
137 <param name="rescue" type="boolean" checked="false" truevalue="-r" falsevalue="" argument="-r" label="Rescue barcodes and RAD-Tags?"/>
138 <param name="truncate" type="integer" value="" optional="True" argument="-t" label="Truncate final read length to this value" />
139 </section>
140
141 <!-- Stacks can also write fastq.gz and fasta.gz output, but those are not offered here because they are uncommon datatypes in Galaxy -->
142 <param name="outype" argument="-y" type="select" label="Output format" help="output type, either 'fastq' or 'fasta'" >
143 <option value="fastq" selected="True">fastq</option>
144 <option value="fasta">fasta</option>
145 </param>
146 </inputs>
147
148 <outputs>
149 <data format="txt" name="output_log" label="results.log with ${tool.name} on ${on_string}: demultiplexed and cleaned reads" from_work_dir="stacks_outputs/process_radtags.log" />
150 <!-- Patterns are end-anchored and the demultiplexed ones skip *.rem files, so a file in stacks_outputs lands in at most one collection (NOTE(review): assumes Galaxy applies patterns as a prefix match; confirm) -->
151 <collection name="demultiplexed" type="list" label="Demultiplexed reads from ${on_string}">
152 <discover_datasets pattern="(?!.*\.rem(\.[12])?\.fq$)(?P&lt;name&gt;.+)\.fq$" ext="fastqsanger" directory="stacks_outputs" />
153 <discover_datasets pattern="(?!.*\.rem(\.[12])?\.fa$)(?P&lt;name&gt;.+)\.fa$" ext="fasta" directory="stacks_outputs" />
154 </collection>
155 <collection name="remaining" type="list" label="Remaining orphan reads from ${on_string}">
156 <filter>input_type['options_type_selector'] == "paired"</filter>
157 <discover_datasets pattern="(?P&lt;name&gt;.+\.rem(\.[12])?)\.fq$" ext="fastqsanger" directory="stacks_outputs" />
158 <discover_datasets pattern="(?P&lt;name&gt;.+\.rem(\.[12])?)\.fa$" ext="fasta" directory="stacks_outputs" />
159 </collection>
160 <collection name="discarded" type="list" label="${tool.name}: discarded reads from ${on_string}">
161 <filter>capture is True</filter>
162 <discover_datasets pattern="(?P&lt;name&gt;.+)\.fq\.discards$" ext="fastqsanger" directory="stacks_outputs" />
163 <discover_datasets pattern="(?P&lt;name&gt;.+)\.fa\.discards$" ext="fasta" directory="stacks_outputs" />
164 </collection>
165 </outputs>
166
167 <tests>
168 <test> <!-- single-end input, one enzyme, low-quality filtering on, discarded reads captured -->
169 <param value="single" name="options_type_selector" />
170 <param value="procrad/R1.fq" name="input_single" ftype="fastqsanger" />
171 <param value="procrad/barcodes" name="barcode" />
172 <param value="1" name="options_enzyme_selector" />
173 <param value="ecoRI" name="enzyme" />
174 <param value="true" name="discard" />
175 <param value="true" name="capture" />
176 <output file="procrad/process_radtags.out" name="output_log" compare="sim_size" />
177 <output_collection name="demultiplexed">
178 <element file="demultiplexed/PopA_01.1.fq" name="PopA_01" compare="sim_size" />
179 </output_collection>
180 <output_collection name="discarded">
181 <element name="R1">
182 <assert_contents>
183 <has_text text="lane1_fakedata0_11"/>
184 </assert_contents>
185 </element>
186 </output_collection>
187 </test>
188 <test> <!-- paired-end input with the same options; also checks the orphan-read collection -->
189 <param value="paired" name="options_type_selector" />
190 <param value="procrad/R1.fq" name="inputs_paired1" ftype="fastqsanger" />
191 <param value="procrad/R2.fq" name="inputs_paired2" ftype="fastqsanger" />
192 <param value="procrad/barcodes" name="barcode" />
193 <param value="1" name="options_enzyme_selector" />
194 <param value="ecoRI" name="enzyme" />
195 <param value="true" name="discard" />
196 <param value="true" name="capture" />
197 <output file="procrad/process_radtags.out" name="output_log" compare="sim_size" />
198 <output_collection name="demultiplexed">
199 <element file="demultiplexed/PopA_01.1.fq" name="PopA_01.1" compare="sim_size" />
200 </output_collection>
201 <output_collection name="remaining">
202 <element file="demultiplexed/PopA_01.rem.1.fq" name="PopA_01.rem.1" compare="sim_size" />
203 </output_collection>
204 <output_collection name="discarded">
205 <element name="R1">
206 <assert_contents>
207 <has_text text="lane1_fakedata0_11"/>
208 </assert_contents>
209 </element>
210 </output_collection>
211 </test>
212 <test> <!-- paired-end input with fasta selected as the output format -->
213 <param value="paired" name="options_type_selector" />
214 <param value="procrad/R1.fq" name="inputs_paired1" ftype="fastqsanger" />
215 <param value="procrad/R2.fq" name="inputs_paired2" ftype="fastqsanger" />
216 <param value="procrad/barcodes" name="barcode" />
217 <param value="1" name="options_enzyme_selector" />
218 <param value="ecoRI" name="enzyme" />
219 <param value="true" name="discard" />
220 <param value="true" name="capture" />
221 <param value="fasta" name="outype" />
222 <output file="procrad/process_radtags.out" name="output_log" compare="sim_size" />
223 <output_collection name="demultiplexed">
224 <element file="demultiplexed/PopA_01.1.fa" name="PopA_01.1" compare="sim_size" />
225 </output_collection>
226 <output_collection name="remaining">
227 <element file="demultiplexed/PopA_01.rem.1.fa" name="PopA_01.rem.1" compare="sim_size" />
228 </output_collection>
229 <output_collection name="discarded">
230 <element name="R1">
231 <assert_contents>
232 <has_text text="lane1_fakedata0_11"/>
233 </assert_contents>
234 </element>
235 </output_collection>
236 </test>
237 </tests>
238
239
240
241 <help>
242 <![CDATA[
243 .. class:: infomark
244
245 **What it does**
246
247 This program examines raw reads from an Illumina sequencing run. First, it checks that the barcode and the RAD cutsite are intact, and demultiplexes the data. If there are errors in the barcode or the RAD site, within a certain allowance, process_radtags can correct them. Second, it slides a window down the length of the read and checks the average quality score within the window. If the score drops below 90% probability of being correct (a raw phred score of 10), the read is discarded. This allows for some sequencing errors while eliminating reads where the sequence is degrading as it is being sequenced. By default the sliding window is 15% of the length of the read, but it can be specified on the command line (the threshold and window size can be adjusted).
248
249 The process_radtags program can:
250
251 - handle data that is barcoded, either inline or using an index, or unbarcoded.
252 - use combinatorial barcodes.
253 - check and correct for a restriction enzyme cutsite for single or double-digested data.
254 - filter adapter sequence while allowing for sequencing error in the adapter pattern.
255 - process individual files or whole directories of files.
256 - directly read gzipped data
257 - filter reads based on Illumina's Chastity filter
258
259 **Help**
260
261 Input files:
262
263 - FASTQ
264
265 - Barcode File Format
266
267 The barcode file is a very simple format:
268
269 ======= ===========
270 Barcode Sample name
271 ======= ===========
272 ATGGGG PopA_01
273 GGGTAA PopA_02
274 AGGAAA PopA_03
275 TTTAAG PopA_04
276 GGTGTG PopA_05
277 TGATGT PopA_06
278 ======= ===========
279
280 Combinatorial barcodes are specified, one per column, separated by a tab:
281
282 ======== ======== ===========
283 Barcode1 Barcode2 Sample name
284 ======== ======== ===========
285 CGATA ACGTA PopA_01
286 CGGCG CGTA PopA_02
287 GAAGC CGTA PopA_03
288 GAGAT CGTA PopA_04
289 CGATA AGCA PopA_05
290 CGGCG AGCA PopA_06
291 ======== ======== ===========
292
293 @STACKS_INFOS@
294 ]]>
295 </help>
296 <expand macro="citation" />
297 </tool>