comparison gmap/gmap.xml @ 2:52da588232b0

Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
author Jim Johnson <jj@umn.edu>
date Fri, 21 Oct 2011 11:38:55 -0500
parents d58d272914e7
children
comparison
equal deleted inserted replaced
1:30d42bb409b8 2:52da588232b0
1 <tool id="gmap" name="GMAP" version="2.0.0"> 1 <tool id="gmap" name="GMAP" version="2.0.0">
2 <description>Genomic Mapping and Alignment Program for mRNA and EST sequences</description> 2 <description>Genomic Mapping and Alignment Program for mRNA and EST sequences</description>
3 <requirements> 3 <requirements>
4 <requirement type="binary">gmap</requirement> 4 <requirement type="binary">gmap</requirement>
5 <!-- proposed tag for added datatype dependencies -->
6 <requirement type="datatype">gmapdb</requirement>
7 <requirement type="datatype">gmap_annotation</requirement>
8 <requirement type="datatype">gmap_splicesites</requirement>
9 <requirement type="datatype">gmap_introns</requirement>
10 <requirement type="datatype">gmap_snps</requirement>
5 </requirements> 11 </requirements>
6 <version_string>gmap --version</version_string> 12 <version_string>gmap --version</version_string>
7 <command> 13 <command>
8 #import os,os.path 14 #import os,os.path
9 gmap 15 gmap
111 #else 117 #else
112 2> $gmap_stderr > $output 118 2> $gmap_stderr > $output
113 #end if 119 #end if
114 </command> 120 </command>
115 <inputs> 121 <inputs>
122 <!-- Input data -->
123 <param name="input" type="data" format="fasta,fastqsanger,fastqillumina" label="&lt;H2&gt;Input Sequences&lt;/H2&gt;Select an mRNA or EST dataset to map" />
124 <repeat name="inputs" title="additional mRNA or EST dataset to map">
125 <param name="added_input" type="data" format="fasta,fastqsanger,fastqillumina" label=""/>
126 </repeat>
127 <param name="quality_protocol" type="select" label="Protocol for input quality scores">
128 <option value="">No quality scores</option>
129 <option value="sanger">Sanger quality scores</option>
130 <option value="illumina">Illumina quality scores</option>
131 </param>
132
133 <!-- GMAPDB for mapping -->
116 <conditional name="refGenomeSource"> 134 <conditional name="refGenomeSource">
117 <param name="genomeSource" type="select" label="Will you map to a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options"> 135 <param name="genomeSource" type="select" label="&lt;HR&gt;&lt;H2&gt;Map To&lt;/H2&gt;Will you map to a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
118 <option value="indexed">Use a built-in index</option> 136 <option value="indexed">Use a built-in index</option>
119 <option value="gmapdb">Use gmapdb from the history</option> 137 <option value="gmapdb">Use gmapdb from the history</option>
120 <option value="history">Use a fasta reference sequence from the history</option> 138 <option value="history">Use a fasta reference sequence from the history</option>
121 </param> 139 </param>
122 <when value="indexed"> 140 <when value="indexed">
153 </param> 171 </param>
154 </when> 172 </when>
155 <when value="gmapdb"> 173 <when value="gmapdb">
156 <param name="gmapdb" type="data" format="gmapdb" metadata_name="dbkey" label="Select a gmapdb" 174 <param name="gmapdb" type="data" format="gmapdb" metadata_name="dbkey" label="Select a gmapdb"
157 help="A GMAP database built with GMAP Build"/> 175 help="A GMAP database built with GMAP Build"/>
158 <param name="gmapdb" type="data" format="gmapdb" metadata_name="dbkey" label="Select a gmapdb"
159 help="A GMAP database built with GMAP Build"/>
160 <param name="kmer" type="select" data_ref="gmapdb" label="kmer size" help="Defaults to highest available kmer size"> 176 <param name="kmer" type="select" data_ref="gmapdb" label="kmer size" help="Defaults to highest available kmer size">
161 <options> 177 <options>
162 <filter type="data_meta" ref="gmapdb" key="kmers" multiple="True" separator=","/> 178 <filter type="data_meta" ref="gmapdb" key="kmers" multiple="True" separator=","/>
163 </options> 179 </options>
164 </param> 180 </param>
172 <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome" 188 <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome"
173 help="Fasta containing genomic DNA sequence"/> 189 help="Fasta containing genomic DNA sequence"/>
174 </when> 190 </when>
175 </conditional> 191 </conditional>
176 192
177 <!-- Input data --> 193
178 <param name="input" type="data" format="fasta,fastqsanger,fastqillumina" label="Select an mRNA or EST dataset to map" /> 194 <!-- Computation options -->
179 <repeat name="inputs" title="additional mRNA or EST dataset to map"> 195 <conditional name="computation">
180 <param name="added_input" type="data" format="fasta,fastqsanger,fastqillumina" label=""/> 196 <param name="options" type="select" label="&lt;HR&gt;Computational Settings" help="">
181 </repeat> 197 <option value="default">Use default settings</option>
182 <param name="quality_protocol" type="select" label="Protocol for input quality scores"> 198 <option value="advanced">Set Computation Options</option>
183 <option value="">No quality scores</option> 199 </param>
184 <option value="sanger">Sanger quality scores</option> 200 <when value="default"/>
185 <option value="illumina">Illumina quality scores</option> 201 <when value="advanced">
186 </param> 202 <param name="nosplicing" type="boolean" truevalue="--nosplicing" falsevalue="" checked="false" label="Turn off splicing" help="(useful for aligning genomic sequences onto a genome)"/>
203 <param name="min_intronlength" type="integer" value="9" label="Min length for one internal intron (default 9)." help="Below this size, a genomic gap will be considered a deletion rather than an intron." />
204 <param name="intronlength" type="integer" value="1000000" label="Max length for one intron (default 1000000)" />
205 <param name="localsplicedist" type="integer" value="200000" label="Max length for known splice sites at ends of sequence (default 200000)" />
206 <param name="totallength" type="integer" value="2400000" label="Max total intron length (default 2400000)" />
207 <param name="chimera_margin" type="integer" value="40" label="Amount of unaligned sequence that triggers search for a chimera (default is 40, 0 is off)" />
208 <param name="direction" type="select" label="cDNA direction">
209 <option value="auto">auto</option>
210 <option value="sense_force">sense_force</option>
211 <option value="antisense_force">antisense_force</option>
212 <option value="sense_filter">sense_filter</option>
213 <option value="antisense_filter">antisense_filter</option>
214 </param>
215 <param name="trimendexons" type="integer" value="12" label="Trim end exons with fewer than given number of matches (in nt, default 12)" />
216 <param name="cross_species" type="boolean" truevalue="--cross-species" falsevalue="" checked="false" label="Cross-species alignment" help="For cross-species alignments, use a more sensitive search for canonical splicing"/>
217
218 <param name="canonical" type="select" label="Reward for canonical and semi-canonical introns">
219 <option value="1">high reward (default)</option>
220 <option value="0">low reward</option>
221 <option value="2">low reward for high-identity sequences</option>
222 </param>
223 <param name="allow_close_indels" type="select" label="Allow an insertion and deletion close to each other">
224 <option value="1" selected="true">yes (default)</option>
225 <option value="0">no</option>
226 <option value="2">only for high-quality alignments</option>
227 </param>
228 <param name="microexon_spliceprob" type="float" value="0.90" label="Allow microexons only if one of the splice site probabilities is greater than this value (default 0.90)" >
229 <validator type="in_range" message="splice probability between 0.00 and 1.00" min="0" max="1"/>
230 </param>
231 <param name="prunelevel" type="select" label="Pruning level">
232 <option value="0">no pruning (default)</option>
233 <option value="1">poor sequences</option>
234 <option value="2">repetitive sequences</option>
235 <option value="3">poor and repetitive sequences</option>
236 </param>
237 <!-- could do this as a config file
238 <param name="chrsubsetfile" type="data" format="fasta" label="User-supplied chromosome subset file" />
239 <param name="chrsubset" type="text" label="Chromosome subset to search" />
240 -->
241 </when>
242 </conditional>
243
244 <!-- Advanced Settings -->
245 <conditional name="advanced">
246 <param name="options" type="select" label="&lt;HR&gt;Advanced Settings" help="">
247 <option value="default">Use default settings</option>
248 <option value="used">Set Options</option>
249 </param>
250 <when value="default"/>
251 <when value="used">
252 <param name="nolengths" type="boolean" checked="false" truevalue="--nolengths=true" falsevalue="" label="No intron lengths in alignment"/>
253 <param name="invertmode" type="select" label=" Mode for alignments to genomic (-) strand" help="">
254 <option value="">Don't invert the cDNA (default)</option>
255 <option value="--invertmode=1">Invert cDNA and print genomic (-) strand</option>
256 <option value="--invertmode=2">Invert cDNA and print genomic (+) strand</option>
257 </param>
258 <param name="introngap" type="integer" value="3" label="Nucleotides to show on each end of intron (default=3)" />
259 <param name="wraplength" type="integer" value="50" label="Line Wrap length for alignment (default=50)" />
260 <param name="npaths" type="integer" value="-1" optional="true"
261 label="Maximum number of paths to show. Ignored if negative. If 0, prints two paths if chimera detected, else one." />
262 <param name="chimera_overlap" type="integer" value="0" label="Overlap to show, if any, at chimera breakpoint" />
263 <param name="tolerant" type="boolean" checked="false" truevalue="--tolerant=true" falsevalue=""
264 label="Translates cDNA with corrections for frameshifts"/>
265 <param name="protein" type="select" label="Protein alignment" help="">
266 <option value="">default</option>
267 <option value="--fulllength=true">Assume full-length protein, starting with Met</option>
268 <option value="--truncate=true">Truncate alignment around full-length protein, Met to Stop</option>
269 </param>
270 </when>
271 </conditional>
272
273 <!-- Output data -->
187 <conditional name="result"> 274 <conditional name="result">
188 <param name="format" type="select" label="Select the output format" help=""> 275 <param name="format" type="select" label="&lt;HR&gt;&lt;H2&gt;Output&lt;/H2&gt;Select the output format" help="">
189 <option value="gmap">GMAP default output</option> 276 <option value="gmap">GMAP default output</option>
190 <option value="summary">Summary of alignments</option> 277 <option value="summary">Summary of alignments</option>
191 <option value="align">Alignment</option> 278 <option value="align">Alignment</option>
192 <option value="continuous">Alignment in three continuous lines</option> 279 <option value="continuous">Alignment in three continuous lines</option>
193 <option value="continuous-by-exon">Alignment in three lines per exon</option> 280 <option value="continuous-by-exon">Alignment in three lines per exon</option>
244 <param name="read_group_platform" type="text" value="" label="Value to put into read-group library platform (RG-PL) field"/> 331 <param name="read_group_platform" type="text" value="" label="Value to put into read-group library platform (RG-PL) field"/>
245 </when> 332 </when>
246 </conditional> <!-- name="result" --> 333 </conditional> <!-- name="result" -->
247 334
248 <param name="split_output" type="boolean" truevalue="--split-output=gmap_out" falsevalue="" checked="false" label="Separate outputs for nomapping, uniq, mult, and chimera" help="(chimera only when chimera-margin is selected)"/> 335 <param name="split_output" type="boolean" truevalue="--split-output=gmap_out" falsevalue="" checked="false" label="Separate outputs for nomapping, uniq, mult, and chimera" help="(chimera only when chimera-margin is selected)"/>
249 336
250 <conditional name="computation"> 337
251 <param name="options" type="select" label="Computational Settings" help="">
252 <option value="default">Use default settings</option>
253 <option value="advanced">Set Computation Options</option>
254 </param>
255 <when value="default"/>
256 <when value="advanced">
257 <param name="nosplicing" type="boolean" truevalue="--nosplicing" falsevalue="" checked="false" label="Turn off splicing" help="(useful for aligning genomic sequences onto a genome)"/>
258 <param name="min_intronlength" type="integer" value="9" label="Min length for one internal intron (default 9)." help="Below this size, a genomic gap will be considered a deletion rather than an intron." />
259 <param name="intronlength" type="integer" value="1000000" label="Max length for one intron (default 1000000)" />
260 <param name="localsplicedist" type="integer" value="200000" label="Max length for known splice sites at ends of sequence (default 200000)" />
261 <param name="totallength" type="integer" value="2400000" label="Max total intron length (default 2400000)" />
262 <param name="chimera_margin" type="integer" value="40" label="Amount of unaligned sequence that triggers search for a chimera (default is 40, 0 is off)" />
263 <param name="direction" type="select" label="cDNA direction">
264 <option value="auto">auto</option>
265 <option value="sense_force">sense_force</option>
266 <option value="antisense_force">antisense_force</option>
267 <option value="sense_filter">sense_filter</option>
268 <option value="antisense_filter">antisense_filter</option>
269 </param>
270 <param name="trimendexons" type="integer" value="12" label="Trim end exons with fewer than given number of matches (in nt, default 12)" />
271 <param name="cross_species" type="boolean" truevalue="--cross-species" falsevalue="" checked="false" label="Cross-species alignment" help="For cross-species alignments, use a more sensitive search for canonical splicing"/>
272
273 <param name="canonical" type="select" label="Reward for canonical and semi-canonical introns">
274 <option value="1">high reward (default)</option>
275 <option value="0">low reward</option>
276 <option value="2">low reward for high-identity sequences</option>
277 </param>
278 <param name="allow_close_indels" type="select" label="Allow an insertion and deletion close to each other">
279 <option value="1" selected="true">yes (default)</option>
280 <option value="0">no</option>
281 <option value="2">only for high-quality alignments</option>
282 </param>
283 <param name="microexon_spliceprob" type="float" value="0.90" label="Allow microexons only if one of the splice site probabilities is greater than this value (default 0.90)" >
284 <validator type="in_range" message="splice probability between 0.00 and 1.00" min="0" max="1"/>
285 </param>
286 <param name="prunelevel" type="select" label="Pruning level">
287 <option value="0">no pruning (default)</option>
288 <option value="1">poor sequences</option>
289 <option value="2">repetitive sequences</option>
290 <option value="3">poor and repetitive sequences</option>
291 </param>
292 <!-- could do this as a config file
293 <param name="chrsubsetfile" type="data" format="fasta" label="User-supplied chromosome subset file" />
294 <param name="chrsubset" type="text" label="Chromosome subset to search" />
295 -->
296 </when>
297 </conditional>
298 <conditional name="advanced">
299 <param name="options" type="select" label="Advanced Settings" help="">
300 <option value="default">Use default settings</option>
301 <option value="used">Set Options</option>
302 </param>
303 <when value="default"/>
304 <when value="used">
305 <param name="nolengths" type="boolean" checked="false" truevalue="--nolengths=true" falsevalue="" label="No intron lengths in alignment"/>
306 <param name="invertmode" type="select" label=" Mode for alignments to genomic (-) strand" help="">
307 <option value="">Don't invert the cDNA (default)</option>
308 <option value="--invertmode=1">Invert cDNA and print genomic (-) strand</option>
309 <option value="--invertmode=2">Invert cDNA and print genomic (+) strand</option>
310 </param>
311 <param name="introngap" type="integer" value="3" label="Nucleotides to show on each end of intron (default=3)" />
312 <param name="wraplength" type="integer" value="50" label="Line Wrap length for alignment (default=50)" />
313 <param name="npaths" type="integer" value="-1" optional="true"
314 label="Maximum number of paths to show. Ignored if negative. If 0, prints two paths if chimera detected, else one." />
315 <param name="chimera_overlap" type="integer" value="0" label="Overlap to show, if any, at chimera breakpoint" />
316 <param name="tolerant" type="boolean" checked="false" truevalue="--tolerant=true" falsevalue=""
317 label="Translates cDNA with corrections for frameshifts"/>
318 <param name="protein" type="select" label="Protein alignment" help="">
319 <option value="">default</option>
320 <option value="--fulllength=true">Assume full-length protein, starting with Met</option>
321 <option value="--truncate=true">Truncate alignment around full-length protein, Met to Stop</option>
322 </param>
323 </when>
324 </conditional>
325 <!-- 338 <!--
326 map=iitfile Map file. If argument is '?' (with the quotes), this lists available map files. 339 map=iitfile Map file. If argument is '?' (with the quotes), this lists available map files.
327 mapexons Map each exon separately 340 mapexons Map each exon separately
328 mapboth Report hits from both strands of genome 341 mapboth Report hits from both strands of genome
329 flanking=INT Show flanking hits (default 0) 342 flanking=INT Show flanking hits (default 0)
330 print-comment Show comment line for each hit 343 print-comment Show comment line for each hit
331 --> 344 -->
345
346
332 </inputs> 347 </inputs>
333 <outputs> 348 <outputs>
334 <data format="txt" name="gmap_stderr" label="${tool.name} on ${on_string}: log"/> 349 <data format="txt" name="gmap_stderr" label="${tool.name} on ${on_string}: stderr"/>
335 <data format="txt" name="output" label="${tool.name} on ${on_string} ${result.format}" > 350 <data format="txt" name="output" label="${tool.name} on ${on_string} ${result.format}" >
336 <filter>(split_output == False)</filter> 351 <filter>(split_output == False)</filter>
337 <change_format> 352 <change_format>
338 <when input="result['format']" value="gff3_gene" format="gff3"/> 353 <when input="result['format']" value="gff3_gene" format="gff3"/>
339 <when input="result['format']" value="gff3_match_cdna" format="gff3"/> 354 <when input="result['format']" value="gff3_match_cdna" format="gff3"/>
340 <when input="result['format']" value="gff3_match_est" format="gff3"/> 355 <when input="result['format']" value="gff3_match_est" format="gff3"/>
341 <when input="result['format']" value="sam" format="sam"/> 356 <when input="result['format']" value="sam" format="sam"/>
342 <!--
343 <when input="result['format']" value="splicesites" format="gmap_splicesites"/> 357 <when input="result['format']" value="splicesites" format="gmap_splicesites"/>
344 <when input="result['format']" value="introns" format="gmap_introns"/> 358 <when input="result['format']" value="introns" format="gmap_introns"/>
345 --> 359 <when input="result['format']" value="map_genes" format="gmap_annotation"/>
360 <when input="result['format']" value="map_exons" format="gmap_annotation"/>
346 </change_format> 361 </change_format>
347 </data> 362 </data>
348 <data format="txt" name="uniq" label="${tool.name} on ${on_string} uniq.${result.format}" from_work_dir="gmap_out.uniq"> 363 <data format="txt" name="uniq" label="${tool.name} on ${on_string} uniq.${result.format}" from_work_dir="gmap_out.uniq">
349 <filter>(split_output == True)</filter> 364 <filter>(split_output == True)</filter>
350 <change_format> 365 <change_format>
351 <when input="result['format']" value="gff3_gene" format="gff3"/> 366 <when input="result['format']" value="gff3_gene" format="gff3"/>
352 <when input="result['format']" value="gff3_match_cdna" format="gff3"/> 367 <when input="result['format']" value="gff3_match_cdna" format="gff3"/>
353 <when input="result['format']" value="gff3_match_est" format="gff3"/> 368 <when input="result['format']" value="gff3_match_est" format="gff3"/>
354 <when input="result['format']" value="sam" format="sam"/> 369 <when input="result['format']" value="sam" format="sam"/>
370 <when input="result['format']" value="splicesites" format="gmap_splicesites"/>
371 <when input="result['format']" value="introns" format="gmap_introns"/>
372 <when input="result['format']" value="map_genes" format="gmap_annotation"/>
373 <when input="result['format']" value="map_exons" format="gmap_annotation"/>
355 </change_format> 374 </change_format>
356 </data> 375 </data>
357 <data format="txt" name="transloc" label="${tool.name} on ${on_string} transloc.${result.format}" from_work_dir="gmap_out.transloc"> 376 <data format="txt" name="transloc" label="${tool.name} on ${on_string} transloc.${result.format}" from_work_dir="gmap_out.transloc">
358 <filter>(split_output == True)</filter> 377 <filter>(split_output == True)</filter>
359 <change_format> 378 <change_format>
360 <when input="result['format']" value="gff3_gene" format="gff3"/> 379 <when input="result['format']" value="gff3_gene" format="gff3"/>
361 <when input="result['format']" value="gff3_match_cdna" format="gff3"/> 380 <when input="result['format']" value="gff3_match_cdna" format="gff3"/>
362 <when input="result['format']" value="gff3_match_est" format="gff3"/> 381 <when input="result['format']" value="gff3_match_est" format="gff3"/>
363 <when input="result['format']" value="sam" format="sam"/> 382 <when input="result['format']" value="sam" format="sam"/>
383 <when input="result['format']" value="splicesites" format="gmap_splicesites"/>
384 <when input="result['format']" value="introns" format="gmap_introns"/>
385 <when input="result['format']" value="map_genes" format="gmap_annotation"/>
386 <when input="result['format']" value="map_exons" format="gmap_annotation"/>
364 </change_format> 387 </change_format>
365 </data> 388 </data>
366 <data format="txt" name="nomapping" label="${tool.name} on ${on_string} nomapping.${result.format}" from_work_dir="gmap_out.nomapping"> 389 <data format="txt" name="nomapping" label="${tool.name} on ${on_string} nomapping.${result.format}" from_work_dir="gmap_out.nomapping">
367 <filter>(split_output == True)</filter> 390 <filter>(split_output == True)</filter>
368 <change_format> 391 <change_format>
369 <when input="result['format']" value="gff3_gene" format="gff3"/> 392 <when input="result['format']" value="gff3_gene" format="gff3"/>
370 <when input="result['format']" value="gff3_match_cdna" format="gff3"/> 393 <when input="result['format']" value="gff3_match_cdna" format="gff3"/>
371 <when input="result['format']" value="gff3_match_est" format="gff3"/> 394 <when input="result['format']" value="gff3_match_est" format="gff3"/>
372 <when input="result['format']" value="sam" format="sam"/> 395 <when input="result['format']" value="sam" format="sam"/>
396 <when input="result['format']" value="splicesites" format="gmap_splicesites"/>
397 <when input="result['format']" value="introns" format="gmap_introns"/>
398 <when input="result['format']" value="map_genes" format="gmap_annotation"/>
399 <when input="result['format']" value="map_exons" format="gmap_annotation"/>
373 </change_format> 400 </change_format>
374 </data> 401 </data>
375 <data format="txt" name="mult" label="${tool.name} on ${on_string} mult.${result.format}" from_work_dir="gmap_out.mult"> 402 <data format="txt" name="mult" label="${tool.name} on ${on_string} mult.${result.format}" from_work_dir="gmap_out.mult">
376 <filter>(split_output == True)</filter> 403 <filter>(split_output == True)</filter>
377 <change_format> 404 <change_format>
378 <when input="result['format']" value="gff3_gene" format="gff3"/> 405 <when input="result['format']" value="gff3_gene" format="gff3"/>
379 <when input="result['format']" value="gff3_match_cdna" format="gff3"/> 406 <when input="result['format']" value="gff3_match_cdna" format="gff3"/>
380 <when input="result['format']" value="gff3_match_est" format="gff3"/> 407 <when input="result['format']" value="gff3_match_est" format="gff3"/>
381 <when input="result['format']" value="sam" format="sam"/> 408 <when input="result['format']" value="sam" format="sam"/>
409 <when input="result['format']" value="splicesites" format="gmap_splicesites"/>
410 <when input="result['format']" value="introns" format="gmap_introns"/>
411 <when input="result['format']" value="map_genes" format="gmap_annotation"/>
412 <when input="result['format']" value="map_exons" format="gmap_annotation"/>
382 </change_format> 413 </change_format>
383 </data> 414 </data>
384 </outputs> 415 </outputs>
385 <tests> 416 <tests>
386 </tests> 417 </tests>