comparison gmap/gsnap.xml @ 2:52da588232b0

Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
author Jim Johnson <jj@umn.edu>
date Fri, 21 Oct 2011 11:38:55 -0500
parents d58d272914e7
children f49f5a460c74
comparison
equal deleted inserted replaced
1:30d42bb409b8 2:52da588232b0
1 <tool id="gsnap" name="GSNAP" version="2.0.0"> 1 <tool id="gsnap" name="GSNAP" version="2.0.0">
2 <description>Genomic Short-read Nucleotide Alignment Program</description> 2 <description>Genomic Short-read Nucleotide Alignment Program</description>
3 <requirements> 3 <requirements>
4 <requirement type="binary">gsnap</requirement> 4 <requirement type="binary">gsnap</requirement>
5 <!-- proposed tag for added datatype dependencies -->
6 <requirement type="datatype">gmapdb</requirement>
7 <requirement type="datatype">gmapsnpindex</requirement>
8 <requirement type="datatype">splicesites.iit</requirement>
9 <requirement type="datatype">introns.iit</requirement>
5 </requirements> 10 </requirements>
6 <version_string>gsnap --version</version_string> 11 <version_string>gsnap --version</version_string>
7 <command> 12 <command>
8 #import os.path, re 13 #import os.path, re
9 gsnap 14 gsnap
10 --nthreads="4" --ordered 15 --nthreads="4" --ordered
11 #if $refGenomeSource.genomeSource == "history": 16 #if $refGenomeSource.genomeSource == "gmapdb":
12 --gseg=$refGenomeSource.ownFile
13 #elif $refGenomeSource.genomeSource == "gmapdb":
14 #set $gmapdb = $os.listdir($refGenomeSource.gmapdb.extra_files_path)[0] 17 #set $gmapdb = $os.listdir($refGenomeSource.gmapdb.extra_files_path)[0]
15 --dir=$refGenomeSource.gmapdb.extra_files_path --db=$gmapdb 18 --dir=$refGenomeSource.gmapdb.extra_files_path --db=$refGenomeSource.gmapdb.metadata.db_name
16 #if $refGenomeSource.kmer != None and len($refGenomeSource.kmer.__str__) == 2:
17 --kmer=$refGenomeSource.kmer
18 #end if
19 #if $refGenomeSource.splicemap != None and len($refGenomeSource.splicemap.__str__) == 2:
20 --use-splices=$refGenomeSource.splicemap
21 #end if
22 #if $refGenomeSource.snpindex != None and len($refGenomeSource.snpindex.__str__) == 2:
23 --use-snps=$refGenomeSource.snpindex
24 #end if
25 #else: 19 #else:
26 --dir=$os.path.dirname($refGenomeSource.gmapindex.value) --db=$os.path.basename($refGenomeSource.gmapindex.value) 20 --dir=$os.path.dirname($refGenomeSource.gmapindex.value) --db=$os.path.basename($refGenomeSource.gmapindex.value)
27 #if $refGenomeSource.kmer != None and len($refGenomeSource.kmer.__str__) == 2: 21 #end if
28 --kmer=$refGenomeSource.kmer 22 #if $refGenomeSource.kmer != None and len($refGenomeSource.kmer.__str__) == 2:
29 #end if 23 --kmer=$refGenomeSource.kmer
24 #end if
25 #if $refGenomeSource.use_splicing.src == 'gmapdb':
26 #if $refGenomeSource.use_splicing.splicemap != None and len($refGenomeSource.use_splicing.splicemap.__str__) > 0:
27 -s $refGenomeSource.use_splicing.splicemap.value
28 #end if
29 #elif $refGenomeSource.use_splicing.src == 'history':
30 #if $refGenomeSource.use_splicing.splicemap != None and len($refGenomeSource.use_splicing.splicemap.__str__) > 0:
31 -S $os.path.dirname($refGenomeSource.use_splicing.splicemap) -s $os.path.basename($refGenomeSource.use_splicing.splicemap)
32 #end if
33 #end if
34 #if $refGenomeSource.use_snps.src == 'gmapdb':
35 #if $refGenomeSource.use_snps.snpindex != None and len($refGenomeSource.use_snps.snpindex.__str__) > 0:
36 -v $refGenomeSource.use_snps.snpindex.value
37 #end if
38 #elif $refGenomeSource.use_snps.src == 'history':
39 #if $refGenomeSource.use_snps.snpindex != None and len($refGenomeSource.use_snps.snpindex.__str__) > 0:
40 -V $refGenomeSource.use_snps.snpindex.extra_files_path -v $refGenomeSource.use_snps.snpindex.metadata.snps_name
41 #end if
30 #end if 42 #end if
31 #if $mode.__str__ != '': 43 #if $mode.__str__ != '':
32 --mode=$mode 44 --mode=$mode
33 #end if 45 #end if
34 #if $computation.options == "advanced": 46 #if $computation.options == "advanced":
179 2> $gsnap_stderr > $results 191 2> $gsnap_stderr > $results
180 #end if 192 #end if
181 193
182 </command> 194 </command>
183 <inputs> 195 <inputs>
184 <conditional name="refGenomeSource">
185 <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
186 <option value="indexed">Use a built-in index</option>
187 <option value="gmapdb">Use gmapdb from the history</option>
188 <option value="history">Use one from the history</option>
189 </param>
190 <when value="indexed">
191 <param name="gmapindex" type="select" label="Select a reference genome" help="if your genome of interest is not listed - contact Galaxy team">
192 <options from_file="gmap_indices.loc">
193 <column name="uid" index="0" />
194 <column name="dbkey" index="1" />
195 <column name="name" index="2" />
196 <column name="kmers" index="3" />
197 <column name="maps" index="4" />
198 <column name="snps" index="5" />
199 <column name="value" index="6" />
200 </options>
201 </param>
202
203 <param name="kmer" type="select" data_ref="gmapindex" label="kmer size" help="Defaults to highest available kmer size">
204 <options from_file="gmap_indices.loc">
205 <column name="name" index="3"/>
206 <column name="value" index="3"/>
207 <filter type="param_value" ref="gmapindex" column="6"/>
208 <filter type="multiple_splitter" column="3" separator=","/>
209 <filter type="add_value" name="" value=""/>
210 <filter type="sort_by" column="3"/>
211 </options>
212 </param>
213
214 <param name="splicemap" type="select" data_ref="gmapindex" label="Use map for splicing involving known sites or known introns" help="">
215 <options from_file="gmap_indices.loc">
216 <column name="name" index="4"/>
217 <column name="value" index="4"/>
218 <filter type="param_value" ref="gmapindex" column="6"/>
219 <filter type="multiple_splitter" column="4" separator=","/>
220 <filter type="add_value" name="" value=""/>
221 <filter type="sort_by" column="4"/>
222 </options>
223 </param>
224
225 <param name="snpindex" type="select" data_ref="gmapindex" label="Use database containing known SNPs" help="">
226 <options from_file="gmap_indices.loc">
227 <column name="name" index="5"/>
228 <column name="value" index="5"/>
229 <filter type="param_value" ref="gmapindex" column="6"/>
230 <filter type="multiple_splitter" column="5" separator=","/>
231 <filter type="add_value" name="" value=""/>
232 <filter type="sort_by" column="5"/>
233 </options>
234 </param>
235 </when>
236 <when value="gmapdb">
237 <param name="gmapdb" type="data" format="gmapdb" metadata_name="dbkey" label="Select a gmapdb"
238 help="A GMAP database built with GMAP Build"/>
239 <param name="kmer" type="select" data_ref="gmapdb" label="kmer size" help="Defaults to highest available kmer size">
240 <options>
241 <filter type="data_meta" ref="gmapdb" key="kmers" multiple="True" separator=","/>
242 </options>
243 </param>
244 <param name="splicemap" type="select" data_ref="gmapdb" label="Use map for splicing involving known sites or known introns" help="">
245 <options>
246 <filter type="data_meta" ref="gmapdb" key="maps" multiple="True"/>
247 </options>
248 </param>
249 <param name="snpindex" type="select" data_ref="gmapdb" label="Use database containing known SNPs" help="">
250 <options>
251 <filter type="data_meta" ref="gmapdb" key="snps" multiple="True" separator=","/>
252 </options>
253 </param>
254
255 </when>
256 <when value="history">
257 <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome"
258 help="Fasta containing genomic DNA sequence"/>
259 </when>
260 </conditional>
261 <!-- Input data --> 196 <!-- Input data -->
262 <conditional name="seq"> 197 <conditional name="seq">
263 <param name="format" type="select" label="Select the input format" help=""> 198 <param name="format" type="select" label="&lt;H2&gt;Input Sequences&lt;/H2&gt;Select the input format" help="">
264 <option value="fastq">Fastq</option> 199 <option value="fastq">Fastq</option>
265 <option value="gsnap_fasta">GNSAP fasta</option> 200 <option value="gsnap_fasta">GNSAP fasta</option>
266 </param> 201 </param>
267 <when value="fastq"> 202 <when value="fastq">
268 <param name="fastq" type="data" format="fastq" label="Select a fastq dataset" /> 203 <param name="fastq" type="data" format="fastq" label="Select a fastq dataset" />
306 <when value="gsnap_fasta"> 241 <when value="gsnap_fasta">
307 <param name="gsnap" type="data" format="fasta" label="Select a single-end dataset" help="GSNAP fasta must have the sequence entirely on one line, a second line is interpreted as the paired-end sequence"/> 242 <param name="gsnap" type="data" format="fasta" label="Select a single-end dataset" help="GSNAP fasta must have the sequence entirely on one line, a second line is interpreted as the paired-end sequence"/>
308 <param name="circularinput" type="boolean" checked="false" truevalue="--circular-input=true" falsevalue="" label="Circular-end data (paired reads are on same strand)"/> 243 <param name="circularinput" type="boolean" checked="false" truevalue="--circular-input=true" falsevalue="" label="Circular-end data (paired reads are on same strand)"/>
309 </when> 244 </when>
310 </conditional> 245 </conditional>
246
311 <param name="mode" type="select" label="Alignment mode" help="Assumes cmetindex and atoiindex were run on the gmap datatbase."> 247 <param name="mode" type="select" label="Alignment mode" help="Assumes cmetindex and atoiindex were run on the gmap datatbase.">
312 <option value="">standard</option> 248 <option value="">standard</option>
313 <option value="cmet-stranded">cmet-stranded for bisulfite-treated DNA reads (tolerance to C-to-T changes)</option> 249 <option value="cmet-stranded">cmet-stranded for bisulfite-treated DNA reads (tolerance to C-to-T changes)</option>
314 <option value="cmet-nonstranded">cmet-nonstranded for bisulfite-treated DNA reads (tolerance to C-to-T changes)</option> 250 <option value="cmet-nonstranded">cmet-nonstranded for bisulfite-treated DNA reads (tolerance to C-to-T changes)</option>
315 <option value="atoi-stranded">atoi-stranded for RNA-editing tolerance (A-to-G changes)</option> 251 <option value="atoi-stranded">atoi-stranded for RNA-editing tolerance (A-to-G changes)</option>
316 <option value="atoi-nonstranded">atoi-nonstranded for RNA-editing tolerance (A-to-G changes)</option> 252 <option value="atoi-nonstranded">atoi-nonstranded for RNA-editing tolerance (A-to-G changes)</option>
317 </param> 253 </param>
254
255 <!-- GMAPDB for alignment -->
256 <conditional name="refGenomeSource">
257 <param name="genomeSource" type="select" label="&lt;HR&gt;&lt;H2&gt;Align To&lt;/H2&gt;Will you select a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
258 <option value="indexed">Use a built-in index</option>
259 <option value="gmapdb">Use a gmapdb from your history</option>
260 </param>
261 <when value="indexed">
262 <param name="gmapindex" type="select" label="Select a reference genome" help="if your genome of interest is not listed - contact Galaxy team">
263 <options from_file="gmap_indices.loc">
264 <column name="uid" index="0" />
265 <column name="dbkey" index="1" />
266 <column name="name" index="2" />
267 <column name="kmers" index="3" />
268 <column name="maps" index="4" />
269 <column name="snps" index="5" />
270 <column name="value" index="6" />
271 </options>
272 </param>
273
274 <param name="kmer" type="select" data_ref="gmapindex" label="kmer size" help="Defaults to highest available kmer size">
275 <options from_file="gmap_indices.loc">
276 <column name="name" index="3"/>
277 <column name="value" index="3"/>
278 <filter type="param_value" ref="gmapindex" column="6"/>
279 <filter type="multiple_splitter" column="3" separator=","/>
280 <filter type="add_value" name="" value=""/>
281 <filter type="sort_by" column="3"/>
282 </options>
283 </param>
284
285 <conditional name="use_splicing">
286 <param name="src" type="select" label="Known Splicesite and Introns"
287 help="Look for splicing involving known sites or known introns at short or long distances
288 See README instructions for the distinction between known sites and known introns">
289 <option value="none" selected="true">None</option>
290 <option value="gmapdb">From the GMAP Database</option>
291 <option value="history">A Map in your history</option>
292 </param>
293 <when value="none"/>
294 <when value="history">
295 <param name="splicemap" type="data" format="splicesites.iit,introns.iit" metadata_name="dbkey" label="Select a splicesite map"
296 help="built with GMAP IIT"/>
297 </when>
298 <when value="gmapdb">
299 <param name="splicemap" type="select" data_ref="gmapindex" label="Use map for splicing involving known sites or known introns" help="">
300 <options from_file="gmap_indices.loc">
301 <column name="name" index="4"/>
302 <column name="value" index="4"/>
303 <filter type="param_value" ref="gmapindex" column="6"/>
304 <filter type="multiple_splitter" column="4" separator=","/>
305 <filter type="add_value" name="" value=""/>
306 <filter type="sort_by" column="4"/>
307 </options>
308 </param>
309 </when>
310 </conditional>
311
312 <conditional name="use_snps">
313 <param name="src" type="select" label="Known SNPs" help="for SNP tolerant alignments">
314 <option value="none" selected="true">None</option>
315 <option value="gmapdb">From the GMAP Database</option>
316 <option value="history">A SNP Index in your history</option>
317 </param>
318 <when value="none"/>
319 <when value="history">
320 <param name="snpindex" type="data" format="gmapsnpindex" metadata_name="dbkey" label="Select a snpindex"
321 help="built with GMAP SNP Index"/>
322 </when>
323 <when value="gmapdb">
324 <param name="snpindex" type="select" data_ref="gmapindex" label="Use database containing known SNPs" help="">
325 <options from_file="gmap_indices.loc">
326 <column name="name" index="5"/>
327 <column name="value" index="5"/>
328 <filter type="param_value" ref="gmapindex" column="6"/>
329 <filter type="multiple_splitter" column="5" separator=","/>
330 <filter type="add_value" name="" value=""/>
331 <filter type="sort_by" column="5"/>
332 </options>
333 </param>
334 </when>
335 </conditional>
336
337 </when>
338 <when value="gmapdb">
339 <param name="gmapdb" type="data" format="gmapdb" metadata_name="dbkey" label="Select a gmapdb"
340 help="A GMAP database built with GMAP Build"/>
341 <param name="kmer" type="select" data_ref="gmapdb" label="kmer size" help="Defaults to highest available kmer size">
342 <options>
343 <filter type="data_meta" ref="gmapdb" key="kmers" multiple="True" separator=","/>
344 </options>
345 </param>
346
347 <conditional name="use_splicing">
348 <param name="src" type="select" label="Known Splicesite and Introns"
349 help="Look for splicing involving known sites or known introns at short or long distances
350 See README instructions for the distinction between known sites and known introns">
351 <option value="none" selected="true">None</option>
352 <option value="gmapdb">From the GMAP Database</option>
353 <option value="history">A Map in your history</option>
354 </param>
355 <when value="none"/>
356 <when value="history">
357 <param name="splicemap" type="data" format="splicesites.iit,introns.iit" metadata_name="dbkey" label="Select a splicesite map"
358 help="built with GMAP IIT"/>
359 </when>
360 <when value="gmapdb">
361 <param name="splicemap" type="select" data_ref="gmapdb" label="Use map for splicing involving known sites or known introns" help="">
362 <options>
363 <filter type="data_meta" ref="gmapdb" key="maps" multiple="True"/>
364 </options>
365 </param>
366 </when>
367 </conditional>
368
369 <conditional name="use_snps">
370 <param name="src" type="select" label="Known SNPs" help="for SNP tolerant alignments">
371 <option value="none" selected="true">None</option>
372 <option value="gmapdb">From the GMAP Database</option>
373 <option value="history">A SNP Index in your history</option>
374 </param>
375 <when value="none"/>
376 <when value="history">
377 <param name="snpindex" type="data" format="gmapsnpindex" metadata_name="dbkey" label="Select a snpindex"
378 help="built with GMAP SNP Index"/>
379 </when>
380 <when value="gmapdb">
381 <param name="snpindex" type="select" data_ref="gmapdb" label="Use database containing known SNPs" help="">
382 <options>
383 <filter type="data_meta" ref="gmapdb" key="snps" multiple="True" separator=","/>
384 </options>
385 </param>
386 </when>
387 </conditional>
388
389 </when>
390 </conditional>
391
318 <!-- Computation options --> 392 <!-- Computation options -->
319 <conditional name="computation"> 393 <conditional name="computation">
320 <param name="options" type="select" label="Computational Settings" help=""> 394 <param name="options" type="select" label="&lt;HR&gt;Computational Settings" help="">
321 <option value="default">Use default settings</option> 395 <option value="default">Use default settings</option>
322 <option value="advanced">Set Computation Options</option> 396 <option value="advanced">Set Computation Options</option>
323 </param> 397 </param>
324 <when value="default"/> 398 <when value="default"/>
325 <when value="advanced"> 399 <when value="advanced">
350 <option value="paired" selected="true">paired</option> 424 <option value="paired" selected="true">paired</option>
351 <option value="off">off</option> 425 <option value="off">off</option>
352 </param> 426 </param>
353 <param name="trim_mismatch_score" type="integer" value="" optional="true" label="Score to use for mismatches when trimming at ends (default is -3)" 427 <param name="trim_mismatch_score" type="integer" value="" optional="true" label="Score to use for mismatches when trimming at ends (default is -3)"
354 help="to turn off trimming, specify 0"/> 428 help="to turn off trimming, specify 0"/>
355 <!-- use-snps This should be either a select list from the gmapdb maps or a data type using snpsdir and use-snps --> 429
356 <param name="use_snps" type="text" value="" optional="true" label="SNP database Name for SNP tolearnce" help="Use database containing known SNPs (built previously using snpindex) for tolerance to SNPs"/>
357 <!-- Options for GMAP alignment within GSNAP --> 430 <!-- Options for GMAP alignment within GSNAP -->
358 <param name="gmap_mode" type="select" multiple="true" optional="true" label="Cases to use GMAP for complex alignments containing multiple splices or indels" help=""> 431 <param name="gmap_mode" type="select" multiple="true" optional="true" label="Cases to use GMAP for complex alignments containing multiple splices or indels" help="">
359 <option value="pairsearch">pairsearch</option> 432 <option value="pairsearch">pairsearch</option>
360 <option value="terminal">terminal</option> 433 <option value="terminal">terminal</option>
361 <option value="improve">improve</option> 434 <option value="improve">improve</option>
374 </param> 447 </param>
375 </when> 448 </when>
376 </conditional> 449 </conditional>
377 450
378 <conditional name="splicing"> 451 <conditional name="splicing">
379 <param name="options" type="select" label="Splicing options for RNA-Seq" help=""> 452 <param name="options" type="select" label="&lt;HR&gt;Splicing options for RNA-Seq" help="">
380 <option value="default">Use default settings</option> 453 <option value="default">Use default settings</option>
381 <option value="advanced">Set Splicing Options</option> 454 <option value="advanced">Set Splicing Options</option>
382 </param> 455 </param>
383 <when value="default"/> 456 <when value="default"/>
384 <when value="advanced"> 457 <when value="advanced">
385 <!-- Splicing options for RNA-Seq --> 458 <!-- Splicing options for RNA-Seq -->
386 <!-- use-splices This should be either a select list from the gmapdb maps or a data type using splicesdir and use-splices --> 459 <!-- use-splices This should be either a select list from the gmapdb maps or a data type using splicesdir and use-splices -->
387 <param name="use_splices" type="text" value="" optional="true" label="Known splicesites or introns" help="Look for splicing involving known sites or known introns at short or long distances See README instructions for the distinction between known sites and known introns"/> 460 <!-- Neither novel splicing (-N) nor known splicing (-s) turned on => assume reads are DNA-Seq (genomic) -->
388 <param name="novelsplicing" type="boolean" checked="false" truevalue="--novelsplicing=1" falsevalue="" label="Look for novel splicing "/> 461 <param name="novelsplicing" type="boolean" checked="false" truevalue="--novelsplicing=1" falsevalue="" label="Look for novel splicing "/>
389 <param name="localsplicedist" type="integer" value="" optional="true" label="Definition of local novel splicing event (default 200000)"/> 462 <param name="localsplicedist" type="integer" value="" optional="true" label="Definition of local novel splicing event (default 200000)"/>
390 <param name="local_splice_penalty" type="integer" value="" optional="true" label="Penalty for a local splice (default 0). Counts against mismatches allowed"/> 463 <param name="local_splice_penalty" type="integer" value="" optional="true" label="Penalty for a local splice (default 0). Counts against mismatches allowed"/>
391 <param name="distant_splice_penalty" type="integer" value="" optional="true" label="Penalty for a distant splice (default 3). Counts against mismatches allowed"/> 464 <param name="distant_splice_penalty" type="integer" value="" optional="true" label="Penalty for a distant splice (default 3). Counts against mismatches allowed"/>
392 <param name="local_splice_endlength" type="integer" value="" optional="true" label="Minimum length at end required for local spliced alignments (default 15, min is 14)"/> 465 <param name="local_splice_endlength" type="integer" value="" optional="true" label="Minimum length at end required for local spliced alignments (default 15, min is 14)"/>
396 </when> 469 </when>
397 </conditional> 470 </conditional>
398 471
399 <!-- Output data --> 472 <!-- Output data -->
400 <conditional name="output"> 473 <conditional name="output">
401 <param name="options" type="select" label="Output options for RNA-Seq" help=""> 474 <param name="options" type="select" label="&lt;HR&gt;&lt;H2&gt;Output&lt;/H2&gt;Output options for RNA-Seq" help="">
402 <option value="default">Use default settings</option> 475 <option value="default">Use default settings</option>
403 <option value="advanced">Set Output Options</option> 476 <option value="advanced">Set Output Options</option>
404 </param> 477 </param>
405 <when value="default"/> 478 <when value="default"/>
406 <when value="advanced"> 479 <when value="advanced">
437 </conditional> 510 </conditional>
438 <param name="split_output" type="boolean" truevalue="--split-output=gsnap_out" falsevalue="" checked="false" label="Separate outputs" 511 <param name="split_output" type="boolean" truevalue="--split-output=gsnap_out" falsevalue="" checked="false" label="Separate outputs"
439 help="Separate outputs for: nomapping, halfmapping_uniq, halfmapping_mult, unpaired_uniq, unpaired_mult, paired_uniq, paired_mult, concordant_uniq, and concordant_mult results"/> 512 help="Separate outputs for: nomapping, halfmapping_uniq, halfmapping_mult, unpaired_uniq, unpaired_mult, paired_uniq, paired_mult, concordant_uniq, and concordant_mult results"/>
440 </inputs> 513 </inputs>
441 <outputs> 514 <outputs>
442 <data format="txt" name="gsnap_stderr" label="${tool.name} on ${on_string}: log"/> 515 <data format="txt" name="gsnap_stderr" label="${tool.name} on ${on_string}: stderr"/>
443 <data format="txt" name="results" label="${tool.name} on ${on_string} ${result.format}" > 516 <data format="txt" name="results" label="${tool.name} on ${on_string} ${result.format}" >
444 <filter>(split_output == False)</filter> 517 <filter>(split_output == False)</filter>
445 <change_format> 518 <change_format>
446 <when input="result['format']" value="sam" format="sam"/> 519 <when input="result['format']" value="sam" format="sam"/>
447 </change_format> 520 </change_format>