comparison gmap.xml @ 7:561503a442f0

refactor
author Jim Johnson <jj@umn.edu>
date Tue, 08 Nov 2011 13:26:41 -0600
parents
children a89fec682254
comparison
equal deleted inserted replaced
6:3be0e0a858fe 7:561503a442f0
1 <tool id="gmap" name="GMAP" version="2.0.0">
2 <description>Genomic Mapping and Alignment Program for mRNA and EST sequences</description>
3 <requirements>
<!-- Runtime needs of this tool: the gmap executable plus the gmap-specific datatypes listed below. -->
4 <requirement type="binary">gmap</requirement>
5 <!-- proposed tag for added datatype dependencies -->
6 <requirement type="datatype">gmapdb</requirement>
7 <requirement type="datatype">gmap_annotation</requirement>
8 <requirement type="datatype">gmap_splicesites</requirement>
9 <requirement type="datatype">gmap_introns</requirement>
10 <requirement type="datatype">gmap_snps</requirement>
11 </requirements>
<!-- Command used to report the version of the gmap executable. -->
12 <version_string>gmap --version</version_string>
13 <command>
## Cheetah template that assembles the gmap command line from the tool parameters.
## Sections, in order: reference selection, output format, computation options,
## advanced options, split output, quality protocol, input datasets, redirections.
14 #import os,os.path
15 gmap
16 --nthreads=4 --ordered
## Reference selection: a FASTA dataset from the history (--gseg), a gmapdb dataset
## from the history, or a built-in index whose path is the gmapindex select value.
## NOTE(review): the "map" parameter defined for the indexed and gmapdb sources is
## not referenced anywhere in this template.
17 #if $refGenomeSource.genomeSource == "history":
18 --gseg=$refGenomeSource.ownFile
19 #elif $refGenomeSource.genomeSource == "gmapdb":
## The database name is taken from the first directory entry of the dataset's extra_files_path.
20 #set $gmapdb = $os.listdir($refGenomeSource.gmapdb.extra_files_path)[0]
21 --dir=$refGenomeSource.gmapdb.extra_files_path --db=$gmapdb
## --kmer is passed only when the selected value is exactly two characters long.
22 #if $refGenomeSource.kmer != None and len($refGenomeSource.kmer.__str__) == 2:
23 --kmer=$refGenomeSource.kmer
24 #end if
25 #else:
## Built-in index: the directory and database name are the two halves of the index path.
26 --dir=$os.path.dirname($refGenomeSource.gmapindex.value) --db=$os.path.basename($refGenomeSource.gmapindex.value)
27 #if $refGenomeSource.kmer != None and len($refGenomeSource.kmer.__str__) == 2:
28 --kmer=$refGenomeSource.kmer
29 #end if
30 #end if
## Output format: named formats map to dedicated flags, "sam" adds its own options,
## "gmap" adds nothing, and every remaining value is passed through --format.
31 #if $result.format == "summary":
32 --summary
33 #elif $result.format == "align":
34 --align
35 #elif $result.format == "continuous":
36 --continuous
37 #elif $result.format == "continuous-by-exon":
38 --continuous-by-exon
39 #elif $result.format == "compress":
40 --compress
41 #elif $result.format == "exons_dna":
42 --exons=cdna
43 #elif $result.format == "exons_gen":
44 --exons=genomic
45 #elif $result.format == "protein_dna":
46 --protein_dna
47 #elif $result.format == "protein_gen":
48 --protein_gen
49 #elif $result.format == "sam":
## sam_paired_read renders as "sampe" or "samse" (its true/false values).
50 --format=$result.sam_paired_read
51 $result.no_sam_headers
## Each remaining SAM option is emitted only when its value is non-empty.
52 #if len($result.noncanonical_splices.__str__) > 0
53 --noncanonical-splices=$result.noncanonical_splices
54 #end if
55 #if len($result.read_group_id.__str__) > 0
56 --read-group-id=$result.read_group_id
57 #end if
58 #if len($result.read_group_name.__str__) > 0
59 --read-group-name=$result.read_group_name
60 #end if
61 #if len($result.read_group_library.__str__) > 0
62 --read-group-library=$result.read_group_library
63 #end if
64 #if len($result.read_group_platform.__str__) > 0
65 --read-group-platform=$result.read_group_platform
66 #end if
67 #elif $result.format != "gmap":
68 --format=$result.format
69 #end if
## Computation options: emitted only when "Set Computation Options" is chosen.
70 #if $computation.options == "advanced":
## The two boolean parameters render as their flag text or as an empty string.
71 $computation.nosplicing
72 $computation.cross_species
73 --min-intronlength=$computation.min_intronlength
74 --intronlength=$computation.intronlength
75 --localsplicedist=$computation.localsplicedist
76 --totallength=$computation.totallength
77 --trimendexons=$computation.trimendexons
78 --direction=$computation.direction
79 --canonical-mode=$computation.canonical
80 --prunelevel=$computation.prunelevel
81 --allow-close-indels=$computation.allow_close_indels
82 --microexon-spliceprob=$computation.microexon_spliceprob
83 #if int($computation.chimera_margin) >= 0:
84 --chimera-margin=$computation.chimera_margin
85 #end if
86 #end if
## Advanced options: emitted only when "Set Options" is chosen.
87 #if $advanced.options == "used":
## npaths is declared optional, so skip it when the field was left blank instead of
## calling int() on an empty value; negative values are ignored as before.
88 #if str($advanced.npaths).strip() not in ('', 'None') and int($advanced.npaths) >= 0:
89 --npaths=$advanced.npaths
90 #end if
91 #if int($advanced.chimera_overlap) > 0:
## Hyphenated like every other multi-word gmap option used in this template.
92 --chimera-overlap=$advanced.chimera_overlap
93 #end if
## These four parameters render as complete flags or as empty strings.
94 $advanced.protein
95 $advanced.tolerant
96 $advanced.nolengths
97 $advanced.invertmode
98 #if int($advanced.introngap) > 0:
99 --introngap=$advanced.introngap
100 #end if
101 #if int($advanced.wraplength) > 0:
102 --wraplength=$advanced.wraplength
103 #end if
104 #end if
105 #if $split_output == True
106 $split_output
107 #end if
108 #if len($quality_protocol.__str__) > 0:
109 --quality-protocol=$quality_protocol
110 #end if
## Query datasets: the primary input followed by every dataset added through the repeat.
111 $input
112 #for $i in $inputs:
113 ${i.added_input}
114 #end for
## stderr always goes to the gmap_stderr dataset; stdout goes to the output dataset
## only when the results are not split into separate files.
115 #if $split_output == True
116 2> $gmap_stderr
117 #else
118 2> $gmap_stderr > $output
119 #end if
120 </command>
121 <inputs>
122 <!-- Input data -->
<!-- Primary query dataset; the command template places it before the datasets added below. -->
123 <param name="input" type="data" format="fasta,fastqsanger,fastqillumina" label="&lt;H2&gt;Input Sequences&lt;/H2&gt;Select an mRNA or EST dataset to map" />
<!-- Zero or more extra query datasets, appended to the command line after the primary input. -->
124 <repeat name="inputs" title="additional mRNA or EST dataset to map">
125 <param name="added_input" type="data" format="fasta,fastqsanger,fastqillumina" label=""/>
126 </repeat>
<!-- The empty first option means no quality protocol option is added to the command. -->
127 <param name="quality_protocol" type="select" label="Protocol for input quality scores">
128 <option value="">No quality scores</option>
129 <option value="sanger">Sanger quality scores</option>
130 <option value="illumina">Illumina quality scores</option>
131 </param>
132
133 <!-- GMAPDB for mapping -->
<!-- Reference selector with three branches: a built-in index, a gmapdb dataset from the history, or a FASTA dataset from the history. -->
<!-- NOTE(review): both branches define a "map" select, but the command template never references it. -->
134 <conditional name="refGenomeSource">
135 <param name="genomeSource" type="select" label="&lt;HR&gt;&lt;H2&gt;Map To&lt;/H2&gt;Will you map to a reference genome from your history or use a built-in index?" help="Built-ins were indexed using default options">
136 <option value="indexed">Use a built-in index</option>
137 <option value="gmapdb">Use gmapdb from the history</option>
138 <option value="history">Use a fasta reference sequence from the history</option>
139 </param>
140 <when value="indexed">
141 <param name="gmapindex" type="select" label="Select a reference genome" help="if your genome of interest is not listed - contact Galaxy team">
<!-- Columns read from gmap_indices.loc, by index 0 to 6: uid, dbkey, name, kmers, maps, snps, value. -->
142 <options from_file="gmap_indices.loc">
143 <column name="uid" index="0" />
144 <column name="dbkey" index="1" />
145 <column name="name" index="2" />
146 <column name="kmers" index="3" />
147 <column name="maps" index="4" />
148 <column name="snps" index="5" />
149 <column name="value" index="6" />
150 </options>
151 </param>
<!-- kmer choices: column 3 of the gmap_indices.loc row matched on column 6 against gmapindex, split on commas, plus an added empty choice. -->
152 <param name="kmer" type="select" data_ref="gmapindex" label="kmer size" help="Defaults to highest available kmer size">
153 <options from_file="gmap_indices.loc">
154 <column name="name" index="3"/>
155 <column name="value" index="3"/>
156 <filter type="param_value" ref="gmapindex" column="6"/>
157 <filter type="multiple_splitter" column="3" separator=","/>
158 <filter type="add_value" name="" value=""/>
159 <filter type="sort_by" column="3"/>
160 </options>
161 </param>
<!-- map choices: built the same way as the kmer choices, but from column 4. -->
162 <param name="map" type="select" data_ref="gmapindex" label="Look for splicing involving known sites or known introns" help="">
163 <options from_file="gmap_indices.loc">
164 <column name="name" index="4"/>
165 <column name="value" index="4"/>
166 <filter type="param_value" ref="gmapindex" column="6"/>
167 <filter type="multiple_splitter" column="4" separator=","/>
168 <filter type="add_value" name="" value=""/>
169 <filter type="sort_by" column="4"/>
170 </options>
171 </param>
172 </when>
173 <when value="gmapdb">
<!-- For a gmapdb dataset the kmer and map choices come from the dataset metadata keys "kmers" and "maps". -->
174 <param name="gmapdb" type="data" format="gmapdb" metadata_name="dbkey" label="Select a gmapdb"
175 help="A GMAP database built with GMAP Build"/>
176 <param name="kmer" type="select" data_ref="gmapdb" label="kmer size" help="Defaults to highest available kmer size">
177 <options>
178 <filter type="data_meta" ref="gmapdb" key="kmers" multiple="True" separator=","/>
179 </options>
180 </param>
181 <param name="map" type="select" data_ref="gmapdb" label="Use map for splicing involving known sites or known introns" help="">
182 <options>
183 <filter type="data_meta" ref="gmapdb" key="maps" multiple="True"/>
184 </options>
185 </param>
186 </when>
187 <when value="history">
<!-- The selected FASTA dataset is handed to gmap directly as the genomic segment. -->
188 <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select the reference genome"
189 help="Fasta containing genomic DNA sequence"/>
190 </when>
191 </conditional>
192
193
194 <!-- Computation options -->
<!-- With "default" nothing from this group reaches the command line; with "advanced" every parameter below is passed to gmap. -->
195 <conditional name="computation">
196 <param name="options" type="select" label="&lt;HR&gt;Computational Settings" help="">
197 <option value="default">Use default settings</option>
198 <option value="advanced">Set Computation Options</option>
199 </param>
200 <when value="default"/>
201 <when value="advanced">
<!-- Boolean parameters carry the complete flag text as their true value and an empty string otherwise. -->
202 <param name="nosplicing" type="boolean" truevalue="--nosplicing" falsevalue="" checked="false" label="Turn off splicing" help="(useful for aligning genomic sequences onto a genome)"/>
203 <param name="min_intronlength" type="integer" value="9" label="Min length for one internal intron (default 9)." help="Below this size, a genomic gap will be considered a deletion rather than an intron." />
204 <param name="intronlength" type="integer" value="1000000" label="Max length for one intron (default 1000000)" />
205 <param name="localsplicedist" type="integer" value="200000" label="Max length for known splice sites at ends of sequence (default 200000)" />
206 <param name="totallength" type="integer" value="2400000" label="Max total intron length (default 2400000)" />
<!-- The command template passes chimera_margin whenever it is zero or greater. -->
207 <param name="chimera_margin" type="integer" value="40" label="Amount of unaligned sequence that triggers search for a chimera (default is 40, 0 is off)" />
208 <param name="direction" type="select" label="cDNA direction">
209 <option value="auto">auto</option>
210 <option value="sense_force">sense_force</option>
211 <option value="antisense_force">antisense_force</option>
212 <option value="sense_filter">sense_filter</option>
213 <option value="antisense_filter">antisense_filter</option>
214 </param>
215 <param name="trimendexons" type="integer" value="12" label="Trim end exons with fewer than given number of matches (in nt, default 12)" />
216 <param name="cross_species" type="boolean" truevalue="--cross-species" falsevalue="" checked="false" label="Cross-species alignment" help="For cross-species alignments, use a more sensitive search for canonical splicing"/>
217
218 <param name="canonical" type="select" label="Reward for canonical and semi-canonical introns">
219 <option value="1">high reward (default)</option>
220 <option value="0">low reward</option>
221 <option value="2">low reward for high-identity sequences</option>
222 </param>
223 <param name="allow_close_indels" type="select" label="Allow an insertion and deletion close to each other">
224 <option value="1" selected="true">yes (default)</option>
225 <option value="0">no</option>
226 <option value="2">only for high-quality alignments</option>
227 </param>
<!-- The value is validated to lie between 0 and 1 before the job is submitted. -->
228 <param name="microexon_spliceprob" type="float" value="0.90" label="Allow microexons only if one of the splice site probabilities is greater than this value (default 0.90)" >
229 <validator type="in_range" message="splice probability between 0.00 and 1.00" min="0" max="1"/>
230 </param>
231 <param name="prunelevel" type="select" label="Pruning level">
232 <option value="0">no pruning (default)</option>
233 <option value="1">poor sequences</option>
234 <option value="2">repetitive sequences</option>
235 <option value="3">poor and repetitive sequences</option>
236 </param>
237 <!-- could do this as a config file
238 <param name="chrsubsetfile" type="data" format="fasta" label="User-supplied chromosome subset file" />
239 <param name="chrsubset" type="text" label="Chromosome subset to search" />
240 -->
241 </when>
242 </conditional>
243
244 <!-- Advanced Settings -->
<!-- With "default" nothing from this group reaches the command line; with "used" the parameters below are rendered by the command template. -->
245 <conditional name="advanced">
246 <param name="options" type="select" label="&lt;HR&gt;Advanced Settings" help="">
247 <option value="default">Use default settings</option>
248 <option value="used">Set Options</option>
249 </param>
250 <when value="default"/>
251 <when value="used">
<!-- nolengths, tolerant, invertmode and protein hold complete flag text as their values, so the template prints them verbatim. -->
<!-- NOTE(review): several of these values append "=true" to the flag; confirm the installed gmap accepts an argument on those flags. -->
252 <param name="nolengths" type="boolean" checked="false" truevalue="--nolengths=true" falsevalue="" label="No intron lengths in alignment"/>
253 <param name="invertmode" type="select" label=" Mode for alignments to genomic (-) strand" help="">
254 <option value="">Don't invert the cDNA (default)</option>
255 <option value="--invertmode=1">Invert cDNA and print genomic (-) strand</option>
256 <option value="--invertmode=2">Invert cDNA and print genomic (+) strand</option>
257 </param>
<!-- introngap and wraplength are passed only when greater than zero. -->
258 <param name="introngap" type="integer" value="3" label="Nucleotides to show on each end of intron (default=3)" />
259 <param name="wraplength" type="integer" value="50" label="Line Wrap length for alignment (default=50)" />
<!-- npaths is optional and defaults to -1; the template passes it only when it is zero or greater. -->
260 <param name="npaths" type="integer" value="-1" optional="true"
261 label="Maximum number of paths to show. Ignored if negative. If 0, prints two paths if chimera detected, else one." />
<!-- chimera_overlap is passed only when greater than zero. -->
262 <param name="chimera_overlap" type="integer" value="0" label="Overlap to show, if any, at chimera breakpoint" />
263 <param name="tolerant" type="boolean" checked="false" truevalue="--tolerant=true" falsevalue=""
264 label="Translates cDNA with corrections for frameshifts"/>
265 <param name="protein" type="select" label="Protein alignment" help="">
266 <option value="">default</option>
267 <option value="--fulllength=true">Assume full-length protein, starting with Met</option>
268 <option value="--truncate=true">Truncate alignment around full-length protein, Met to Stop</option>
269 </param>
270 </when>
271 </conditional>
272
273 <!-- Output data -->
<!-- Output format selector. Only the "sam" branch has extra parameters; every other branch is empty. -->
274 <conditional name="result">
<!-- SAM is preselected. In the command template, "gmap" adds no format flag and values without a dedicated flag are passed through the generic format option. -->
275 <param name="format" type="select" label="&lt;HR&gt;&lt;H2&gt;Output&lt;/H2&gt;Select the output format" help="">
276 <option value="gmap">GMAP default output</option>
277 <option value="summary">Summary of alignments</option>
278 <option value="align">Alignment</option>
279 <option value="continuous">Alignment in three continuous lines</option>
280 <option value="continuous-by-exon">Alignment in three lines per exon</option>
281 <option value="compress">Print output in compressed format</option>
282 <option value="exons_dna">Print exons cDNA</option>
283 <option value="exons_gen">Print exons genomic</option>
284 <option value="protein_dna">Print protein sequence (cDNA)</option>
285 <option value="protein_gen">Print protein sequence (genomic)</option>
286 <option value="psl">PSL (BLAT) format</option>
287 <option value="gff3_gene">GFF3 gene format</option>
288 <option value="gff3_match_cdna">GFF3 match cDNA format</option>
289 <option value="gff3_match_est">GFF3 match EST format</option>
290 <option value="splicesites">splicesites output (for GSNAP)</option>
291 <option value="introns">introns output (for GSNAP)</option>
292 <option value="map_exons">IIT FASTA exon map format</option>
293 <option value="map_genes">IIT FASTA map format</option>
294 <option value="coords">coords in table format</option>
295 <option value="sam" selected="true">SAM format</option>
296 </param>
297 <when value="gmap"/>
298 <when value="summary"/>
299 <when value="align">
300
301 </when>
302 <when value="continuous">
303 </when>
304 <when value="continuous-by-exon">
305 </when>
306 <when value="compress"/>
307 <when value="exons_dna"/>
308 <when value="exons_gen"/>
309 <when value="protein_dna"/>
310 <when value="protein_gen"/>
311 <when value="psl"/>
312 <when value="gff3_gene"/>
313 <when value="gff3_match_cdna"/>
314 <when value="gff3_match_est"/>
315 <when value="splicesites"/>
316 <when value="introns"/>
317 <when value="map_exons"/>
318 <when value="map_genes"/>
319 <when value="coords"/>
320 <when value="sam">
<!-- sam_paired_read supplies the value handed to the format option: "sampe" when checked, "samse" otherwise. -->
321 <param name="sam_paired_read" type="boolean" truevalue="sampe" falsevalue="samse" checked="false" label="SAM paired reads"/>
322 <param name="no_sam_headers" type="boolean" truevalue="--no-sam-headers" falsevalue="" checked="false" label="Do not print headers beginning with '@'"/>
<!-- The select and the four read-group text fields are passed to gmap only when non-empty. -->
323 <param name="noncanonical_splices" type="select" label="Print non-canonical genomic gaps greater than 20 nt in CIGAR string as STRING.">
324 <option value="">Use default</option>
325 <option value="N">N</option>
326 <option value="D">D</option>
327 </param>
328 <param name="read_group_id" type="text" value="" label="Value to put into read-group id (RG-ID) field"/>
329 <param name="read_group_name" type="text" value="" label="Value to put into read-group name (RG-SM) field"/>
330 <param name="read_group_library" type="text" value="" label="Value to put into read-group library (RG-LB) field"/>
331 <param name="read_group_platform" type="text" value="" label="Value to put into read-group library platform (RG-PL) field"/>
332 </when>
333 </conditional> <!-- name="result" -->
334
<!-- When checked, the single "output" dataset is filtered out and the uniq, transloc, nomapping and mult datasets are collected from the gmap_out.* files in the working directory. -->
335 <param name="split_output" type="boolean" truevalue="--split-output=gmap_out" falsevalue="" checked="false" label="Separate outputs for nomapping, uniq, mult, and chimera" help="(chimera only when chimera-margin is selected)"/>
336
337
338 <!--
339 map=iitfile Map file. If argument is '?' (with the quotes), this lists available map files.
340 mapexons Map each exon separately
341 mapboth Report hits from both strands of genome
342 flanking=INT Show flanking hits (default 0)
343 print-comment Show comment line for each hit
344 -->
345
346
347 </inputs>
348 <outputs>
<!-- gmap_stderr always receives the standard error stream of the command. -->
349 <data format="txt" name="gmap_stderr" label="${tool.name} on ${on_string}: stderr"/>
<!-- Single combined result, produced only when split_output is unchecked. Its datatype follows the chosen result format and stays "txt" for formats not listed. -->
350 <data format="txt" name="output" label="${tool.name} on ${on_string} ${result.format}" >
351 <filter>(split_output == False)</filter>
352 <change_format>
353 <when input="result['format']" value="gff3_gene" format="gff3"/>
354 <when input="result['format']" value="gff3_match_cdna" format="gff3"/>
355 <when input="result['format']" value="gff3_match_est" format="gff3"/>
356 <when input="result['format']" value="sam" format="sam"/>
357 <when input="result['format']" value="splicesites" format="gmap_splicesites"/>
358 <when input="result['format']" value="introns" format="gmap_introns"/>
359 <when input="result['format']" value="map_genes" format="gmap_annotation"/>
360 <when input="result['format']" value="map_exons" format="gmap_annotation"/>
361 </change_format>
362 </data>
<!-- The four datasets below exist only when split_output is checked. Each is read from a gmap_out.* file in the working directory and uses the same format mapping as "output". -->
<!-- NOTE(review): the split_output label mentions a chimera file, but no dataset here collects gmap_out.chimera; a gmap_out.transloc dataset is declared instead. Confirm which suffix gmap writes. -->
363 <data format="txt" name="uniq" label="${tool.name} on ${on_string} uniq.${result.format}" from_work_dir="gmap_out.uniq">
364 <filter>(split_output == True)</filter>
365 <change_format>
366 <when input="result['format']" value="gff3_gene" format="gff3"/>
367 <when input="result['format']" value="gff3_match_cdna" format="gff3"/>
368 <when input="result['format']" value="gff3_match_est" format="gff3"/>
369 <when input="result['format']" value="sam" format="sam"/>
370 <when input="result['format']" value="splicesites" format="gmap_splicesites"/>
371 <when input="result['format']" value="introns" format="gmap_introns"/>
372 <when input="result['format']" value="map_genes" format="gmap_annotation"/>
373 <when input="result['format']" value="map_exons" format="gmap_annotation"/>
374 </change_format>
375 </data>
376 <data format="txt" name="transloc" label="${tool.name} on ${on_string} transloc.${result.format}" from_work_dir="gmap_out.transloc">
377 <filter>(split_output == True)</filter>
378 <change_format>
379 <when input="result['format']" value="gff3_gene" format="gff3"/>
380 <when input="result['format']" value="gff3_match_cdna" format="gff3"/>
381 <when input="result['format']" value="gff3_match_est" format="gff3"/>
382 <when input="result['format']" value="sam" format="sam"/>
383 <when input="result['format']" value="splicesites" format="gmap_splicesites"/>
384 <when input="result['format']" value="introns" format="gmap_introns"/>
385 <when input="result['format']" value="map_genes" format="gmap_annotation"/>
386 <when input="result['format']" value="map_exons" format="gmap_annotation"/>
387 </change_format>
388 </data>
389 <data format="txt" name="nomapping" label="${tool.name} on ${on_string} nomapping.${result.format}" from_work_dir="gmap_out.nomapping">
390 <filter>(split_output == True)</filter>
391 <change_format>
392 <when input="result['format']" value="gff3_gene" format="gff3"/>
393 <when input="result['format']" value="gff3_match_cdna" format="gff3"/>
394 <when input="result['format']" value="gff3_match_est" format="gff3"/>
395 <when input="result['format']" value="sam" format="sam"/>
396 <when input="result['format']" value="splicesites" format="gmap_splicesites"/>
397 <when input="result['format']" value="introns" format="gmap_introns"/>
398 <when input="result['format']" value="map_genes" format="gmap_annotation"/>
399 <when input="result['format']" value="map_exons" format="gmap_annotation"/>
400 </change_format>
401 </data>
402 <data format="txt" name="mult" label="${tool.name} on ${on_string} mult.${result.format}" from_work_dir="gmap_out.mult">
403 <filter>(split_output == True)</filter>
404 <change_format>
405 <when input="result['format']" value="gff3_gene" format="gff3"/>
406 <when input="result['format']" value="gff3_match_cdna" format="gff3"/>
407 <when input="result['format']" value="gff3_match_est" format="gff3"/>
408 <when input="result['format']" value="sam" format="sam"/>
409 <when input="result['format']" value="splicesites" format="gmap_splicesites"/>
410 <when input="result['format']" value="introns" format="gmap_introns"/>
411 <when input="result['format']" value="map_genes" format="gmap_annotation"/>
412 <when input="result['format']" value="map_exons" format="gmap_annotation"/>
413 </change_format>
414 </data>
415 </outputs>
416 <tests>
<!-- No functional tests are defined for this tool. -->
417 </tests>
418
419 <help>
420
421 **What it does**
422
423 GMAP_ is the Genomic Mapping and Alignment Program for mRNA and EST sequences. The functionality provided by gmap allows a user to: (1) map and align a single cDNA interactively against a large genome in about a second, without the startup time of several minutes typically needed by existing mapping programs; (2) switch arbitrarily among different genomes, without the need for a preloaded server dedicated to each genome; (3) run the program on computers with as little as 128 MB of RAM (random access memory); (4) perform high-throughput batch processing of cDNAs by using memory mapping and multithreading when appropriate memory and hardware are available; (5) generate accurate gene models, even in the presence of substantial polymorphisms and sequence errors; (6) locate splice sites accurately without the use of probabilistic splice site models, allowing generalized use of the program across species; (7) detect statistically significant microexons and incorporate them into the alignment; and (8) handle mapping and alignment tasks on genomes having alternate assemblies, linkage groups or strains. It is developed by Thomas D. Wu of Genentech, Inc.
424
425 Publication_ citation: Thomas D. Wu and Colin K. Watanabe, Bioinformatics 2005 21(9):1859-1875; doi:10.1093/bioinformatics/bti310
426
427 .. _GMAP: http://research-pub.gene.com/gmap/
428 .. _Publication: http://bioinformatics.oxfordjournals.org/cgi/content/full/21/9/1859
429
430 ------
431
432 **Know what you are doing**
433
434 .. class:: warningmark
435
436 You will want to read the README_
437
438 .. _README: http://research-pub.gene.com/gmap/src/README
439
440 </help>
441 </tool>
442