comparison lastz_paired_reads_wrapper.xml @ 0:96825cee5c25 draft

Uploaded tarball
author devteam
date Mon, 26 Nov 2012 09:48:27 -0500
parents
children 39f974d0884e
comparison
equal deleted inserted replaced
-1:000000000000 0:96825cee5c25
1 <tool id="lastz_paired_reads_wrapper" name="Lastz paired reads" version="1.1.1">
2 <requirements> <!-- external packages this tool declares as dependencies -->
3 <requirement version="1.02.00" type="package">lastz</requirement>
4 </requirements>
5 <description> map short paired reads against reference sequence</description> <!-- short summary text for the tool -->
6 <!-- Assembles the argument list for lastz_paired_reads_wrapper.py. The ref_name option is sent only when the user chose to rename the reference, and is quoted because the free-text value may contain spaces. For a history reference, input1 and its "sequences" metadata are both read through $source because input1 is declared inside the "source" conditional; for a cached reference, input1 is the path field of the selected lastz_seqs data table entry. --><command interpreter="python">lastz_paired_reads_wrapper.py
7 #if $seq_name.how_to_name=="yes":
8 --ref_name="$seq_name.ref_name"
9 #end if
10 --ref_source=$source.ref_source
11 --input2=$input2
12 --input3=$input3
13 --input4=$input4
14 #if $source.ref_source=="history":
15 --input1=$source.input1
16 --ref_sequences=$source.input1.metadata.sequences
17 #else:
18 --input1="${source.input1_2bit.fields.path}"
19 #end if
20 --output=$output1
21 --lastz_seqs_file_dir=${GALAXY_DATA_INDEX_DIR}
22 </command>
23 <inputs>
24 <param name="input2" type="data" format="fasta" label="Align sequencing reads in" /> <!-- reads to be aligned -->
25 <conditional name="source"> <!-- selects where the reference sequences come from -->
26 <param name="ref_source" type="select" label="Against reference sequences that are">
27 <option value="cached">locally cached</option>
28 <option value="history">in your history</option>
29 </param>
30 <when value="cached">
31 <param name="input1_2bit" type="select" label="Using reference genome" help="If your genome of interest is not listed, contact the Galaxy team">
32 <options from_data_table="lastz_seqs" />
33 </param>
34 </when>
35 <when value="history">
36 <param name="input1" type="data" format="fasta" label="Select a reference dataset" />
37 </when>
38 </conditional>
39 <param name="input3" type="data" format="fasta" label="Linker file" />
40 <param name="input4" type="data" format="qual454" label="Select a base quality score 454 dataset" />
41 <conditional name="seq_name"> <!-- optional override of the reference sequence name -->
42 <param name="how_to_name" type="select" label="Do you want to modify the reference name?">
43 <option value="no">No</option>
44 <option value="yes">Yes</option>
45 </param>
46 <when value="yes">
47 <param name="ref_name" type="text" value="Type sequence name here" size="25" label="Enter name for the Reference sequence" />
48 </when>
49 <when value="no"></when>
50 </conditional>
51 </inputs>
52 <outputs>
53 <data name="output1" format="sam" label="${tool.name} on ${on_string}: mapped reads" /> <!-- SAM dataset whose path is handed to the wrapper as its output option -->
54 </outputs>
55 <tests>
56 <test>
57 <!--
58 input1: reference genome, either 2bit or fasta
59 input2: collection of 454 paired end reads, a fasta file
60 input3: linker sequence, a very small fasta file
61 input4: 454 base quality score file, qual454
62 -->
63 <param name="input2" ftype="fasta" value="lastz_paired_input2.fasta" />
64 <param name="ref_source" value="cached" />
65 <param name="input1_2bit" value="/galaxy/data/hg18/seq/chr21.2bit" />
66 <param name="input3" ftype="fasta" value="lastz_paired_input3.fasta" />
67 <param name="input4" ftype="qual454" value="lastz_paired_input4.qual454" />
68 <param name="how_to_name" value="no" />
69 <output file="lastz_paired_out1.sam" name="output1" />
70 </test>
71 </tests>
72 <help>
73
74 **What it does**
75
76 **LASTZ** is a high performance pairwise sequence aligner derived from BLASTZ. It is written by Bob Harris in Webb Miller's laboratory at Penn State University. Special scoring sets were derived to improve runtime performance and quality. This Galaxy version of LASTZ is geared towards aligning short (Illumina/Solexa, AB/SOLiD) and medium (Roche/454) paired reads against a reference sequence. There is excellent, extensive documentation on LASTZ available here_.
77
78 .. _here: http://www.bx.psu.edu/miller_lab/dist/README.lastz-1.02.00/README.lastz-1.02.00.html
79
80 ------
81
82 **Input formats**
83
84 LASTZ accepts reference and reads in FASTA format. However, because Galaxy supports implicit format conversion, the tool will recognize fastq and other method-specific formats.
85
86 ------
87
88 **Outputs**
89
90 This LASTZ tool produces a SAM file showing sequence alignments.
91
92 **SAM output**
93
94 SAM has 12 columns::
95
96 1 2 3 4 5 6 7 8 9 10 11 12
97 ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
98 HWI-EAS91_1_30788AAXX:1:2:1670:915 99 chr9 58119878 60 36M = 58120234 392 GACCCCTACCCCACCGTGCTCTGGATCTCAGTGTTT IIIIIIIIIIIIIIIIEIIIIIII7IIIIIIIIIII XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:36
99 HWI-EAS91_1_30788AAXX:1:2:1670:915 147 chr9 58120234 60 36M = 58119878 -392 ATGAGTCGAATTCTATTTTCCAAACTGTTAACAAAA IFIIDI;IIICIIIIIIIIIIIIIIIIIIIIIIIII XT:A:U NM:i:0 SM:i:37 AM:i:37 X0:i:1 X1:i:0 XM:i:0 XO:i:0 XG:i:0 MD:Z:36
100
101
102 where::
103
104 Column Description
105 --------- ---------------------------------------------------------------------
106 1. QNAME Query (pair) NAME
107 2. FLAG bitwise FLAG
108 3. RNAME Reference sequence NAME
109 4. POS 1-based leftmost POSition/coordinate of clipped sequence
110 5. MAPQ MAPping Quality (Phred-scaled)
111 6. CIGAR extended CIGAR string
112 7. MRNM Mate Reference sequence NaMe ('=' if same as RNAME)
113 8. MPOS 1-based Mate POSition
114 9. ISIZE Inferred insert SIZE
115 10. SEQ query SEQuence on the same strand as the reference
116 11. QUAL query QUALity (ASCII-33 gives the Phred base quality)
117 12. OPT variable OPTional fields in the format TAG:VTYPE:VALUE, tab-separated
118
119 The flags are as follows::
120
121 Flag Description
122 ------ -------------------------------------
123 0x0001 the read is paired in sequencing
124 0x0002 the read is mapped in a proper pair
125 0x0004 the query sequence itself is unmapped
126 0x0008 the mate is unmapped
127 0x0010 strand of the query (1 for reverse)
128 0x0020 strand of the mate
129 0x0040 the read is the first read in a pair
130 0x0080 the read is the second read in a pair
131 0x0100 the alignment is not primary
132
133 ------
134
135 **Do you want to modify the reference name?**
136
137 This option allows you to set the name of the reference sequence manually. This is helpful when, for example, you would like to make the reference name compatible with the UCSC naming conventions to be able to display your lastz results as a custom track at the UCSC Genome Browser.
138
139 ------
140
141 **LASTZ parameter list**
142
143 This is an exhaustive list of LASTZ options. Once again, please note that not all parameters are included in this interface. If you would like to make additional options available through Galaxy, e-mail us at galaxy-bugs@bx.psu.edu::
144
145 target[[s..e]][-] spec/file containing target sequence (fasta or nib)
146 [s..e] defines a subrange of the file
147 - indicates reverse-complement
148 (use --help=files for more details)
149 query[[s..e]][-] spec/file containing query sequences (fasta or nib)
150 if absent, queries come from stdin (unless they
151 aren't needed, as for --self or --tableonly)
152 (use --help=files for more details)
153 --self the target sequence is also the query
154 --quantum the query sequence contains quantum DNA
155 --seed=match&lt;length&gt; use a word with no gaps instead of a seed pattern
156 --seed=half&lt;length&gt; use space-free half-weight word instead of seed pattern
157 --match=&lt;reward&gt;[,&lt;penalty&gt;] set the score values for a match (+&lt;reward&gt;)
158 and mismatch (-&lt;penalty&gt;)
159 --[no]trans[ition][=2] allow one or two transitions in a seed hit
160 (by default a transition is allowed)
161 --word=&lt;bits&gt; set max bits for word hash; use this to trade time for
162 memory, eliminating thrashing for heavy seeds
163 (default is 28 bits)
164 --[no]filter=[&lt;T&gt;:]&lt;M&gt; filter half-weight seed hits, requiring at least M
165 matches and allowing no more than T transversions
166 (default is no filtering)
167 --notwins require just one seed hit
168 --twins=[&lt;min&gt;:]&lt;maxgap&gt; require two nearby seed hits on the same diagonal
169 (default is twins aren't required)
170 --notwins allow single, isolated seeds
171 --[no]recoverseeds avoid losing seeds in hash collisions. Cannot be used with --twins
172 --seedqueue=&lt;entries&gt; set number of entries in seed hit queue
173 (default is 262144)
174 --anchors=&lt;file&gt; read anchors from a file, instead of discovering anchors
175 via seeding
176 --recoverhits recover hash-collision seed hits
177 (default is not to recover seed hits)
178 --step=&lt;length&gt; set step length (default is 1)
179 --maxwordcount=&lt;limit&gt; words occurring more often than &lt;limit&gt; in the target
180 are not eligible for seeds
181 --strand=both search both strands
182 --strand=plus search + strand only (matching strand of query spec)
183 --strand=minus search - strand only (opposite strand of query spec)
184 (by default both strands are searched)
185 --ambiguousn treat N as an ambiguous nucleotide
186 (by default N is treated as a sequence splicing character)
187 --[no]gfextend perform gap-free extension of seed hits to HSPs
188 (by default no extension is performed)
189 --[no]chain perform chaining
190 --chain=&lt;diag,anti&gt; perform chaining with given penalties for diagonal and
191 anti-diagonal
192 (by default no chaining is performed)
193 --[no]gapped perform gapped alignment (instead of gap-free)
194 (by default gapped alignment is performed)
195 --score[s]=&lt;file&gt; read substitution scores from a file
196 (default is HOXD70)
197 --unitscore[s] scores are +1/-1 for match/mismatch
198 --gap=&lt;[open,]extend&gt; set gap open and extend penalties (default is 400,30)
199 --xdrop=&lt;score&gt; set x-drop threshold (default is 10*sub[A][A])
200 --ydrop=&lt;score&gt; set y-drop threshold (default is open+300extend)
201 --infer[=&lt;control&gt;] infer scores from the sequences, then use them
202 --inferonly[=&lt;control&gt;] infer scores, but don't use them (requires --infscores)
203 all inference options are read from the control file
204 --infscores[=&lt;file&gt;] write inferred scores to a file
205 --hspthresh=&lt;score&gt; set threshold for high scoring pairs (default is 3000)
206 ungapped extensions scoring lower are discarded
207 &lt;score&gt; can also be a percentage or base count
208 --entropy adjust for entropy when qualifying HSPs in the x-drop extension
209 method
210 --noentropy don't adjust for entropy when qualifying HSPs
211 --exact=&lt;length&gt; set threshold for exact matches
212 if specified, exact matches are found rather than high
213 scoring pairs (replaces --hspthresh)
214 --inner=&lt;score&gt; set threshold for HSPs during interpolation
215 (default is no interpolation)
216 --gappedthresh=&lt;score&gt; set threshold for gapped alignments
217 gapped extensions scoring lower are discarded
218 &lt;score&gt; can also be a percentage or base count
219 (default is to use same value as --hspthresh)
220 --ball=&lt;score&gt; set minimum score required of words 'in' a quantum ball
221 --[no]entropy involve entropy in filtering high scoring pairs
222 (default is "entropy")
223 --[no]mirror report/use mirror image of all gap-free alignments
224 (default is "mirror" for self-alignments only)
225 --traceback=&lt;bytes&gt; space for trace-back information
226 (default is 80.0M)
227 --masking=&lt;count&gt; mask any position in target hit this many times
228 zero indicates no masking
229 (default is no masking)
230 --targetcapsule=&lt;capsule_file&gt; the target seed word position table and seed
231 (as well as the target sequence) are read from specified file
232 --segments=&lt;segment_file&gt; read segments from a file, instead of discovering
233 them via seeding. Replaces other seeding or gap-free extension
234 options
235 --[no]census[=&lt;file&gt;] count/report how many times each target base aligns
236 (default is to not report census)
237 --identity=&lt;min&gt;[..&lt;max&gt;] filter alignments by percent identity
238 0&lt;=min&lt;=max&lt;=100; blocks (or HSPs) outside min..max
239 are discarded
240 (default is no identity filtering)
241 --coverage=&lt;min&gt;[..&lt;max&gt;] filter alignments by percentage of query covered
242 0&lt;=min&lt;=max&lt;=100; blocks (or HSPs) outside min..max
243 are discarded
244 (default is no query coverage filtering)
245 --notrivial do not output trivial self-alignment block if the target and query
246 sequences are identical. Using --self enables this option automatically
247 --output=&lt;output_file&gt; write the alignments to the specified file name instead of stdout
248 --code=&lt;file&gt; give quantum code for query sequence (only for display)
249 --format=&lt;type&gt; specify output format; one of lav, axt, maf, maf+, maf-, text,
250 lav+text, cigar, text, rdotplot, general, or general:&lt;fields&gt;
251 (by default output is LAV)
252 --rdotplot=&lt;file&gt; create an additional output file suitable for plotting the alignments
253 with the R statistical package.
254 --markend Just before normal completion, write "# lastz end-of-file" to output file
255 --census[=&lt;output_file&gt;] count and report how many times each target base aligns, up
256 to 255. Ns are included in the count
257 --census16[=&lt;output_file&gt;] count and report how many times each target base aligns, up
258 to 65 thousand
259 --census32[=&lt;output_file&gt;] count and report how many times each target base aligns, up
260 to 4 billion
261 --writecapsule=&lt;capsule_file&gt; just write out a target capsule file and quit; don't
262 search for seeds or perform subsequent stages
263 --verbosity=&lt;level&gt; set info level (0 is minimum, 10 is everything)
264 (default is 0)
265 --[no]runtime report runtime in the output file
266 (default is to not report runtime)
267 --tableonly[=count] just produce the target position table, don't
268 search for seeds
269 --[no]stats[=&lt;file&gt;] show search statistics (or don't)
270 (not available in this build)
271 --version report the program version and quit
272 --help list all options
273 --help=files list information about file specifiers
274 --help=short[cuts] list blastz-compatible shortcuts
275 --help=yasra list yasra-specific shortcuts
276
277 </help>
278 </tool>