0
|
1 <tool id="blat_wrapper" name="BLAT" version="0.2">
|
|
<description></description>
<!-- External dependency: the "blat" package, version 35. -->
<requirements>
    <requirement type="package" version="35">blat</requirement>
</requirements>
<!-- Version string: the first line printed when blat is called without arguments. -->
<version_command>blat|head -n 1</version_command>
|
0
|
<command><![CDATA[
## Run BLAT on the database and query datasets. The alignment is written to
## the output dataset and BLAT's standard output is redirected to the log dataset.
## Dataset paths are quoted so that a path containing spaces or shell
## metacharacters cannot be split into several arguments.
blat "$database" "$query" -t=$databaseType.databaseType_select -q=$databaseType.queryType
## Optional numeric parameters render as an empty string when left blank,
## so each str() test below drops the flag entirely in that case.
#if str($databaseType.tileSize)
  -tileSize=$databaseType.tileSize
#end if
#if str($stepSize)
  -stepSize=$stepSize
#end if
#if $oneOff
  -oneOff=1
#end if
#if str($databaseType.minMatch)
  -minMatch=$databaseType.minMatch
#end if
#if str($minScore)
  -minScore=$minScore
#end if
#if str($databaseType.minIdentity)
  -minIdentity=$databaseType.minIdentity
#end if
#if str($maxGap)
  -maxGap=$maxGap
#end if
#if str($repMatch)
  -repMatch=$repMatch
#end if
## The repeats selector only exists in the empty-value branch of the mask
## conditional, so it is consulted only when no database masking was chosen.
#if $mask.mask_select
  -mask=$mask.mask_select
#else if $mask.repeats
  -repeats=$mask.repeats
#end if
#if $qMask
  -qMask=$qMask
#end if
#if str($dots)
  -dots=$dots
#end if
## Boolean switches: emitted only when the corresponding checkbox is set.
#if $trimT
  -trimT
#end if
#if $noTrimA
  -noTrimA
#end if
#if $trimHardA
  -trimHardA
#end if
#if $fastMap
  -fastMap
#end if
#if $fine
  -fine
#end if
#if str($maxIntron)
  -maxIntron=$maxIntron
#end if
#if $extendThroughN
  -extendThroughN
#end if
## The output format is left unquoted on purpose: some choices (for example
## "psl -noHead") carry a second flag that must reach BLAT as its own argument.
-out=$out
"$output" > "$logfile"
]]></command>
|
|
68
|
|
<inputs>
    <!-- Sequence datasets passed to BLAT as the database and query arguments. -->
    <param name="database" type="data" format="fasta,twobit" label="Database" help="FASTA or 2bit format" />
    <param name="query" type="data" format="fasta,twobit" label="Query" help="FASTA or 2bit format" />

    <!-- Each database type offers its own query types and its own defaults for tileSize, minMatch and minIdentity. -->
    <conditional name="databaseType">
        <param name="databaseType_select" type="select" label="Database type (-t)">
            <option value="dna" selected="true">DNA sequence (dna)</option>
            <option value="prot">Protein sequence (prot)</option>
            <option value="dnax">DNA sequence translated in six frames to protein (dnax)</option>
        </param>
        <when value="dna">
            <param name="queryType" type="select" label="Select the query type (-q)">
                <option value="dna" selected="true">DNA sequence (dna)</option>
                <option value="rna">RNA sequence (rna)</option>
            </param>
            <param name="tileSize" type="integer" value="11" min="6" max="18" optional="true" label="Size of match that triggers an alignment (-tileSize)" help="Usually between 8 and 12" />
            <param name="minMatch" type="integer" value="2" optional="true" label="Number of tile matches (-minMatch)">
                <validator type="in_range" min="1" />
            </param>
            <!-- Identity is a percentage, so it is bounded to 0..100. -->
            <param name="minIdentity" type="integer" value="90" min="0" max="100" optional="true" label="Minimum sequence identity (%) (-minIdentity)" />
        </when>
        <when value="prot">
            <param name="queryType" type="select" label="Select the query type (-q)">
                <option value="prot">Protein sequence (prot)</option>
            </param>
            <!-- Help text states the range enforced by min/max for this branch. -->
            <param name="tileSize" type="integer" value="5" min="3" max="8" optional="true" label="Size of match that triggers an alignment (-tileSize)" help="Allowed range for protein searches is 3 to 8; default is 5" />
            <param name="minMatch" type="integer" value="1" optional="true" label="Number of tile matches (-minMatch)">
                <validator type="in_range" min="1" />
            </param>
            <param name="minIdentity" type="integer" value="25" min="0" max="100" optional="true" label="Minimum sequence identity (%) (-minIdentity)" />
        </when>
        <when value="dnax">
            <param name="queryType" type="select" label="Select the query type (-q)">
                <option value="prot">Protein sequence (prot)</option>
                <option value="dnax">DNA sequence translated in six frames to protein (dnax)</option>
                <option value="rnax">DNA sequence translated in three frames to protein (rnax)</option>
            </param>
            <!-- Help text states the range enforced by min/max for this branch. -->
            <param name="tileSize" type="integer" value="5" min="3" max="8" optional="true" label="Size of match that triggers an alignment (-tileSize)" help="Allowed range for translated searches is 3 to 8; default is 5" />
            <param name="minMatch" type="integer" value="1" optional="true" label="Number of tile matches (-minMatch)">
                <validator type="in_range" min="1" />
            </param>
            <param name="minIdentity" type="integer" value="25" min="0" max="100" optional="true" label="Minimum sequence identity (%) (-minIdentity)" />
        </when>
    </conditional>

    <!-- <param name="ooc" type="data" format="ooc" optional="true" label="Over-occurring N-mers file (-ooc) produced with blat -makeOoc" help="Use N as tileSize below." /> This should wait for a makeOoc wrapper -->

    <param name="stepSize" type="integer" value="" optional="true" label="Spacing between tiles (-stepSize)" help="Default is tileSize">
        <validator type="in_range" min="1" />
    </param>

    <param name="oneOff" type="boolean" checked="false" label="If set, this allows one mismatch in tile and still triggers an alignment (-oneOff)" />

    <param name="minScore" type="integer" value="30" optional="true" label="Minimum score (-minScore)" help="It is the matches minus the mismatches minus some sort of gap penalty" />

    <param name="maxGap" type="integer" value="2" optional="true" label="Maximum gap between tiles in a clump (-maxGap)" help="Usually set from 0 to 3. Only relevant for minMatch > 1" />

    <!-- <param name="makeOoc" type="boolean" checked="false" label="Make overused tile file N.ooc (-makeOoc)" help="Target needs to be a complete genome" /> This should go in a separate wrapper since after making the ooc file, blat exits -->

    <param name="repMatch" type="integer" value="" optional="true" label="Number of repetitions of a tile allowed before it is marked as overused (-repMatch)" help="Typically this is 256 for tileSize 12, 1024 for tileSize 11, 4096 for tileSize 10. Also affected by stepSize. When stepSize is halved repMatch is doubled to compensate" />

    <!-- Database masking. The repeats selector is only offered when no masking is chosen, so the two settings are never both sent to BLAT. -->
    <conditional name="mask">
        <param name="mask_select" type="select" label="Mask out repeats in database sequences (-mask)" help="Alignments won't be started in masked region but may extend through it in nucleotide searches. Masked areas are ignored completely in protein or translated searches">
            <option value="">No masking</option>
            <option value="lower">Mask out lower cased sequence</option>
            <option value="upper">Mask out upper cased sequence</option>
            <!-- <option value="out">Mask out according to database.out RepeatMasker.out file</option>
            <option value="file.out">Mask database according to RepeatMasker file.out</option>-->
        </param>
        <when value="">
            <!-- Option labels describe repeat reporting rather than masking; the option values are unchanged. -->
            <param name="repeats" type="select" label="Select repeat type if matches in repeat areas should be reported separately from matches in other areas (-repeats)">
                <option value="">Do not report repeats separately</option>
                <option value="lower">Treat lower cased sequence as repeats</option>
                <option value="upper">Treat upper cased sequence as repeats</option>
                <!-- <option value="out">Mask out according to database.out RepeatMasker.out file</option>
                <option value="file.out">Mask database according to RepeatMasker file.out</option>-->
            </param>
        </when>
        <when value="lower" />
        <when value="upper" />
    </conditional>
    <param name="qMask" type="select" label="Mask out repeats in query sequences (-qMask)">
        <option value="">No masking</option>
        <option value="lower">Mask out lower cased sequence</option>
        <option value="upper">Mask out upper cased sequence</option>
        <!-- <option value="out">Mask out according to database.out RepeatMasker .out file</option>
        <option value="file.out">Mask database according to RepeatMasker file.out</option>-->
    </param>
    <!-- <param name="minRepDivergence" type="integer" optional="true" value="15" label="Minimum percent divergence of repeats to allow them to be unmasked (-minRepDivergence)" help="Only relevant for masking using RepeatMasker .out files" />-->

    <param name="dots" type="integer" value="" optional="true" label="Output a dot every N sequences in log (-dots)" help="Dots show program's progress" />

    <param name="trimT" type="boolean" checked="false" label="Trim leading poly-T (-trimT)" />

    <param name="noTrimA" type="boolean" checked="false" label="Don't trim trailing poly-A (-noTrimA)" />

    <param name="trimHardA" type="boolean" checked="false" label="Remove poly-A tail from qSize and alignments in .psl output (-trimHardA)" />

    <param name="fastMap" type="boolean" checked="false" label="Run for fast DNA/DNA remapping (-fastMap)" help="It does not allow introns and require high %ID. Query sizes must not exceed 5000" />

    <param name="fine" type="boolean" checked="false" label="Refine search for small initial and terminal exons (-fine)" help="For high-quality mRNAs. Not recommended for ESTs" />
    <param name="maxIntron" type="integer" value="750000" optional="true" label="Maximum intron size (-maxIntron)" />
    <param name="extendThroughN" type="boolean" checked="false" label="Allow extension of alignment through large blocks of N's (-extendThroughN)" />
    <!-- The selected value is inserted verbatim after "-out=" in the command, so the "noHead" choices carry an extra flag. -->
    <param name="out" type="select" label="Select output file format (-out)">
        <option value="psl">Tab-separated format, no sequence (psl)</option>
        <option value="psl -noHead">Tab-separated format, no sequence, no header (psl -noHead)</option>
        <option value="pslx">Tab-separated format with sequence (pslx)</option>
        <option value="pslx -noHead">Tab-separated format with sequence, no header (pslx -noHead)</option>
        <option value="axt">Blastz-associated axt format (axt)</option>
        <option value="maf">Multiz-associated maf format (maf)</option>
        <option value="sim4">Similar to sim4 format (sim4)</option>
        <option value="wublast">Similar to WU-BLAST format (wublast)</option>
        <option value="blast">Similar to NCBI BLAST format (blast)</option>
        <option value="blast8">NCBI BLAST tabular format (blast8)</option>
        <option value="blast9">NCBI BLAST tabular format with comments (blast9)</option>
    </param>
</inputs>
|
|
186
|
|
<outputs>
    <!-- Receives BLAT's standard output, which the command redirects into this dataset. -->
    <data name="logfile" format="txt" label="${tool.name} on ${on_string}: log" />
    <!-- Alignment file; stays "txt" unless the selected output format matches one of the rules below. -->
    <data name="output" format="txt" label="${tool.name} on ${on_string}: alignment">
        <change_format>
            <!-- Output formats that are mapped to the tabular datatype. -->
            <when input="out" value="psl -noHead" format="tabular" />
            <when input="out" value="pslx -noHead" format="tabular" />
            <when input="out" value="blast8" format="tabular" />
            <!-- Output formats that are mapped to a datatype of the same name. -->
            <when input="out" value="axt" format="axt" />
            <when input="out" value="maf" format="maf" />
        </change_format>
    </data>
</outputs>
|
|
<tests>
    <!-- DNA database against DNA query, lower-case masking on both sides, reported as blast8. -->
    <test>
        <!-- Input datasets -->
        <param name="database" value="databasetest1.fasta" ftype="fasta" />
        <param name="query" value="input83.fasta" ftype="fasta" />

        <!-- Sequence types and tiling -->
        <param name="databaseType_select" value="dna" />
        <param name="queryType" value="dna" />
        <param name="tileSize" value="11" />
        <param name="minMatch" value="2" />
        <param name="stepSize" value="11" />
        <param name="oneOff" value="false" />

        <!-- Scoring and gaps -->
        <param name="minScore" value="30" />
        <param name="maxGap" value="2" />

        <!-- Masking -->
        <param name="mask_select" value="lower" />
        <param name="qMask" value="lower" />

        <!-- Trimming and alignment refinement -->
        <param name="trimT" value="true" />
        <param name="noTrimA" value="false" />
        <param name="fine" value="false" />
        <param name="maxIntron" value="750000" />
        <param name="extendThroughN" value="false" />

        <!-- Output format and expected files -->
        <param name="out" value="blast8" />
        <output name="logfile" file="log.txt" />
        <output name="output" file="outputtest1.txt" />
    </test>
</tests>
|
|
223 <help>
|
|
224 **What it does**
|
|
225
|
|
226 BLAT produces two major classes of alignments:
|
|
227
|
|
228 - at the DNA level between two sequences that are of 95% or greater identity, but which may include large inserts;
|
|
229 - at the protein or translated DNA level between sequences that are of 80% or greater identity and may also include large inserts.
|
|
230
|
|
231 The output of BLAT is flexible. By default it is a simple tab-delimited file which describes the alignment, but which does not include the sequence of the alignment itself. Optionally it can produce BLAST and WU-BLAST compatible output as well as a number of other formats.
|
|
232
|
|
233 **License and citation**
|
|
234
|
|
235 This Galaxy tool is Copyright © 2013 `CRS4 Srl.`_ and is released under the `MIT license`_.
|
|
236
|
|
237 .. _CRS4 Srl.: http://www.crs4.it/
|
|
238 .. _MIT license: http://opensource.org/licenses/MIT
|
|
239
|
|
240 If you use this tool in Galaxy, please cite |Cuccuru2013|_.
|
|
241
|
|
242 .. |Cuccuru2013| replace:: Cuccuru, G., Orsini, M., Pinna, A., Sbardellati, A., Soranzo, N., Travaglione, A., Uva, P., Zanetti, G., Fotia, G. (2013) Orione, a web-based framework for NGS analysis in microbiology. *Submitted*
|
|
243 .. _Cuccuru2013: http://orione.crs4.it/
|
|
244
|
|
245 This tool uses `BLAT`_, which is licensed separately. Please cite |Kent2002|_.
|
|
246
|
|
247 .. _BLAT: http://genome.ucsc.edu/FAQ/FAQblat.html
|
|
248 .. |Kent2002| replace:: Kent, W. J. (2002) BLAT – The BLAST-Like Alignment Tool. *Genome Res.* 12(4), 656-664
|
|
249 .. _Kent2002: http://genome.cshlp.org/content/12/4/656
|
|
250 </help>
|
|
251 </tool>
|