Mercurial > repos > nml > smalt
comparison smalt_map.xml @ 0:51ad86498414 draft
planemo upload for repository https://sourceforge.net/projects/smalt/ commit dad1050d2043119952eb284fcd089519f28e4255
author | nml |
---|---|
date | Wed, 27 Sep 2017 16:03:01 -0400 |
parents | |
children | fae9ec82e10f |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:51ad86498414 |
---|---|
1 <tool id="smalt" name="smalt" version="1.0.0" > | |
2 <description>Map query reads (FASTA/FASTQ) format onto the reference sequences</description> | |
3 <requirements> | |
4 <requirement type="package" version="0.7.6">smalt</requirement> | |
5 <requirement type="package" version="1.5">samtools</requirement> | |
6 </requirements> | |
7 <stdio> <!-- Fail the job on any non-zero exit code, on "Command line error" in stdout, or on "ERROR" in stderr. --> | |
8 <exit_code range="1:" level="fatal" description="The tool exited with a non-zero exit code; see the job's standard error for details" /> | |
9 <regex match="Command line error" | |
10 source="stdout" | |
11 level="fatal" | |
12 description="SMALT rejected the command line; check the parameter values supplied to the tool" /> | |
13 <regex match="ERROR" | |
14 source="stderr" | |
15 level="fatal" | |
16 description="An error was reported on standard error; see the job's standard error for details" /> | |
17 </stdio> | |
18 <command> | |
19 <![CDATA[ | |
20 ## prepare smalt index (written under the name 'temp' in the job working directory) | |
21 smalt index | |
22 | |
23 #if $k: | |
24 -k "$k" | |
25 #end if | |
26 | |
27 #if $s: | |
28 -s "$s" | |
29 #end if | |
30 | |
31 'temp' "$reference" && | |
32 ## map the reads against the 'temp' index built above | |
33 smalt map | |
34 | |
35 -o '$output' | |
36 | |
37 #if $oformat.outformat == "sam": | |
38 #if $oformat.samOptions: | |
39 -f "$oformat.outformat:$oformat.samOptions" | |
40 #else | |
41 -f "$oformat.outformat" | |
42 #end if | |
43 #elif $oformat.outformat == "bam": | |
44 #if $oformat.bamOptions: | |
45 -f "$oformat.outformat:$oformat.bamOptions" | |
46 #else | |
47 -f "$oformat.outformat" | |
48 #end if | |
49 #else | |
50 -f "$oformat.outformat" | |
51 #end if | |
52 | |
53 | |
54 | |
55 -n \${GALAXY_SLOTS:-2} | |
56 | |
57 #if $singlePaired.sPaired != "single": | |
58 -l '$singlePaired.pairtype' | |
59 #end if | |
60 | |
61 | |
62 #if $mincover: | |
63 -c "$mincover" | |
64 #end if | |
65 | |
66 #if $scordiff: | |
67 -d "$scordiff" | |
68 #end if | |
69 | |
70 #if $insfil: | |
71 -g "$insfil" | |
72 #end if | |
73 | |
74 #if $insertmax: | |
75 -i "$insertmax" | |
76 #end if | |
77 | |
78 #if $insertmin: | |
79 -j "$insertmin" | |
80 #end if | |
81 | |
82 #if $minscor: | |
83 -m "$minscor" | |
84 #end if | |
85 | |
86 #if $minbasq: | |
87 -q "$minbasq" | |
88 #end if | |
89 | |
90 #if $seed: | |
91 -r "$seed" | |
92 #end if | |
93 | |
94 #if $sw_weighted: | |
95 -w | |
96 #end if | |
97 | |
98 #if $search_harder: | |
99 -x | |
100 #end if | |
101 | |
102 #if $minid: | |
103 -y "$minid" | |
104 #end if | |
105 | |
106 | |
107 'temp' | |
108 | |
109 #if $singlePaired.sPaired == "single": | |
110 '$singlePaired.sInput1' | |
111 #elif $singlePaired.sPaired == "paired": | |
112 '$singlePaired.pInput1' '$singlePaired.pInput2' | |
113 #elif $singlePaired.sPaired == "collections": | |
114 '$singlePaired.fastq_collection.forward' '$singlePaired.fastq_collection.reverse' | |
115 #end if | |
116 | |
117 ## BAM output is sorted through a temporary file, then moved back over the output dataset | |
118 #if $oformat.outformat == "bam": | |
119 && samtools sort -@ \${GALAXY_SLOTS:-1} -o sorted.bam '$output' && mv sorted.bam '$output' | |
120 #end if | |
121 | |
122 | |
123 ]]> | |
124 </command> | |
125 | |
126 | |
127 <inputs> | |
128 <conditional name="singlePaired"> | |
129 <param name="sPaired" type="select" label="What is the library type?"> | |
130 <option value="single">Single-end</option> | |
131 <option value="paired">Paired-end</option> | |
132 <option value="collections">Paired-end Collections</option> | |
133 </param> | |
134 <when value="single"> | |
135 <param name="sInput1" type="data" format="fastq" label="Single end illumina fastq file" optional="false"/> | |
136 </when> | |
137 <when value="paired"> | |
138 <param name="pInput1" type="data" format="fastq,fastqsanger,fastqillumina,fastqsolexa" label="Forward FASTQ file" help="Must have ASCII encoded quality scores"/> | |
139 <param name="pInput2" type="data" format="fastq,fastqsanger,fastqillumina,fastqsolexa" label="Reverse FASTQ file" help="File format must match the Forward FASTQ file"/> | |
140 <param name="pairtype" type="select" label="Pair Type" help="Type of read pair library"> | |
141 <option value="pe">Illumina paired-end (short inserts)</option> | |
142 <option value="mp">Illumina mate-pair library (long inserts)</option> | |
143 <option value="pp">Mate-pair sequenced on the same strand</option> | |
144 </param> | |
145 </when> | |
146 <when value="collections"> | |
147 <param name="fastq_collection" type="data_collection" label="Paired-end Fastq collection" help="" optional="false" format="txt" collection_type="paired" /> | |
148 <param name="pairtype" type="select" label="Pair Type" help="Type of read pair library"> | |
149 <option value="pe">Illumina paired-end (short inserts)</option> | |
150 <option value="mp">Illumina mate-pair library (long inserts)</option> | |
151 <option value="pp">Mate-pair sequenced on the same strand</option> | |
152 </param> | |
153 </when> | |
154 | |
155 </conditional> | |
156 | |
157 | |
158 | |
159 <!-- reference genome --> | |
160 <param name="reference" type="data" format="fasta" label="Select fasta reference"/> | |
161 <param name="k" type="integer" value="13" label="K-mer size" help="Specifies the word length. [wordlen] is an integer within the limits. between 3 and 20. The default word length is 13" max="20" min="3"/> | |
162 <param name="s" type="integer" optional="true" label="Step size" help="Specifies how many bases are skipped between indexed words."/> | |
163 | |
164 | |
165 <param name="mincover" type="text" label="Mincover" help="Only consider mappings where the k-mer word seeds cover the query read to a minimum extent"/> | |
166 <param name="scordiff" type="text" label="Scordiff" help="Set a threshold of the Smith-Waterman alignment score relative to the maximum score"/> | |
167 <conditional name="oformat"> | |
168 <param name="outformat" type="select" label="Format" help=""> | |
169 <option value="cigar">cigar</option> | |
170 <option value="sam" selected="true">sam</option> | |
171 <option value="ssaha">ssaha</option> | |
172 <option value="bam">bam</option> | |
173 </param> | |
174 <when value="sam"> | |
175 <param name="samOptions" type="select" display="checkboxes" label="Sam Options" multiple="true"> | |
176 <option value="nohead">No Header</option> | |
177 <option value="clip">Hard Clip</option> | |
178 </param> | |
179 </when> | |
180 <when value="bam"> | |
181 <param name="bamOptions" type="select" display="checkboxes" label="Bam Options" multiple="true"> | |
182 <option value="clip">Hard Clip</option> | |
183 </param> | |
184 </when> | |
185 <when value="cigar"> | |
186 </when> | |
187 <when value="ssaha"> | |
188 </when> | |
189 </conditional> | |
190 <param name="insfil" type="data" optional="true" label="Distribution insert sizes " help="Use the distribution of insert sizes stored in this file. The file is in ASCII format and can be generated using the 'sample' task of SMALT (see 'smalt sample -H' for help)." format="txt"/> | |
191 <param name="insertmax" type="text" label="Maximum insert size (only in paired-end mode). " help="Maximum insert size (only in paired-end mode). The default is 500."/> | |
192 <param name="insertmin" type="text" label="Minimum insert size (only in paired-end mode). " help="Minimum insert size (only in paired-end mode). The default is 0."/> | |
193 | |
194 | |
195 <param name="minscor" type="text" label="Sets an absolute threshold of the Smith-Waterman scores." help="Mappings with scores below that threshold will not be reported. The default is &lt; minscor &gt; = &lt; wordlen &gt; + &lt; stepsiz &gt; - 1"/> | |
196 | |
197 <param name="minbasq" type="text" label="Sets a base quality threshold (0 &lt;= minbasq &lt;= 10, default 0)" help="K-mer words of the read with nucleotides that have a base quality below this threshold are not looked up in the hash index."/> | |
198 | |
199 <param name="seed" type="text" label="If the there are multiple mappings with the same best alignment score report one picked at random." help="is an integer >= 0 used to seed the pseudo-random genarator."/> | |
200 | |
201 <param name="sw_weighted" type="boolean" label="Smith-Waterman scores are complexity weighted."/> | |
202 | |
203 <param name="search_harder" type="boolean" label="This flag triggers a more exhaustive search for alignments at the cost of decreased speed" help="In paired-end mode each mate is mapped independently. (By default the mate with fewer hits in the hash index is mapped first and the vicinity is searched for mappings of its mate.)"/> | |
204 | |
205 <param name="minid" type="text" label="Sets an identity threshold for a mapping to be reported (default: 0)." help="specifies the number of exactly matching nucleotides either as a positive integer or as a fraction of the read length (&lt;= 1.0)."/> | |
206 </inputs> | |
207 | |
208 <outputs> <!-- One result dataset, written by "smalt map -o"; its datatype follows the "Format" select (oformat.outformat). --> | |
209 <data name="output" format="cigar" > | |
210 <change_format> <!-- Every option of the outformat select has a matching rule below. --> | |
211 <when input="oformat.outformat" value="cigar" format="cigar"/> | |
212 <when input="oformat.outformat" value="sam" format="sam"/> | |
213 <when input="oformat.outformat" value="ssaha" format="ssaha"/> | |
214 <when input="oformat.outformat" value="bam" format="bam"/> | |
215 </change_format> | |
216 </data> | |
217 </outputs> | |
218 <tests> <!-- Paired-end smoke test: maps two FASTQ files onto contigs.fasta and checks the SAM header. --> | |
219 <test> | |
220 <param name="sPaired" value="paired"/> | |
221 <param name="pInput1" value="ecoli_1K_1.fq"/> | |
222 <param name="pInput2" value="ecoli_1K_2.fq"/> | |
223 <param name="pairtype" value="pe"/> | |
224 <!-- The reference is always a history dataset; this tool declares no "source" input, so none is set here. --> | |
225 <param name="reference" value="contigs.fasta"/> | |
226 <param name="outformat" value="sam"/> | |
227 <output name="output"> | |
228 <assert_contents> | |
229 <has_text text="SN:NODE_1_length_1000_cov_140.620106" /> | |
230 </assert_contents> | |
231 </output> | |
232 </test> | |
233 </tests> | |
234 <help> | |
235 | |
236 **What it does** | |
237 | |
238 SMALT is a pairwise sequence alignment program for the efficient mapping of DNA sequencing reads onto genomic reference sequences. It uses a combination of short-word hashing and dynamic programming. Most types of sequencing platforms are supported including paired-end sequencing reads. | |
239 | |
240 | |
241 ------ | |
242 | |
243 | |
244 **Know what you are doing** | |
245 | |
246 .. class:: warningmark | |
247 | |
248 There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy. | |
249 | |
250 .. __: http://www.sanger.ac.uk/resources/software/smalt/ | |
251 | |
252 ------ | |
253 | |
254 **Input formats** | |
255 | |
256 SMALT accepts files in Sanger FASTQ format (galaxy type *fastqsanger*). Use the FASTQ Groomer to prepare your files. | |
257 | |
258 ------ | |
259 | |
260 | |
261 Please cite the website "http://www.sanger.ac.uk/resources/software/smalt/". | |
262 | |
263 ------ | |
264 | |
265 | |
266 -a Output explicit alignments along with the mapping coordinates. | |
267 | |
268 -c <mincover INT> | |
269 Only consider mappings where the k-mer word seeds cover the query read to | |
270 a minimum extent. If <mincover> is an integer or floating point > 1.0, at | |
271 least this many bases of the read must be covered by k-mer word seeds. If | |
272 <mincover> is a floating point <= 1.0, it specifies the fraction of the | |
273 query read length that must be covered by k-mer word seeds. This option | |
274 is only valid in conjunction with the '-x' flag. | |
275 | |
276 -d <scordiff INT> | |
277 Set a threshold of the Smith-Waterman alignment score relative to the | |
278 maximum score. When mapping single reads, all alignments are reported | |
279 that have Smith-Waterman scores within <scorediff> of the maximum. | |
280 Mappings with lower scores are skipped. If <scorediff> is set to a | |
281 value < 0, all alignments are printed that have scores above the | |
282 threshold specified with the '-m <minscor>' option. | |
283 For paired reads, only a value of 0 is supported. With the option '-d 0' | |
284 all alignments (pairings) with the best score are output. By default | |
285 (without the option '-d 0') single reads/mates with multiple best mappings | |
286 are reported as 'not mapped'. | |
287 | |
288 -f <ouform STR> | |
289 Specifies the output format. <ouform> can be either 'sam'(default), | |
290 'cigar', 'gff' or 'ssaha'. Optional extension 'sam:nohead,x,clip' | |
291 (see manual). Support for BAM format is dependent on additional | |
292 libraries (not installed). | |
293 | |
294 -F <inform STR> | |
295 Specifies the input format. The only available format is fastq (default). | |
296 Support for BAM and SAM formats (see: samtools.sourceforge.net) depends | |
297 on additional libraries (not installed). | |
298 | |
299 -g <insfil STR> | |
300 Use the distribution of insert sizes stored in the file <insfil>. This | |
301 file is in ASCII format and can be generated using the 'sample' task (see | |
302 'smalt sample -H' for help). | |
303 | |
304 -H Print these instructions. | |
305 | |
306 -i <insert_max INT> | |
307 Maximum insert size (only in paired-end mode). The default is 500. | |
308 | |
309 -j <insert_min INT> | |
310 Minimum insert size (only in paired-end mode). The default is 0. | |
311 | |
312 -l <pairtyp STR> | |
313 Type of read pair library. <pairtyp> can be either 'pe', i.e. for | |
314 the Illumina paired-end library for short inserts ( \|—> <—\| ). 'mp' | |
315 for the Illumina mate-pair library for long inserts ( <—\| \|—> ) or | |
316 'pp' for mates sequenced on the same strand ( \|—> \|—> ). 'pe' is the | |
317 default. | |
318 | |
319 -m <minscor INT> | |
320 Sets an absolute threshold of the Smith-Waterman scores. Mappings with | |
321 scores below that threshold will not be reported. The default is | |
322 <minscor> = <wordlen> + <stepsiz> - 1. | |
323 | |
324 -n <nthreads INT> | |
325 Run smalt using multiple threads. <nthreads> is the number of additional | |
326 threads forked. The order of the reads in the input files is not preserved | |
327 for the output unless '-O' is also specified. | |
328 | |
329 -o <oufilnam STR> | |
330 Write mapping output (e.g. SAM lines) to a separate file. If this option | |
331 is not specified, mappings are written to standard output. | |
332 | |
333 -O Output mappings in the order of the reads in the input files when using | |
334 multiple threads (option '-n <nthreads>'). | |
335 | |
336 | |
337 -p Report partial alignments if they are complementary on the read (split | |
338 reads). | |
339 | |
340 -q <minbasq INT> | |
341 Sets a base quality threshold (0 <= minbasq <= 10, default 0). | |
342 K-mer words of the read with nucleotides that have a base quality below | |
343 this threshold are not looked up in the hash index. | |
344 | |
345 -r <seed INT> | |
346 If <seed> >= 0 report an alignment selected at random where there are | |
347 multiple mappings with the same best alignment score. With <seed> = 0 | |
348 (default) a seed is derived from the current calendar time. If <seed> | |
349 < 0 reads with multiple best mappings are reported as 'not mapped'. | |
350 | |
351 -S <scorspec STR> | |
352 Specify alignment penalty scores for a match or mismatch (substitution), | |
353 or for opening or extending a gap. <scorspec> is a comma separated | |
354 list of integer assignments to one or more of the following variables: | |
355 match, subst, gapopen, gapext, i.e. 'gapopen=-5,gapext=-4' (no spaces | |
356 allowed in <scorspec>). Default:'match=1,subst=-2,gapopen=-4,gapext=-3' | |
357 | |
358 -w Smith-Waterman scores are complexity weighted. | |
359 | |
360 -x This flag triggers a more exhaustive search for alignments at the cost | |
361 of speed. In paired-end mode each mate is mapped independently. (By | |
362 default the mate with fewer hits in the hash index is mapped first and | |
363 the vicinity is searched for mappings of its mate.) | |
364 | |
365 -y <minid FLT> | |
366 Sets an identity threshold for a mapping to be reported (default: 0). | |
367 <minid> specifies the number of exactly matching nucleotides either as | |
368 a positive integer or as a fraction of the read length (<= 1.0). | |
369 </help> | |
370 </tool> |