comparison magicblast.xml @ 0:e6799e98c5fb draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blast commit 15fc6c06f743bae276ff02dc405e7da61a07bd08"
author iuc
date Tue, 05 Apr 2022 12:11:08 +0000
parents
children aea6702a3cd5
comparison
equal deleted inserted replaced
-1:000000000000 0:e6799e98c5fb
1 <tool id="magicblast" name="Magic-BLAST: map large RNA or DNA sequences" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>against a whole genome or transcriptome</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <command detect_errors="exit_code"><![CDATA[
8 #import os
9
10 magicblast
11 -num_threads \${GALAXY_SLOTS:-8}
12 #if $query.is_of_type('fasta.gz', 'fastqsanger.gz'):
13 -query <(gunzip -c '${query}')
14 #else:
15 -query '${query}'
16 #end if
17 #if $query_mate:
18 -paired
19 #if $query_mate.is_of_type('fasta.gz', 'fastqsanger.gz'):
20 -query_mate <(gunzip -c '${query_mate}')
21 #else:
22 -query_mate '${query_mate}'
23 #end if
24 #end if
25
26 #if $query.is_of_type('fastqsanger', 'fastqsanger.gz'):
27 -infmt fastq
28 #end if
29
30 #if $db_opts.db_opts_selector == "histdb":
31 -db '${os.path.join($db_opts.histdb.extra_files_path, "blastdb")}'
32 #elif $db_opts.db_opts_selector == "db":
33 -db '${os.path.join($db_opts.database.fields.path, "blastdb")}'
34 #else:
35 #if $db_opts.subject.is_of_type('fasta.gz'):
36 -subject <(gunzip -c '${$db_opts.subject}')
37 #else:
38 -subject '${db_opts.subject}'
39 #end if
40 #end if
41
42 ## General search options
43 -word_size $general_search.word_size
44 -gapopen $general_search.gapopen
45 -gapextend $general_search.gapextend
46 -penalty $general_search.penalty
47 -max_intron_length $general_search.max_intron_length
48
49 ## Query filtering options
50 $query_filtering.lcase_masking
51 -validate_seqs $query_filtering.validate_seqs
52 -limit_lookup $query_filtering.limit_lookup
53 -max_db_word_count $query_filtering.max_db_word_count
54 -lookup_stride $query_filtering.lookup_stride
55
56 ## Restrict database search
57 #if $restrict_search.gilist:
58 -gilist '$restrict_search.gilist'
59 #end if
60 #if $restrict_search.negative_gilist:
61 -negative_gilist '$restrict_search.negative_gilist'
62 #end if
63 #if $restrict_search.seqidlist:
64 -seqidlist '$restrict_search.seqidlist'
65 #end if
66 #if $restrict_search.negative_seqidlist:
67 -negative_seqidlist '$restrict_search.negative_seqidlist'
68 #end if
69 #if str($restrict_search.taxids) != '':
70 -taxids '$restrict_search.taxids'
71 #end if
72 #if $restrict_search.taxidlist:
73 -taxidlist '$restrict_search.taxidlist'
74 #end if
75 #if str($restrict_search.negative_taxids) != '':
76 -negative_taxids '$restrict_search.negative_taxids'
77 #end if
78 #if $restrict_search.negative_taxidlist:
79 -negative_taxidlist '$restrict_search.negative_taxidlist'
80 #end if
81
82 ## Mapping options
83 -score $mapping.score
84 #if $mapping.max_edit_dist > 0:
85 -max_edit_dist $mapping.max_edit_dist
86 #end if
87 -splice '$mapping.splice'
88 -reftype '$mapping.reftype'
89
90 ## Output unaligned options
91 #if str($output_options.report_unaligned_cond.report_unaligned) == 'yes':
92 #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.report_unaligned_separately) == 'yes':
93 -out_unaligned 'out_unaligned'
94 #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt) == 'bam':
95 -unaligned_fmt 'sam'
96 #else:
97 -unaligned_fmt '$output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt'
98 #end if
99 #end if
100 #else:
101 -no_unaligned
102 #end if
103
104 ## Additional output options
105 $output_options.no_discordant
106 ## Switch default SAM output to be BAM.
107 #if str($output_options.outfmt_cond.outfmt) == 'bam':
108 $output_options.outfmt_cond.md_tag
109 #if $query_mate:
110 $output_options.outfmt_cond.no_query_id_trim
111 #end if
112 -out 'output.sam'
113 #if str($output_options.outfmt_cond.output_sort) == 'coordinate':
114 && samtools sort -@\${GALAXY_SLOTS:-4} -O bam 'output.sam' > '$output'
115 #elif str($output_options.outfmt_cond.output_sort) == 'name':
116 && samtools sort -n -@\${GALAXY_SLOTS:-4} -O bam 'output.sam' > '$output'
117 #else:
118 && samtools view -@\${GALAXY_SLOTS:-4} -bS 'output.sam' > '$output'
119 #end if
120 #else:
121 -out '$output'
122 -outfmt '$output_options.outfmt_cond.outfmt'
123 #end if
124
125 ## Convert out_unaligned from SAM to BAM if necessary
126
127 #if str($output_options.report_unaligned_cond.report_unaligned) == 'yes':
128 #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.report_unaligned_separately) == 'yes':
129 #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt) == 'bam':
130 #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.output_sort) == 'coordinate':
131 && samtools sort -@\${GALAXY_SLOTS:-4} -O bam 'out_unaligned' > '$output_unaligned'
132 #elif str($output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.output_sort) == 'name':
133 && samtools sort -n -@\${GALAXY_SLOTS:-4} -O bam 'out_unaligned' > '$output_unaligned'
134 #else:
135 && samtools view -@\${GALAXY_SLOTS:-4} -bS 'out_unaligned' > '$output_unaligned'
136 #end if
137 #else:
138 && mv 'out_unaligned' '$output_unaligned'
139 #end if
140 #end if
141 #end if
142 ]]></command>
143 <inputs>
144 <param argument="-query" type="data" format="fasta,fasta.gz,fastqsanger,fastqsanger.gz" label="Query file" help="Fasta or fastqsanger, optionally gzipped"/>
145 <param argument="-query_mate" type="data" format="fasta,fasta.gz,fastqsanger,fastqsanger.gz" optional="true" label="Query mate file (optional)" help="Fasta or fastqsanger, optionally gzipped"/>
146 <conditional name="db_opts">
147 <param name="db_opts_selector" type="select" label="Subject database/sequences">
148 <option value="histdb" selected="true">blast database from your history</option>
149 <option value="db">Locally installed blast database</option>
150 <option value="file">fasta file from your history (see warning in the tool help section below)</option>
151 </param>
152 <when value="histdb">
153 <param name="histdb" type="data" format="blastdbn" label="Nucleotide blast database"/>
154 </when>
155 <when value="db">
156 <param name="database" type="select" multiple="true" optional="false" label="Nucleotide blast database">
157 <options from_data_table="blastdb"/>
158 </param>
159 </when>
160 <when value="file">
161 <param argument="-subject" type="data" format="fasta,fasta.gz" label="Nucleotide fasta subject file to use instead of a database"/>
162 </when>
163 </conditional>
164 <section name="general_search" title="General search">
165 <param argument="-word_size" type="integer" value="18" min="12" label="Minimum number of consecutive bases matching exactly"/>
166 <param argument="-gapopen" type="integer" value="0" min="0" label="Cost to open a gap"/>
167 <param argument="-gapextend" type="integer" value="0" min="0" label="Cost to extend a gap"/>
168 <param argument="-penalty" type="integer" value="-4" max="0" label="Penalty for a nucleotide mismatch"/>
169 <param argument="-max_intron_length" type="integer" value="500000" min="0" label="Maximum allowed intron length"/>
170 </section>
171 <section name="query_filtering" title="Query filtering">
172 <param argument="-lcase_masking" type="boolean" truevalue="-lcase_masking" falsevalue="" checked="false" label="Use lower case filtering in subject sequences?"/>
173 <param argument="-validate_seqs" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Reject low quality sequences?"/>
174 <param argument="-limit_lookup" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Remove word seeds with high frequency in the searched database?"/>
175 <param argument="-max_db_word_count" type="integer" value="30" min="0" label="Words that appear more than this number of times in the database will be masked in the lookup table"/>
176 <param argument="-lookup_stride" type="integer" value="0" min="0" label="Number of words to skip after collecting one while creating a lookup table"/>
177 </section>
178 <section name="restrict_search" title="Restrict database search">
179 <param argument="-gilist" type="data" format="tabular" optional="true" label="Tabular file containing list of GIs to which to restrict database search" help="Available only for database searches"/>
180 <param argument="-negative_gilist" type="data" format="tabular" optional="true" label="Tabular file containing list of GIs to restrict database search to everything except the specified GIs" help="Available only for database searches"/>
181 <param argument="-seqidlist" type="data" format="tabular" optional="true" label="Tabular file containing list of SeqIDs to which to restrict database search" help="Available only for database searches"/>
182 <param argument="-negative_seqidlist" type="data" format="tabular" optional="true" label="Tabular file containing list of SeqIDs to restrict database search to everything except the specified SeqIDs" help="Available only for database searches"/>
183 <param argument="-taxids" type="text" optional="true" label="Comma-separated list of taxonomy IDs to which to restrict database search" help="Available only for database searches">
184 <expand macro="sanitize_query" validinitial="string.ascii_letters,string.digits,string.whitespace,string.punctuation"/>
185 </param>
186 <param argument="-taxidlist" type="data" format="tabular" optional="true" label="Tabular file containing list of taxonomy IDs to which to restrict database search" help="Available only for database searches"/>
187 <param argument="-negative_taxids" type="text" optional="true" label="Comma-separated list of taxonomy IDs to restrict database search to everything except the specified taxonomy IDs" help="Available only for database searches">
188 <expand macro="sanitize_query" validinitial="string.ascii_letters,string.digits,string.whitespace,string.punctuation"/>
189 </param>
190 <param argument="-negative_taxidlist" type="data" format="tabular" optional="true" label="Tabular file containing list of taxonomy IDs to restrict database search to everything except the specified taxonomy IDs" help="Available only for database searches"/>
191 </section>
192 <section name="mapping" title="Mapping">
193 <param argument="-score" type="integer" value="0" min="0" label="Cutoff score for accepting alignments" help="Zero value ignores"/>
194 <param argument="-max_edit_dist" type="integer" value="0" min="0" label="Cutoff edit distance for accepting an alignment" help="Zero value is unlimited"/>
195 <param argument="-splice" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Search for spliced alignments?"/>
196 <param argument="-reftype" type="select" label="Type of the reference">
197 <option value="genome" selected="true">genome</option>
198 <option value="transcriptome">transcriptome</option>
199 </param>
200 </section>
201 <section name="output_options" title="Output options">
202 <conditional name="report_unaligned_cond">
203 <param name="report_unaligned" type="select" label="Report unaligned reads?">
204 <option value="yes" selected="true">Yes</option>
205 <option value="no">No</option>
206 </param>
207 <when value="yes">
208 <conditional name="report_unaligned_separately_cond">
209 <param name="report_unaligned_separately" type="select" label="Output unaligned reads to a separate file?" help="Select No to output all reads to the same file">
210 <option value="no" selected="true">No</option>
211 <option value="yes">Yes</option>
212 </param>
213 <when value="no"/>
214 <when value="yes">
215 <conditional name="unaligned_fmt_cond">
216 <param argument="-unaligned_fmt" type="select" label="Output format for unaligned reads">
217 <option value="bam" selected="true">bam</option>
218 <option value="tabular">tabular</option>
219 <option value="fasta">fasta</option>
220 </param>
221 <when value="bam">
222 <expand macro="output_sort_param"/>
223 </when>
224 <when value="tabular"/>
225 <when value="fasta"/>
226 </conditional>
227 </when>
228 </conditional>
229 </when>
230 <when value="no"/>
231 </conditional>
232 <conditional name="outfmt_cond">
233 <param argument="-outfmt" type="select" label="Output format">
234 <option value="bam" selected="true">bam</option>
235 <option value="tabular">tabular</option>
236 </param>
237 <when value="bam">
238 <expand macro="output_sort_param"/>
239 <param argument="-md_tag" type="boolean" truevalue="-md_tag" falsevalue="" checked="false" label="Include MD tag in BAM output?"/>
240 <param argument="-no_query_id_trim" type="boolean" truevalue="-no_query_id_trim" falsevalue="" checked="false" label="Do not trim '.1', '/1', '.2', or '/2' at the end of read ids in BAM output for paired reads?" help="Ignored if no query mate"/>
241 </when>
242 <when value="tabular"/>
243 </conditional>
244 <param argument="-no_discordant" type="boolean" truevalue="-no_discordant" falsevalue="" checked="false" label="Suppress discordant alignments for paired reads?" help="Ignored if no query mate"/>
245 </section>
246 </inputs>
247 <outputs>
248 <data name="output" format="bam" label="${tool.name} on ${on_string}">
249 <change_format> <!-- outfmt lives inside the output_options section, so the full parameter path is required -->
250 <when input="output_options.outfmt_cond.outfmt" value="tabular" format="tabular"/>
251 </change_format>
252 </data>
253 <data name="output_unaligned" format="bam" label="${tool.name} on ${on_string}: unaligned reads">
254 <filter>output_options['report_unaligned_cond']['report_unaligned'] == 'yes' and output_options['report_unaligned_cond']['report_unaligned_separately_cond']['report_unaligned_separately'] == 'yes'</filter>
255 <change_format>
256 <when input="output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt" value="tabular" format="tabular"/>
257 <when input="output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt" value="fasta" format="fasta"/>
258 </change_format>
259 </data>
260 </outputs>
261 <tests>
262 <!-- Single fasta.gz input, subject file -->
263 <test expect_num_outputs="1">
264 <param name="query" value="query1.fasta.gz" ftype="fasta.gz"/>
265 <param name="db_opts_selector" value="file"/>
266 <param name="subject" value="subject1.fasta.gz" ftype="fasta.gz"/>
267 <output name="output" ftype="bam">
268 <assert_contents>
269 <has_size value="1247" delta="50"/>
270 </assert_contents>
271 </output>
272 </test>
273 <!-- Single fasta.gz input, subject file, output unaligned reads separately-->
274 <test expect_num_outputs="2">
275 <param name="query" value="query1.fasta.gz" ftype="fasta.gz"/>
276 <param name="db_opts_selector" value="file"/>
277 <param name="subject" value="subject1.fasta.gz" ftype="fasta.gz"/>
278 <param name="report_unaligned_separately" value="yes"/>
279 <param name="unaligned_fmt" value="tabular"/>
280 <output name="output" ftype="bam">
281 <assert_contents>
282 <has_size value="492" delta="50"/>
283 </assert_contents>
284 </output>
285 <output name="output_unaligned" ftype="tabular">
286 <assert_contents>
287 <has_size value="959"/>
288 </assert_contents>
289 </output>
290 </test>
291 <!-- Single fasta.gz input, subject file, gilist file, results in error -->
292 <test expect_failure="true">
293 <param name="query" value="query1.fasta.gz" ftype="fasta.gz"/>
294 <param name="db_opts_selector" value="file"/>
295 <param name="subject" value="subject1.fasta.gz" ftype="fasta.gz"/>
296 <param name="report_unaligned_separately" value="yes"/>
297 <param name="gilist" value="gilist1.tabular" ftype="tabular"/>
298 <assert_stderr>
299 <has_text text="Incompatible with argument:"/>
300 </assert_stderr>
301 </test>
302 <!-- Single fasta.gz input, cached db, taxidlist, results in error -->
303 <test expect_failure="true">
304 <param name="query" value="query1.fasta.gz" ftype="fasta.gz"/>
305 <param name="db_opts_selector" value="db"/>
306 <param name="database" value="phiX174"/>
307 <param name="taxidlist" value="taxids.tabular" ftype="tabular"/>
308 <assert_stderr>
309 <has_text text="Taxonomy filtering is not supported in v4 BLAST dbs"/>
310 </assert_stderr>
311 </test>
312 <!-- Paired fastqsanger.gz input, subject file -->
313 <test expect_num_outputs="1">
314 <param name="query" value="query_forward1.fastqsanger.gz" ftype="fastqsanger.gz"/>
315 <param name="query_mate" value="query_reverse1.fastqsanger.gz" ftype="fastqsanger.gz"/>
316 <param name="db_opts_selector" value="file"/>
317 <param name="subject" value="subject1.fasta.gz" ftype="fasta.gz"/>
318 <output name="output" ftype="bam">
319 <assert_contents>
320 <has_size value="62080" delta="50"/>
321 </assert_contents>
322 </output>
323 </test>
324 <!-- Paired fastqsanger.gz input, cached blast db -->
325 <test expect_num_outputs="1">
326 <param name="query" value="query_forward1.fastqsanger.gz" ftype="fastqsanger.gz"/>
327 <param name="query_mate" value="query_reverse1.fastqsanger.gz" ftype="fastqsanger.gz"/>
328 <param name="db_opts_selector" value="db"/>
329 <param name="database" value="phiX174"/>
330 <output name="output" ftype="bam">
331 <assert_contents>
332 <has_size value="62079" delta="50"/>
333 </assert_contents>
334 </output>
335 </test>
336 </tests>
337 <help><![CDATA[
338 **What it does**
339
340 .. class:: warningmark
341
342 In addition to a BLAST database, you can also search against a fasta file of subject (target) sequences. However, this is not
343 advised because it is slower (only one CPU is used), but more importantly gives e-values for pairwise searches (very small
344 e-values which will look overly significant). In most cases you should convert the fasta file into a blast database using
345 *makeblastdb* and search against that.
346
347 Magic-BLAST is a tool for mapping large next-generation RNA or DNA sequencing runs against a whole genome or transcriptome.
348 Each alignment optimizes a composite score, taking into account simultaneously the two reads of a pair, and in case of RNA-seq,
349 locating the candidate introns and adding up the score of all exons. This is very different from other versions of BLAST, where
350 each exon is scored as a separate hit and read-pairing is ignored.
351
352 Magic-BLAST incorporates within the NCBI BLAST code framework ideas developed in the NCBI Magic pipeline, in particular hit
353 extensions by local walk and jump, and recursive clipping of mismatches near the edges of the reads, which avoids accumulating
354 artefactual mismatches near splice sites and is needed to distinguish short indels from substitutions near the edges.
355
356 The tool accepts a single or paired set of reads in fasta or fastqsanger format and produces bam or tabular output.
357
358 More information about Magic-BLAST is available in the
359 `online documentation <https://ncbi.github.io/magicblast/>`_.
360 ]]></help>
361 <expand macro="citations"/>
362 </tool>