Mercurial > repos > iuc > magicblast
comparison magicblast.xml @ 0:e6799e98c5fb draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blast commit 15fc6c06f743bae276ff02dc405e7da61a07bd08"
author | iuc |
---|---|
date | Tue, 05 Apr 2022 12:11:08 +0000 |
parents | |
children | aea6702a3cd5 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e6799e98c5fb |
---|---|
1 <tool id="magicblast" name="Magic-BLAST: map large RNA or DNA sequences" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
2 <description>against a whole genome or transcriptome</description> | |
3 <macros> | |
4 <import>macros.xml</import> | |
5 </macros> | |
6 <expand macro="requirements"/> | |
7 <command detect_errors="exit_code"><![CDATA[ | |
8 #import os | |
9 | |
10 magicblast | |
11 -num_threads \${GALAXY_SLOTS:-8} | |
12 #if $query.is_of_type('fasta.gz', 'fastqsanger.gz'): | |
13 -query <(gunzip -c '${query}') | |
14 #else: | |
15 -query '${query}' | |
16 #end if | |
17 #if $query_mate: | |
18 -paired | |
19 #if $query_mate.is_of_type('fasta.gz', 'fastqsanger.gz'): | |
20 -query_mate <(gunzip -c '${query_mate}') | |
21 #else: | |
22 -query_mate '${query_mate}' | |
23 #end if | |
24 #end if | |
25 | |
26 #if $query.is_of_type('fastqsanger', 'fastqsanger.gz'): | |
27 -infmt fastq | |
28 #end if | |
29 | |
30 #if $db_opts.db_opts_selector == "histdb": | |
31 -db '${os.path.join($db_opts.histdb.extra_files_path, "blastdb")}' | |
32 #elif $db_opts.db_opts_selector == "db": | |
33 -db '${os.path.join($db_opts.database.fields.path, "blastdb")}' | |
34 #else: | |
35 #if $db_opts.subject.is_of_type('fasta.gz'): | |
36 -subject <(gunzip -c '${$db_opts.subject}') | |
37 #else: | |
38 -subject '${db_opts.subject}' | |
39 #end if | |
40 #end if | |
41 | |
42 ## General search options | |
43 -word_size $general_search.word_size | |
44 -gapopen $general_search.gapopen | |
45 -gapextend $general_search.gapextend | |
46 -penalty $general_search.penalty | |
47 -max_intron_length $general_search.max_intron_length | |
48 | |
49 ## Query filtering options | |
50 $query_filtering.lcase_masking | |
51 -validate_seqs $query_filtering.validate_seqs | |
52 -limit_lookup $query_filtering.limit_lookup | |
53 -max_db_word_count $query_filtering.max_db_word_count | |
54 -lookup_stride $query_filtering.lookup_stride | |
55 | |
56 ## Restrict database search | |
57 #if $restrict_search.gilist: | |
58 -gilist '$restrict_search.gilist' | |
59 #end if | |
60 #if $restrict_search.negative_gilist: | |
61 -negative_gilist '$restrict_search.negative_gilist' | |
62 #end if | |
63 #if $restrict_search.seqidlist: | |
64 -seqidlist '$restrict_search.seqidlist' | |
65 #end if | |
66 #if $restrict_search.negative_seqidlist: | |
67 -negative_seqidlist '$restrict_search.negative_seqidlist' | |
68 #end if | |
69 #if str($restrict_search.taxids) != '': | |
70 -taxids '$restrict_search.taxids' | |
71 #end if | |
72 #if $restrict_search.taxidlist: | |
73 -taxidlist '$restrict_search.taxidlist' | |
74 #end if | |
75 #if str($restrict_search.negative_taxids) != '': | |
76 -negative_taxids '$restrict_search.negative_taxids' | |
77 #end if | |
78 #if $restrict_search.negative_taxidlist: | |
79 -negative_taxidlist '$restrict_search.negative_taxidlist' | |
80 #end if | |
81 | |
82 ## Mapping options | |
83 -score $mapping.score | |
84 #if $mapping.max_edit_dist > 0: | |
85 -max_edit_dist $mapping.max_edit_dist | |
86 #end if | |
87 -splice '$mapping.splice' | |
88 -reftype '$mapping.reftype' | |
89 | |
90 ## Output unaligned options | |
91 #if str($output_options.report_unaligned_cond.report_unaligned) == 'yes': | |
92 #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.report_unaligned_separately) == 'yes': | |
93 -out_unaligned 'out_unaligned' | |
94 #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt) == 'bam': | |
95 -unaligned_fmt 'sam' | |
96 #else: | |
97 -unaligned_fmt '$output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt' | |
98 #end if | |
99 #end if | |
100 #else: | |
101 -no_unaligned | |
102 #end if | |
103 | |
104 ## Additional output options | |
105 $output_options.no_discordant | |
106 ## BAM requested: write the default SAM output to the scratch file 'output.sam', then let samtools convert (and optionally sort) it into '$output'. | |
107 #if str($output_options.outfmt_cond.outfmt) == 'bam': | |
108 $output_options.outfmt_cond.md_tag | |
109 #if $query_mate: | |
110 $output_options.outfmt_cond.no_query_id_trim | |
111 #end if | |
112 -out 'output.sam' | |
113 #if str($output_options.outfmt_cond.output_sort) == 'coordinate': | |
114 && samtools sort -@\${GALAXY_SLOTS:-4} -O bam 'output.sam' > '$output' | |
115 #elif str($output_options.outfmt_cond.output_sort) == 'name': | |
116 && samtools sort -n -@\${GALAXY_SLOTS:-4} -O bam 'output.sam' > '$output' | |
117 #else: | |
118 && samtools view -@\${GALAXY_SLOTS:-4} -bS 'output.sam' > '$output' | |
119 #end if | |
120 #else: | |
121 -out '$output' | |
122 -outfmt '$output_options.outfmt_cond.outfmt' | |
123 #end if | |
124 | |
125 ## Post-process the separate unaligned-reads file 'out_unaligned': convert SAM to BAM (optionally sorted) when BAM was requested, otherwise just move it to '$output_unaligned'. | |
126 | |
127 #if str($output_options.report_unaligned_cond.report_unaligned) == 'yes': | |
128 #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.report_unaligned_separately) == 'yes': | |
129 #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt) == 'bam': | |
130 #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.output_sort) == 'coordinate': | |
131 && samtools sort -@\${GALAXY_SLOTS:-4} -O bam 'out_unaligned' > '$output_unaligned' | |
132 #elif str($output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.output_sort) == 'name': | |
133 && samtools sort -n -@\${GALAXY_SLOTS:-4} -O bam 'out_unaligned' > '$output_unaligned' | |
134 #else: | |
135 && samtools view -@\${GALAXY_SLOTS:-4} -bS 'out_unaligned' > '$output_unaligned' | |
136 #end if | |
137 #else: | |
138 && mv 'out_unaligned' '$output_unaligned' | |
139 #end if | |
140 #end if | |
141 #end if | |
142 ]]></command> | |
143 <inputs> | |
144 <param argument="-query" type="data" format="fasta,fasta.gz,fastqsanger,fastqsanger.gz" label="Query file" help="Fasta or fastqsanger, optionally gzipped"/> | |
145 <param argument="-query_mate" type="data" format="fasta,fasta.gz,fastqsanger,fastqsanger.gz" optional="true" label="Query mate file (optional)" help="Fasta or fastqsanger, optionally gzipped"/> | |
146 <conditional name="db_opts"> | |
147 <param name="db_opts_selector" type="select" label="Subject database/sequences"> | |
148 <option value="histdb" selected="true">blast database from your history</option> | |
149 <option value="db">Locally installed blast database</option> | |
150 <option value="file">fasta file from your history (see warning in the tool help section below)</option> | |
151 </param> | |
152 <when value="histdb"> | |
153 <param name="histdb" type="data" format="blastdbn" label="Nucleotide blast database"/> | |
154 </when> | |
155 <when value="db"> | |
156 <param name="database" type="select" multiple="true" optional="false" label="Nucleotide blast database"> | |
157 <options from_data_table="blastdb"/> | |
158 </param> | |
159 </when> | |
160 <when value="file"> | |
161 <param argument="-subject" type="data" format="fasta,fasta.gz" label="Nucleotide fasta subject file to use instead of a database"/> | |
162 </when> | |
163 </conditional> | |
164 <section name="general_search" title="General search"> | |
165 <param argument="-word_size" type="integer" value="18" min="12" label="Minimum number of consecutive bases matching exactly"/> | |
166 <param argument="-gapopen" type="integer" value="0" min="0" label="Cost to open a gap"/> | |
167 <param argument="-gapextend" type="integer" value="0" min="0" label="Cost to extend a gap"/> | |
168 <param argument="-penalty" type="integer" value="-4" max="0" label="Penalty for a nucleotide mismatch"/> | |
169 <param argument="-max_intron_length" type="integer" value="500000" min="0" label="Maximum allowed intron length"/> | |
170 </section> | |
171 <section name="query_filtering" title="Query filtering"> | |
172 <param argument="-lcase_masking" type="boolean" truevalue="-lcase_masking" falsevalue="" checked="false" label="Use lower case filtering in subject sequences?"/> | |
173 <param argument="-validate_seqs" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Reject low quality sequences?"/> | |
174 <param argument="-limit_lookup" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Remove word seeds with high frequency in the searched database?"/> | |
175 <param argument="-max_db_word_count" type="integer" value="30" min="0" label="Words that appear more than this number of times in the database will be masked in the lookup table"/> | |
176 <param argument="-lookup_stride" type="integer" value="0" min="0" label="Number of words to skip after collecting one while creating a lookup table"/> | |
177 </section> | |
178 <section name="restrict_search" title="Restrict database search"> | |
179 <param argument="-gilist" type="data" format="tabular" optional="true" label="Tabular file containing list of GIs to which to restrict database search" help="Available only for database searches"/> | |
180 <param argument="-negative_gilist" type="data" format="tabular" optional="true" label="Tabular file containing list of GIs to restrict database search to everything except the specified GIs" help="Available only for database searches"/> | |
181 <param argument="-seqidlist" type="data" format="tabular" optional="true" label="Tabular file containing list of SeqIDs to which to restrict database search" help="Available only for database searches"/> | |
182 <param argument="-negative_seqidlist" type="data" format="tabular" optional="true" label="Tabular file containing list of SeqIDs to restrict database search to everything except the specified SeqIDs" help="Available only for database searches"/> | |
183 <param argument="-taxids" type="text" optional="true" label="Comma-separated list of taxonomy IDs to which to restrict database search" help="Available only for database searches"> | |
184 <expand macro="sanitize_query" validinitial="string.ascii_letters,string.digits,string.whitespace,string.punctuation"/> | |
185 </param> | |
186 <param argument="-taxidlist" type="data" format="tabular" optional="true" label="Tabular file containing list of taxonomy IDs to which to restrict database search" help="Available only for database searches"/> | |
187 <param argument="-negative_taxids" type="text" optional="true" label="Comma-separated list of taxonomy IDs to restrict database search to everything except the specified taxonomy IDs" help="Available only for database searches"> | |
188 <expand macro="sanitize_query" validinitial="string.ascii_letters,string.digits,string.whitespace,string.punctuation"/> | |
189 </param> | |
190 <param argument="-negative_taxidlist" type="data" format="tabular" optional="true" label="Tabular file containing list of taxonomy IDs to restrict database search to everything except the specified taxonomy IDs" help="Available only for database searches"/> | |
191 </section> | |
192 <section name="mapping" title="Mapping"> | |
193 <param argument="-score" type="integer" value="0" min="0" label="Cutoff score for accepting alignments" help="Zero value ignores"/> | |
194 <param argument="-max_edit_dist" type="integer" value="0" min="0" label="Cutoff edit distance for accepting an alignment" help="Zero value is unlimited"/> | |
195 <param argument="-splice" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Search for spliced alignments?"/> | |
196 <param argument="-reftype" type="select" label="Type of the reference"> | |
197 <option value="genome" selected="true">genome</option> | |
198 <option value="transcriptome">transcriptome</option> | |
199 </param> | |
200 </section> | |
201 <section name="output_options" title="Output options"> | |
202 <conditional name="report_unaligned_cond"> | |
203 <param name="report_unaligned" type="select" label="Report unaligned reads?"> | |
204 <option value="yes" selected="true">Yes</option> | |
205 <option value="no">No</option> | |
206 </param> | |
207 <when value="yes"> | |
208 <conditional name="report_unaligned_separately_cond"> | |
209 <param name="report_unaligned_separately" type="select" label="Output unaligned reads to a separate file?" help="Select No to output all reads to the same file"> | |
210 <option value="no" selected="true">No</option> | |
211 <option value="yes">Yes</option> | |
212 </param> | |
213 <when value="no"/> | |
214 <when value="yes"> | |
215 <conditional name="unaligned_fmt_cond"> | |
216 <param argument="-unaligned_fmt" type="select" label="Output format for unaligned reads"> | |
217 <option value="bam" selected="true">bam</option> | |
218 <option value="tabular">tabular</option> | |
219 <option value="fasta">fasta</option> | |
220 </param> | |
221 <when value="bam"> | |
222 <expand macro="output_sort_param"/> | |
223 </when> | |
224 <when value="tabular"/> | |
225 <when value="fasta"/> | |
226 </conditional> | |
227 </when> | |
228 </conditional> | |
229 </when> | |
230 <when value="no"/> | |
231 </conditional> | |
232 <conditional name="outfmt_cond"> | |
233 <param argument="-outfmt" type="select" label="Output format"> | |
234 <option value="bam" selected="true">bam</option> | |
235 <option value="tabular">tabular</option> | |
236 </param> | |
237 <when value="bam"> | |
238 <expand macro="output_sort_param"/> | |
239 <param argument="-md_tag" type="boolean" truevalue="-md_tag" falsevalue="" checked="false" label="Include MD tag in BAM output?"/> | |
240 <param argument="-no_query_id_trim" type="boolean" truevalue="-no_query_id_trim" falsevalue="" checked="false" label="Do not trim '.1', '/1', '.2', or '/2' at the end of read ids in BAM output for paired reads?" help="Ignored if no query mate"/> | |
241 </when> | |
242 <when value="tabular"/> | |
243 </conditional> | |
244 <param argument="-no_discordant" type="boolean" truevalue="-no_discordant" falsevalue="" checked="false" label="Suppress discordant alignments for paired reads?" help="Ignored if no query mate"/> | |
245 </section> | |
246 </inputs> | |
247 <outputs> | |
248 <data name="output" format="bam" label="${tool.name} on ${on_string}"> | |
249 <change_format> | |
250 <when input="output_options.outfmt_cond.outfmt" value="tabular" format="tabular"/> | |
251 </change_format> | |
252 </data> | |
253 <data name="output_unaligned" format="bam" label="${tool.name} on ${on_string}: unaligned reads"> | |
254 <filter>output_options['report_unaligned_cond']['report_unaligned'] == 'yes' and output_options['report_unaligned_cond']['report_unaligned_separately_cond']['report_unaligned_separately'] == 'yes'</filter> | |
255 <change_format> | |
256 <when input="output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt" value="tabular" format="tabular"/> | |
257 <when input="output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt" value="fasta" format="fasta"/> | |
258 </change_format> | |
259 </data> | |
260 </outputs> | |
261 <tests> | |
262 <!-- Single fasta.gz input, subject file --> | |
263 <test expect_num_outputs="1"> | |
264 <param name="query" value="query1.fasta.gz" ftype="fasta.gz"/> | |
265 <param name="db_opts_selector" value="file"/> | |
266 <param name="subject" value="subject1.fasta.gz" ftype="fasta.gz"/> | |
267 <output name="output" ftype="bam"> | |
268 <assert_contents> | |
269 <has_size value="1247" delta="50"/> | |
270 </assert_contents> | |
271 </output> | |
272 </test> | |
273 <!-- Single fasta.gz input, subject file, output unaligned reads separately--> | |
274 <test expect_num_outputs="2"> | |
275 <param name="query" value="query1.fasta.gz" ftype="fasta.gz"/> | |
276 <param name="db_opts_selector" value="file"/> | |
277 <param name="subject" value="subject1.fasta.gz" ftype="fasta.gz"/> | |
278 <param name="report_unaligned_separately" value="yes"/> | |
279 <param name="unaligned_fmt" value="tabular"/> | |
280 <output name="output" ftype="bam"> | |
281 <assert_contents> | |
282 <has_size value="492" delta="50"/> | |
283 </assert_contents> | |
284 </output> | |
285 <output name="output_unaligned" ftype="tabular"> | |
286 <assert_contents> | |
287 <has_size value="959"/> | |
288 </assert_contents> | |
289 </output> | |
290 </test> | |
291 <!-- Single fasta.gz input, subject file, gilist file, results in error --> | |
292 <test expect_failure="true"> | |
293 <param name="query" value="query1.fasta.gz" ftype="fasta.gz"/> | |
294 <param name="db_opts_selector" value="file"/> | |
295 <param name="subject" value="subject1.fasta.gz" ftype="fasta.gz"/> | |
296 <param name="report_unaligned_separately" value="yes"/> | |
297 <param name="gilist" value="gilist1.tabular" ftype="tabular"/> | |
298 <assert_stderr> | |
299 <has_text text="Incompatible with argument:"/> | |
300 </assert_stderr> | |
301 </test> | |
302 <!-- Single fasta.gz input, cached db, taxidlist, results in error --> | |
303 <test expect_failure="true"> | |
304 <param name="query" value="query1.fasta.gz" ftype="fasta.gz"/> | |
305 <param name="db_opts_selector" value="db"/> | |
306 <param name="database" value="phiX174"/> | |
307 <param name="taxidlist" value="taxids.tabular" ftype="tabular"/> | |
308 <assert_stderr> | |
309 <has_text text="Taxonomy filtering is not supported in v4 BLAST dbs"/> | |
310 </assert_stderr> | |
311 </test> | |
312 <!-- Paired fastqsanger.gz input, subject file --> | |
313 <test expect_num_outputs="1"> | |
314 <param name="query" value="query_forward1.fastqsanger.gz" ftype="fastqsanger.gz"/> | |
315 <param name="query_mate" value="query_reverse1.fastqsanger.gz" ftype="fastqsanger.gz"/> | |
316 <param name="db_opts_selector" value="file"/> | |
317 <param name="subject" value="subject1.fasta.gz" ftype="fasta.gz"/> | |
318 <output name="output" ftype="bam"> | |
319 <assert_contents> | |
320 <has_size value="62080" delta="50"/> | |
321 </assert_contents> | |
322 </output> | |
323 </test> | |
324 <!-- Paired fastqsanger.gz input, cached blast db --> | |
325 <test expect_num_outputs="1"> | |
326 <param name="query" value="query_forward1.fastqsanger.gz" ftype="fastqsanger.gz"/> | |
327 <param name="query_mate" value="query_reverse1.fastqsanger.gz" ftype="fastqsanger.gz"/> | |
328 <param name="db_opts_selector" value="db"/> | |
329 <param name="database" value="phiX174"/> | |
330 <output name="output" ftype="bam"> | |
331 <assert_contents> | |
332 <has_size value="62079" delta="50"/> | |
333 </assert_contents> | |
334 </output> | |
335 </test> | |
336 </tests> | |
337 <help><![CDATA[ | |
338 **What it does** | |
339 | |
340 .. class:: warningmark | |
341 | |
342 In addition to a BLAST database, you can also search against a fasta file of subject (target) sequences. However, this is not | |
343 advised because it is slower (only one CPU is used), but more importantly gives e-values for pairwise searches (very small | |
344 e-values which will look overly significant). In most cases you should convert the fasta file into a blast database using | |
345 *makeblastdb* and search against that. | |
346 | |
347 Magic-BLAST is a tool for mapping large next-generation RNA or DNA sequencing runs against a whole genome or transcriptome. | |
348 Each alignment optimizes a composite score, taking into account simultaneously the two reads of a pair, and in case of RNA-seq, | |
349 locating the candidate introns and adding up the score of all exons. This is very different from other versions of BLAST, where | |
350 each exon is scored as a separate hit and read-pairing is ignored. | |
351 | |
352 Magic-BLAST incorporates within the NCBI BLAST code framework ideas developed in the NCBI Magic pipeline, in particular hit | |
353 extensions by local walk and jump, and recursive clipping of mismatches near the edges of the reads, which avoids accumulating | |
354 artefactual mismatches near splice sites and is needed to distinguish short indels from substitutions near the edges. | |
355 | |
356 The tool accepts a single or paired set of reads in fasta or fastqsanger format and produces bam or tabular output. | |
357 | |
358 More information about Magic-BLAST is available in the | |
359 `online documentation <https://ncbi.github.io/magicblast/>`_. | |
360 ]]></help> | |
361 <expand macro="citations"/> | |
362 </tool> |