Mercurial > repos > iuc > magicblast
diff magicblast.xml @ 0:e6799e98c5fb draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/blast commit 15fc6c06f743bae276ff02dc405e7da61a07bd08"
author | iuc |
---|---|
date | Tue, 05 Apr 2022 12:11:08 +0000 |
parents | |
children | aea6702a3cd5 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/magicblast.xml Tue Apr 05 12:11:08 2022 +0000 @@ -0,0 +1,362 @@ +<tool id="magicblast" name="Magic-BLAST: map large RNA or DNA sequences" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> + <description>against a whole genome or transcriptome</description> + <macros> + <import>macros.xml</import> + </macros> + <expand macro="requirements"/> + <command detect_errors="exit_code"><![CDATA[ +#import os + +magicblast +-num_threads \${GALAXY_SLOTS:-8} +#if $query.is_of_type('fasta.gz', 'fastqsanger.gz'): + -query <(gunzip -c '${query}') +#else: + -query '${query}' +#end if +#if $query_mate: + -paired + #if $query.is_of_type('fasta.gz', 'fastqsanger.gz'): + -query_mate <(gunzip -c '${query}') + #else: + -query_mate '${query}' + #end if +#end if + +#if $query.is_of_type('fastqsanger', 'fastqsanger.gz'): + -infmt fastq +#end if + +#if $db_opts.db_opts_selector == "histdb": + -db '${os.path.join($db_opts.histdb.extra_files_path, "blastdb")}' +#elif $db_opts.db_opts_selector == "db": + -db '${os.path.join($db_opts.database.fields.path, "blastdb")}' +#else: + #if $db_opts.subject.is_of_type('fasta.gz'): + -subject <(gunzip -c '${$db_opts.subject}') + #else: + -subject '${db_opts.subject}' + #end if +#end if + +## General search options +-word_size $general_search.word_size +-gapopen $general_search.gapopen +-gapextend $general_search.gapextend +-penalty $general_search.penalty +-max_intron_length $general_search.max_intron_length + +## Query filtering options +$query_filtering.lcase_masking +-validate_seqs $query_filtering.validate_seqs +-limit_lookup $query_filtering.limit_lookup +-max_db_word_count $query_filtering.max_db_word_count +-lookup_stride $query_filtering.lookup_stride + +## Restrict database search +#if $restrict_search.gilist: + -gilist '$restrict_search.gilist' +#end if +#if $restrict_search.negative_gilist: + -negative_gilist '$restrict_search.negative_gilist' +#end if +#if $restrict_search.seqidlist: + -seqidlist '$restrict_search.seqidlist' +#end if +#if $restrict_search.negative_seqidlist: + -negative_seqidlist '$restrict_search.negative_seqidlist' +#end if +#if str($restrict_search.taxids) != '': + --taxids '$restrict_search.taxids' +#end if +#if $restrict_search.taxidlist: + -taxidlist '$restrict_search.taxidlist' +#end if +#if str($restrict_search.negative_taxids) != '': + --negative_taxids '$restrict_search.negative_taxids' +#end if +#if $restrict_search.negative_taxidlist: + -negative_taxidlist '$restrict_search.negative_taxidlist' +#end if + +## Mapping options +-score $mapping.score +#if $mapping.max_edit_dist > 0: + -max_edit_dist $mapping.max_edit_dist +#end if +-splice '$mapping.splice' +-reftype '$mapping.reftype' + +## Output unaligned options +#if str($output_options.report_unaligned_cond.report_unaligned) == 'yes': + #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.report_unaligned_separately) == 'yes': + -out_unaligned 'out_unaligned' + #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt) == 'bam': + -unaligned_fmt 'sam' + #else: + -unaligned_fmt '$output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt' + #end if + #end if +#else: + -no_unaligned +#end if + +## Additional output options +$output_options.no_discordant +## Switch default SAM output to be BAM. +#if str($output_options.outfmt_cond.outfmt) == 'bam': + $output_options.outfmt_cond.md_tag + #if $query_mate: + $output_options.outfmt_cond.no_query_id_trim + #end if + -out 'output.sam' + #if str($output_options.outfmt_cond.output_sort) == 'coordinate': + && samtools sort -@\${GALAXY_SLOTS:-4} -O bam 'output.sam' > '$output' + #elif str($output_options.outfmt_cond.output_sort) == 'name': + && samtools sort -n -@\${GALAXY_SLOTS:-4} -O bam -o 'output.sam' > '$output' + #else: + && samtools view -@\${GALAXY_SLOTS:-4} -bS 'output.sam' > '$output' + #end if +#else: + -out '$output' + -outfmt '$output_options.outfmt_cond.outfmt' +#end if + +## Convert out_unaligned from SAM to BAM if necessary + +#if str($output_options.report_unaligned_cond.report_unaligned) == 'yes': + #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.report_unaligned_separately) == 'yes': + #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt) == 'bam': + #if str($output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.output_sort) == 'coordinate': + && samtools sort -@\${GALAXY_SLOTS:-4} -O bam -o 'out_unaligned' > '$output_unaligned' + #elif str($output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.output_sort) == 'name': + && samtools sort -n -@\${GALAXY_SLOTS:-4} -O bam -o 'out_unaligned' > '$output_unaligned' + #else: + && samtools view -@\${GALAXY_SLOTS:-4} -bS 'out_unaligned' > '$output_unaligned' + #end if + #else: + && mv 'out_unaligned' '$output_unaligned' + #end if + #end if +#end if + ]]></command> + <inputs> + <param argument="-query" type="data" format="fasta,fasta.gz,fastqsanger,fastqsanger.gz" label="Query file" help="Fasta or fastqsanger, optionally gzipped"/> + <param argument="-query_mate" type="data" format="fasta,fasta.gz,fastqsanger,fastqsanger.gz" optional="true" label="Query mate file (optional)" help="Fasta or fastqsanger, optionally gzipped"/> + <conditional name="db_opts"> + <param name="db_opts_selector" type="select" label="Subject database/sequences"> + <option value="histdb" selected="true">blast database from your history</option> + <option value="db">Locally installed blast database</option> + <option value="file">fasta file from your history (see warning in the tool help section below)</option> + </param> + <when value="histdb"> + <param name="histdb" type="data" format="blastdbn" label="Nucleotide blast database"/> + </when> + <when value="db"> + <param name="database" type="select" multiple="true" optional="false" label="Nucleotide blast database"> + <options from_data_table="blastdb"/> + </param> + </when> + <when value="file"> + <param argument="-subject" type="data" format="fasta,fasta.gz" label="Nucleotide fasta subject file to use instead of a database"/> + </when> + </conditional> + <section name="general_search" title="General search"> + <param argument="-word_size" type="integer" value="18" min="12" label="Minimum number of consecutive bases matching exactly"/> + <param argument="-gapopen" type="integer" value="0" min="0" label="Cost to open a gap"/> + <param argument="-gapextend" type="integer" value="0" min="0" label="Cost to extend a gap"/> + <param argument="-penalty" type="integer" value="-4" max="0" label="Penalty for a nucleotide mismatch"/> + <param argument="-max_intron_length" type="integer" value="500000" min="0" label="Maximum allowed intron length"/> + </section> + <section name="query_filtering" title="Query filtering"> + <param argument="-lcase_masking" type="boolean" truevalue="-lcase_masking" falsevalue="" checked="false" label="Use lower case filtering in subject sequences?"/> + <param argument="-validate_seqs" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Reject low quality sequences?"/> + <param argument="-limit_lookup" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Remove word seeds with high frequency in the searched database?"/> + <param argument="-max_db_word_count" type="integer" value="30" min="0" label="Words that appear more than this number of times in the database will be masked in the lookup table"/> + <param argument="-lookup_stride" type="integer" value="0" min="0" label="Number of words to skip after collecting one while creating a lookup table"/> + </section> + <section name="restrict_search" title="Restrict database search"> + <param argument="-gilist" type="data" format="tabular" optional="true" label="Tabular file containing list of GIs to which to restrict database search" help="Available only for database searches"/> + <param argument="-negative_gilist" type="data" format="tabular" optional="true" label="Tabular file containing list of GIs to restrict database search to everything except the specified GIs" help="Available only for database searches"/> + <param argument="-seqidlist" type="data" format="tabular" optional="true" label="Tabular file containing list of SeqIDs to which to restrict database search" help="Available only for database searches"/> + <param argument="-negative_seqidlist" type="data" format="tabular" optional="true" label="Tabular file containing list of SeqIDs to restrict database search to everything except the specified SeqIDs" help="Available only for database searches"/> + <param argument="-taxids" type="text" optional="true" label="Comma-separated list of taxonomy IDs to which to restrict database search" help="Available only for database searches"> + <expand macro="sanitize_query" validinitial="string.ascii_letters,string.digits,string.whitespace,string.punctuation"/> + </param> + <param argument="-taxidlist" type="data" format="tabular" optional="true" label="Tabular file containing list of taxonomy IDs to which to restrict database search" help="Available only for database searches"/> + <param argument="-negative_taxids" type="text" optional="true" label="Comma-separated list of taxonomy IDs to restrict database search to everything except the specified taxonomy IDs" help="Available only for database searches"> + <expand macro="sanitize_query" validinitial="string.ascii_letters,string.digits,string.whitespace,string.punctuation"/> + </param> + <param argument="-negative_taxidlist" type="data" format="tabular" optional="true" label="Tabular file containing list of taxonomy IDs to restrict database search to everythin except the specified taxonomy IDs" help="Available only for database searches"/> + </section> + <section name="mapping" title="Mapping"> + <param argument="-score" type="integer" value="0" min="0" label="Cutoff score for accepting alignments" help="Zero value ignores"/> + <param argument="-max_edit_dist" type="integer" value="0" min="0" label="Cutoff edit distance for accepting an alignment" help="Zero value is unlimited"/> + <param argument="-splice" type="boolean" truevalue="true" falsevalue="false" checked="true" label="Search for spliced alignments?"/> + <param argument="-reftype" type="select" label="Type of the reference"> + <option value="genome" selected="true">genome</option> + <option value="transcriptome">transcriptome</option> + </param> + </section> + <section name="output_options" title="Output options"> + <conditional name="report_unaligned_cond"> + <param name="report_unaligned" type="select" label="Report unaligned reads?"> + <option value="yes" selected="true">Yes</option> + <option value="no">No</option> + </param> + <when value="yes"> + <conditional name="report_unaligned_separately_cond"> + <param name="report_unaligned_separately" type="select" label="Output unaligned reads to a separate file?" help="Select No to output all reads to the same file"> + <option value="no" selected="true">No</option> + <option value="yes">Yes</option> + </param> + <when value="no"/> + <when value="yes"> + <conditional name="unaligned_fmt_cond"> + <param argument="-unaligned_fmt" type="select" label="Output format for unaligned reads"> + <option value="bam" selected="true">bam</option> + <option value="tabular">tabular</option> + <option value="fasta">fasta</option> + </param> + <when value="bam"> + <expand macro="output_sort_param"/> + </when> + <when value="tabular"/> + <when value="fasta"/> + </conditional> + </when> + </conditional> + </when> + <when value="no"/> + </conditional> + <conditional name="outfmt_cond"> + <param argument="-outfmt" type="select" label="Output format"> + <option value="bam" selected="true">bam</option> + <option value="tabular">tabular</option> + </param> + <when value="bam"> + <expand macro="output_sort_param"/> + <param argument="-md_tag" type="boolean" truevalue="-md_tag" falsevalue="" checked="false" label="Include MD tag in BAM output?"/> + <param argument="-no_query_id_trim" type="boolean" truevalue="-no_query_id_trim" falsevalue="" checked="false" label="Do not trim '.1', '/1', '.2', or '/2' at the end of read ids in BAM output for paired reads?" help="Ignored if no query mate"/> + </when> + <when value="tabular"/> + </conditional> + <param argument="-no_discordant" type="boolean" truevalue="-no_discordant" falsevalue="" checked="false" label="Suppress discordant alignments for paired reads?" help="Ignored if no query mate"/> + </section> + </inputs> + <outputs> + <data name="output" format="bam" label="${tool.name} on ${on_string}"> + <change_format> + <when input="output.outfmt_cond.outfmt" value="tabular" format="tabular"/> + </change_format> + </data> + <data name="output_unaligned" format="bam" label="${tool.name} on ${on_string}: unaligned reads"> + <filter>output_options['report_unaligned_cond']['report_unaligned'] == 'yes' and output_options['report_unaligned_cond']['report_unaligned_separately_cond']['report_unaligned_separately'] == 'yes'</filter> + <change_format> + <when input="output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt" value="tabular" format="tabular"/> + <when input="output_options.report_unaligned_cond.report_unaligned_separately_cond.unaligned_fmt_cond.unaligned_fmt" value="fasta" format="fasta"/> + </change_format> + </data> + </outputs> + <tests> + <!-- Single fasta.gz input, subject file --> + <test expect_num_outputs="1"> + <param name="query" value="query1.fasta.gz" ftype="fasta.gz"/> + <param name="db_opts_selector" value="file"/> + <param name="subject" value="subject1.fasta.gz" ftype="fasta.gz"/> + <output name="output" ftype="bam"> + <assert_contents> + <has_size value="1247" delta="50"/> + </assert_contents> + </output> + </test> + <!-- Single fasta.gz input, subject file, output unaligned reads separately--> + <test expect_num_outputs="2"> + <param name="query" value="query1.fasta.gz" ftype="fasta.gz"/> + <param name="db_opts_selector" value="file"/> + <param name="subject" value="subject1.fasta.gz" ftype="fasta.gz"/> + <param name="report_unaligned_separately" value="yes"/> + <param name="unaligned_fmt" value="tabular"/> + <output name="output" ftype="bam"> + <assert_contents> + <has_size value="492" delta="50"/> + </assert_contents> + </output> + <output name="output_unaligned" ftype="tabular"> + <assert_contents> + <has_size value="959"/> + </assert_contents> + </output> + </test> + <!-- Single fasta.gz input, subject file, gilist file, results in error --> + <test expect_failure="true"> + <param name="query" value="query1.fasta.gz" ftype="fasta.gz"/> + <param name="db_opts_selector" value="file"/> + <param name="subject" value="subject1.fasta.gz" ftype="fasta.gz"/> + <param name="report_unaligned_separately" value="yes"/> + <param name="gilist" value="gilist1.tabular" ftype="tabular"/> + <assert_stderr> + <has_text text="Incompatible with argument:"/> + </assert_stderr> + </test> + <!-- Single fasta.gz input, cached db, taxidlist, results in error --> + <test expect_failure="true"> + <param name="query" value="query1.fasta.gz" ftype="fasta.gz"/> + <param name="db_opts_selector" value="db"/> + <param name="database" value="phiX174"/> + <param name="taxidlist" value="taxids.tabular" ftype="tabular"/> + <assert_stderr> + <has_text text="Taxonomy filtering is not supported in v4 BLAST dbs"/> + </assert_stderr> + </test> + <!-- Paired fastqsanger.gz input, subject file --> + <test expect_num_outputs="1"> + <param name="query" value="query_forward1.fastqsanger.gz" ftype="fastqsanger.gz"/> + <param name="query_mate" value="query_reverse1.fastqsanger.gz" ftype="fastqsanger.gz"/> + <param name="db_opts_selector" value="file"/> + <param name="subject" value="subject1.fasta.gz" ftype="fasta.gz"/> + <output name="output" ftype="bam"> + <assert_contents> + <has_size value="62080" delta="50"/> + </assert_contents> + </output> + </test> + <!-- Paired fastqsanger.gz input, cached blast db --> + <test expect_num_outputs="1"> + <param name="query" value="query_forward1.fastqsanger.gz" ftype="fastqsanger.gz"/> + <param name="query_mate" value="query_reverse1.fastqsanger.gz" ftype="fastqsanger.gz"/> + <param name="db_opts_selector" value="db"/> + <param name="database" value="phiX174"/> + <output name="output" ftype="bam"> + <assert_contents> + <has_size value="62079" delta="50"/> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ +**What it does** + +.. class:: warningmark + +In addition to a BLAST database, you can also search against a fasta file of subject (target) sequences. However, this is not +advised because it is slower (only one CPU is used), but more importantly gives e-values for pairwise searches (very small +e-values which will look overly signficiant). In most cases you should convert the fasta file into a blast database using +*makeblastdb* and search against that. + +Magic-BLAST is a tool for mapping large next-generation RNA or DNA sequencing runs against a whole genome or transcriptome. +Each alignment optimizes a composite score, taking into account simultaneously the two reads of a pair, and in case of RNA-seq, +locating the candidate introns and adding up the score of all exons. This is very different from other versions of BLAST, where +each exon is scored as a separate hit and read-pairing is ignored. + +Magic-BLAST incorporates within the NCBI BLAST code framework ideas developed in the NCBI Magic pipeline, in particular hit +extensions by local walk and jump, and recursive clipping of mismatches near the edges of the reads, which avoids accumulating +artefactual mismatches near splice sites and is needed to distinguish short indels from substitutions near the edges. + +The tool accepts a single or paired set of reads in fasta or fastqsanger format and produces bam or tabular output. + +More information about Magic-BLAST is available in the +`online documentation <https://ncbi.github.io/magicblast/>`_. + ]]></help> + <expand macro="citations"/> +</tool>