Mercurial > repos > artbio > blastparser_and_hits
diff BlastParser_and_hits.xml @ 0:9dfb65ebb02e draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/blastparser_and_hits commit 48132e5edac97d54804ccbaf620068a5fb800bdc
author | artbio |
---|---|
date | Sun, 15 Oct 2017 18:43:37 -0400 |
parents | |
children | 9beb85dba280 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BlastParser_and_hits.xml Sun Oct 15 18:43:37 2017 -0400 @@ -0,0 +1,94 @@ +<tool id="BlastParser_and_hits" name="Parse blast output and compile hits" version="2.5.0"> +<description>for virus discovery</description> +<requirements></requirements> +<command><![CDATA[ + python '$__tool_directory__'/BlastParser_and_hits.py + --sequences '$sequences' + --blast '$blast' + --tabularOutput '$tabularOutput' + --fastaOutput '$fastaOutput' + --flanking $flanking + --mode $mode + ## Additional parameters. + #if $additional_filters.use_filters == "yes": + --filter_relativeCov $additional_filters.filter_relativeCov + --filter_maxScore $additional_filters.filter_maxScore + --filter_meanScore $additional_filters.filter_meanScore + --filter_term_in "$additional_filters.filter_term_in" + --filter_term_out "$additional_filters.filter_term_out" + #end if + --al_sequences '$al_sequences' + --un_sequences '$un_sequences' + --dataset_name "$blast.element_identifier" + + ]]></command> +<inputs> + <param name="sequences" type="data" format="fasta" label="fasta sequences that have been blasted" /> + <param name="blast" type="data" format="tabular" label="The blast output you wish to parse"> + <validator type="expression" message="Blast file must have 13 columns">value.metadata.columns == 13</validator> + </param> + <param name="flanking" type="integer" value= "5" label="Number of flanking nucleotides to add to hits for CAP3 assembly"/> + <param name="mode" type="select" label="Extensive or compact reporting mode" help="display (extensive) or not (compact) the oases contigs"> + <option value="verbose" selected="true">extensive</option> + <option value="short">compact</option> + </param> + <conditional name="additional_filters"> + <param name="use_filters" type="select" label="Use Additional Filters?"> + <option value="no">No</option> + <option value="yes">Yes</option> + </param> + <when value="no"> + </when> + <when value="yes"> + <param name="filter_relativeCov" type="float" value="0" max="1" label="Minimum Relative Subject Coverage" help=""/> + <param name="filter_maxScore" type="float" value="0" label="Minimum maximum BitScore" help=""/> + <param name="filter_meanScore" type="float" value="0" label="Minimum mean BitScore" help=""/> + <param name="filter_term_in" type="text" value="" label="filter the subject list with a keyword" help=""/> + <param name="filter_term_out" type="text" value="" label="filter the subject list excluding a keyword" help=""/> + </when> + </conditional> +</inputs> +<outputs> + <data name="tabularOutput" format="tabular" label="blast analysis, by subjects"/> + <data name="fastaOutput" format="fasta" label="hits"/> + <data name="al_sequences" format="fasta" label="Blast aligned sequences"/> + <data name="un_sequences" format="fasta" label="Blast unaligned sequences"/> +</outputs> + + <tests> + <test> + <param ftype="fasta" name="sequences" value="input.fa" /> + <param ftype="tabular" name="blast" value="blast.tab" /> + <param name="flanking" value="5" /> + <param name="use_filters" value="no" /> + <param name="mode" value="verbose" /> + <output name="tabularOutput" ftype="tabular" file="output.tab" /> + <output name="fastaOutput" ftype="fasta" file="output.fa" /> + <output name="al_sequences" ftype="fasta" file="al_sequences.fa" /> + <output name="un_sequences" ftype="fasta" file="un_sequences.fa" /> + </test> + </tests> + +<help> + +**What it does** + +Parse blast output for viruses genome assembly. + +Takes as inputs + + - 1. the fasta sequences that have been submitted to blast + - 2. a blast alignment in a tabular format. **Importantly** this tabular output must contains the 12 standard columns (see blast documentation), **plus a column 13** that will report the length of the subject sequence (slen). When you use blast tools prior using this tool, remember to **check the appropriate box** to get the 13th column in the blast tabular output. + - 3. the numbers of flanking nucleotides to be recovered at the ends of blast hit sequences + +The tool returns 4 datasets + + - 1. the fasta input sequences that produced significant blast hits + - 2. the fasta sequences that did not produced significant blast hits + - 3. the sequences of the blast hits, plus the flanking sequences (as specified in the tool form). This dataset may be further used in metavisitor workflows to produce contigs of hits. + - 4. and the parsing of the blast alignments which summarizes the blast results by "subject" sequences (blast analysis, by subjects) + +This latter parsing dataset may be customized by tuning the reporting mode and/or using filters + +</help> +</tool>