Mercurial > repos > bgruening > diamond
changeset 9:f921014aba5a draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diamond commit 828c844036743151594e57cc19811f4c8d9179b3"
author | iuc |
---|---|
date | Sat, 27 Nov 2021 09:48:10 +0000 |
parents | 54f751e413f4 |
children | 1e3323a44643 |
files | diamond.xml diamond_makedb.xml diamond_view.xml macros.xml test-data/diamond_results.tabular test-data/protein.fasta |
diffstat | 6 files changed, 84 insertions(+), 35 deletions(-) [+] |
line wrap: on
line diff
--- a/diamond.xml Mon Mar 22 13:21:23 2021 +0000 +++ b/diamond.xml Sat Nov 27 09:48:10 2021 +0000 @@ -1,4 +1,4 @@ -<tool id="bg_diamond" name="Diamond" version="@VERSION@.0" profile="19.01"> +<tool id="bg_diamond" name="Diamond" version="@TOOL_VERSION@.1" profile="19.01"> <description>alignment tool for short sequences against a protein database</description> <macros> <import>macros.xml</import> @@ -62,8 +62,23 @@ --query-cover '$query_cover' --subject-cover '$subject_cover' --block-size '$sens_cond.block_size' - #if str($unal) == '1': - --unal 1 --un '$unalqueries' + #if $output_unal + #if "--un" in $output_unal + --un '$unalqueries' + #if $query.ext.startswith("fasta"): + --unfmt fasta + #else + --unfmt fastq + #end if + #end if + #if "--al" in $output_unal + --al '$alqueries' + #if $query.ext.startswith("fasta"): + --alfmt fasta + #else + --alfmt fastq + #end if + #end if #end if #if $tax_cond.tax_select == 'file': --taxonlist `cat '$tax_cond.taxonlistfile' | grep -v "^#" | grep -v "^$" | tr "\n" "," | sed 's/,$//'` @@ -79,7 +94,7 @@ <option value="blastx">Align DNA query sequences (blastx)</option> </param> <when value="blastx"> - <param name="query_gencode" argument="--query-gencode" type="select" label="Genetic code used for translation of query in BLASTX mode" help=""> + <param argument="--query-gencode" type="select" label="Genetic code used for translation of query in BLASTX mode" help=""> <option value="1">The Standard Code</option> <option value="2">The Vertebrate Mitochondrial Code</option> <option value="3">The Yeast Mitochondrial Code</option> @@ -100,7 +115,7 @@ <option value="25">Candidate Division SR1 and Gracilibacteria Code</option> <option value="26">Pachysolen tannophilus Nuclear Code</option> </param> - <param argument="--min-orf" name="min_orf" type="integer" value="1" label="ignore translated sequences without an open reading frame of at least this length" help="By default this feature is disabled for sequences of length below 30, set to 20 
for sequences of length below 100, and set to 40 otherwise. Setting this option to 1 will disable this feature" /> + <param argument="--min-orf" type="integer" value="1" label="ignore translated sequences without an open reading frame of at least this length" help="By default this feature is disabled for sequences of length below 30, set to 20 for sequences of length below 100, and set to 40 otherwise. Setting this option to 1 will disable this feature" /> <param name="query_strand" argument="--strand" type="select" label="query strands to search" help=""> <option value="both" selected="True">Both</option> @@ -113,21 +128,21 @@ <option value="no" selected="true">no</option> </param> <when value="yes"> - <param argument="--range-culling" name="range_culling" type="boolean" truevalue="--range-culling" falsevalue="" checked="false" label="restrict hit culling to overlapping query ranges" help="This feature is designed for long query DNA sequences that may span several genes. In these cases, the default of reporting the 25 best overall hits could cause hits to a lower scoring gene to be overshadowed. But just increasing the number of alignments reported will bloat the output size and reduce performance. Using this feature along with -k 25 (default), a hit will only be deleted if at least 50% of its query range is spanned by at least 25 higher or equal scoring hits. Using this feature along with --top 10, a hit will only be deleted if its score is more than 10% lower than that of a higher scoring hit over at least 50% of its query range. The percentage is configurable using --range-cover. Note that this feature is currently only available in frameshift alignment mode"/> + <param argument="--range-culling" type="boolean" truevalue="--range-culling" falsevalue="" checked="false" label="restrict hit culling to overlapping query ranges" help="This feature is designed for long query DNA sequences that may span several genes. 
In these cases, the default of reporting the 25 best overall hits could cause hits to a lower scoring gene to be overshadowed. But just increasing the number of alignments reported will bloat the output size and reduce performance. Using this feature along with -k 25 (default), a hit will only be deleted if at least 50% of its query range is spanned by at least 25 higher or equal scoring hits. Using this feature along with --top 10, a hit will only be deleted if its score is more than 10% lower than that of a higher scoring hit over at least 50% of its query range. The percentage is configurable using --range-cover. Note that this feature is currently only available in frameshift alignment mode"/> <param argument="--frameshift" type="integer" value="0" label="frame shift penalty" help="Values around 15 are reasonable for this parameter. Enabling this feature will have the aligner tolerate missing bases in DNA sequences and is most recommended for long, error-prone sequences like MinION reads. In the pairwise output format, frameshifts will be indicated by \ and / for a shift by +1 and -1 nucleotide in the direction of translation respectively." 
/> </when> <when value="no"/> </conditional> - <param name="comp_based_stats" argument="--comp-based-stats" type="select" label="Composition based statistics" help="Compositionally biased sequences often cause false positive matches, which are effectively filtered by this algorithm in a way similar to the composition based statistics used by BLAST"> + <param argument="--comp-based-stats" type="select" label="Composition based statistics" help="Compositionally biased sequences often cause false positive matches, which are effectively filtered by this algorithm in a way similar to the composition based statistics used by BLAST"> <option value="0">Disable</option> <option value="1" selected="True">Default mode (Hauser, 2016)</option> </param> </when> <when value="blastp"> - <param name="no_self_hits" argument="--no-self-hits" type="boolean" truevalue="--no-self-hits" falsevalue="" checked="true" label="suppress reporting of identical self hits?" help=""/> + <param argument="--no-self-hits" type="boolean" truevalue="--no-self-hits" falsevalue="" checked="true" label="suppress reporting of identical self hits?" 
help=""/> - <param name="comp_based_stats" argument="--comp-based-stats" type="select" label="Composition based statistics" help="Compositionally biased sequences often cause false positive matches, which are effectively filtered by this algorithm in a way similar to the composition based statistics used by BLAST"> + <param argument="--comp-based-stats" type="select" label="Composition based statistics" help="Compositionally biased sequences often cause false positive matches, which are effectively filtered by this algorithm in a way similar to the composition based statistics used by BLAST"> <option value="0">Disable</option> <option value="1" selected="True">Default mode (Hauser, 2016)</option> <option value="2">Compositional matrix adjust conditioned on sequence properties, simplified (Yu, 2005)</option> @@ -234,18 +249,24 @@ </conditional> <expand macro="hit_filter_macro" /> <param argument="--id" type="integer" value="0" label="Minimum identity percentage to report an alignment" help="" /> - <param name="query_cover" argument="--query-cover" type="integer" value="0" label="Minimum query cover percentage to report an alignment" help="" /> - <param name="subject_cover" argument="--subject-cover" type="integer" value="0" label="Minimum subject cover percentage to report an alignment" help="" /> - <param argument="--unal" type="boolean" truevalue="1" falsevalue="0" checked="false" label="report unaligned queries" help=""/> + <param argument="--query-cover" type="integer" value="0" label="Minimum query cover percentage to report an alignment" help="" /> + <param argument="--subject-cover" type="integer" value="0" label="Minimum subject cover percentage to report an alignment" help="" /> + <param name="output_unal" type="select" optional="true" multiple="true" label="Output aligned/unaligned queries to separate file" help="Write the query sequences that were aligned (--al) and/or not aligned (--un) to additional datasets, as FASTA or FASTQ depending on the query input"> + <option value="--un">Output unaligned queries (--un)</option> + <option value="--al">Output aligned queries (--al)</option> + </param> 
</inputs> <outputs> <expand macro="output_macro" /> - <data format="fasta" name="unalqueries" label="${tool.name} on ${on_string} (unaligned queries)"> - <filter>unal == "1"</filter> + <data format_source="query" name="unalqueries" label="${tool.name} on ${on_string}: unaligned queries"> + <filter>output_unal and "--un" in output_unal</filter> + </data> + <data format_source="query" name="alqueries" label="${tool.name} on ${on_string}: aligned queries"> + <filter>output_unal and "--al" in output_unal</filter> </data> </outputs> <tests> - <test> + <test expect_num_outputs="3"> <conditional name="method_cond"> <param name="method_select" value="blastp" /> </conditional> @@ -256,13 +277,15 @@ </conditional> <conditional name="output"> <param name="outfmt" value="6"/> - <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,cigar,scovhsp,sskingdoms,skingdoms,sphylums"/> + <!-- removed ,cigar from test: https://github.com/bbuchfink/diamond/issues/532 --> + <param name="fields" value="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore,scovhsp,sskingdoms,skingdoms,sphylums"/> + <param name="unal" value="true"/> </conditional> <conditional name="sens_cond"> <param name="sensitivity" value=""/> </conditional> <param name="matrix" value="BLOSUM62"/> - <param name="comp-based-stat" value="1"/> + <param name="comp_based_stats" value="1"/> <param name="masking" value="1"/> <conditional name="hit_filter"> <param name="hit_filter_select" value="max"/> @@ -277,9 +300,20 @@ <conditional name="sens_cond"> <param name="block_size" value="2"/> </conditional> + <param name="output_unal" value="--al,--un"/> + <output name="unalqueries"> + <assert_contents> + <has_line line=">shuffled sequence that should go to unaligned"/> + </assert_contents> + </output> + <output name="alqueries"> + <assert_contents> + <has_line line=">sequence more text"/> + </assert_contents> + </output> <output 
name="blast_tabular" file="diamond_results.tabular"/> </test> - <test> + <test expect_num_outputs="1"> <conditional name="method_cond"> <param name="method_select" value="blastp" /> </conditional> @@ -300,7 +334,7 @@ <param name="sensitivity" value=""/> </conditional> <param name="matrix" value="BLOSUM62"/> - <param name="comp-based-stat" value="1"/> + <param name="comp_based_stats" value="1"/> <param name="masking" value="1"/> <conditional name="hit_filter"> <param name="hit_filter_select" value="max"/> @@ -317,7 +351,7 @@ </conditional> <output name="blast_tabular" file="diamond_results.wtax.tabular"/> </test> - <test> + <test expect_num_outputs="1"> <conditional name="method_cond"> <param name="method_select" value="blastx" /> <conditional name="frameshift_cond"> @@ -336,7 +370,7 @@ <param name="sensitivity" value=""/> </conditional> <param name="matrix" value="BLOSUM62"/> - <param name="comp-based-stat" value="1"/> + <param name="comp_based_stats" value="1"/> <param name="masking" value="1"/> <conditional name="hit_filter"> <param name="hit_filter_select" value="top"/> @@ -353,7 +387,7 @@ </conditional> <output name="blast_tabular" file="diamond_results.pairwise"/> </test> - <test> + <test expect_num_outputs="1"> <conditional name="method_cond"> <param name="method_select" value="blastp" /> </conditional> @@ -364,10 +398,12 @@ </conditional> <conditional name="output"> <param name="outfmt" value="100"/> + <param name="salltitles" value="false"/> + <param name="sallseqid" value="false"/> </conditional> <output name="daa_output" file="diamond_results.daa" compare="sim_size" delta="10"/> </test> - <test> + <test expect_num_outputs="1"> <conditional name="method_cond"> <param name="method_select" value="blastx" /> <conditional name="frameshift_cond"> @@ -386,7 +422,7 @@ <param name="sensitivity" value=""/> </conditional> <param name="matrix" value="BLOSUM62"/> - <param name="comp-based-stat" value="1"/> + <param name="comp_based_stats" value="1"/> <param 
name="masking" value="1"/> <conditional name="hit_filter"> <param name="hit_filter_select" value="top"/>
--- a/diamond_makedb.xml Mon Mar 22 13:21:23 2021 +0000 +++ b/diamond_makedb.xml Sat Nov 27 09:48:10 2021 +0000 @@ -1,4 +1,4 @@ -<tool id="bg_diamond_makedb" name="Diamond makedb" version="@VERSION@" profile="19.01"> +<tool id="bg_diamond_makedb" name="Diamond makedb" version="@TOOL_VERSION@" profile="19.01"> <description>Build database from a FASTA file</description> <macros> <import>macros.xml</import>
--- a/diamond_view.xml Mon Mar 22 13:21:23 2021 +0000 +++ b/diamond_view.xml Sat Nov 27 09:48:10 2021 +0000 @@ -1,4 +1,4 @@ -<tool id="bg_diamond_view" name="Diamond view" version="@VERSION@" profile="19.01"> +<tool id="bg_diamond_view" name="Diamond view" version="@TOOL_VERSION@.1" profile="19.01"> <description>generate formatted output from DAA files</description> <macros> <import>macros.xml</import> @@ -29,7 +29,7 @@ <expand macro="output_macro" /> </outputs> <tests> - <test> + <test expect_num_outputs="1"> <param name="daa" ftype="daa" value="diamond_results.daa" /> <conditional name="output"> <param name="outfmt" value="5"/> @@ -40,7 +40,7 @@ </conditional> <output name="blast_tabular" file="diamond_results.xml"/> </test> - <test> + <test expect_num_outputs="1"> <param name="daa" ftype="daa" value="diamond_results.daa" /> <conditional name="output"> <param name="outfmt" value="6"/> @@ -48,7 +48,7 @@ </conditional> <output name="blast_tabular" file="diamond_view_results.tabular"/> </test> - <test> + <test expect_num_outputs="1"> <param name="daa" ftype="daa" value="diamond_results.daa" /> <conditional name="output"> <param name="outfmt" value="101"/>
--- a/macros.xml Mon Mar 22 13:21:23 2021 +0000 +++ b/macros.xml Sat Nov 27 09:48:10 2021 +0000 @@ -1,9 +1,9 @@ <macros> - <token name="@VERSION@">2.0.8</token> + <token name="@TOOL_VERSION@">2.0.8</token> <xml name="requirements"> <requirements> - <requirement type="package" version="@VERSION@">diamond</requirement> + <requirement type="package" version="@TOOL_VERSION@">diamond</requirement> </requirements> </xml> @@ -69,6 +69,7 @@ <option value="cigar">Cigar</option> <yield/> </param> + <param argument="--unal" type="boolean" label="Report unaligned queries" truevalue="1" falsevalue="0" checked="false"/> </when> <when value="100"> <param argument="--salltitles" type="boolean" truevalue="--salltitles" falsevalue="" checked="true" label="Include full subject titles in DAA file?" help=""/> @@ -99,11 +100,11 @@ </xml> <xml name="block_size_low_sens"> - <param name="block_size" argument="--block-size" type="float" value="2" label="Block size in billions of sequence letters to be processed at a time" help="" /> + <param argument="--block-size" type="float" value="2" label="Block size in billions of sequence letters to be processed at a time" help="" /> </xml> <xml name="block_size_hi_sens"> - <param name="block_size" argument="--block-size" type="float" value="0.4" label="Block size in billions of sequence letters to be processed at a time" help="" /> + <param argument="--block-size" type="float" value="0.4" label="Block size in billions of sequence letters to be processed at a time" help="" /> </xml> <xml name="citations"> @@ -145,11 +146,16 @@ #else if $output.outfmt == "6" --outfmt '6' #echo ' '.join(str($output.fields).split(',')) --out '$blast_tabular' + --unal $output.unal #else if $output.outfmt == "100" --outfmt '100' + $output.salltitles + $output.sallseqid --out output.daa #else if $output.outfmt == "101" --outfmt '101' + $output.salltitles + $output.sallseqid --out '$sam_output' #else if $output.outfmt == "102" --outfmt '102'
--- a/test-data/diamond_results.tabular Mon Mar 22 13:21:23 2021 +0000 +++ b/test-data/diamond_results.tabular Sat Nov 27 09:48:10 2021 +0000 @@ -1,2 +1,3 @@ -sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 283 1 284 1.44e-205 550 94M1D189M 100 0 0 0 -sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 283 1 284 5.77e-150 409 105M1D178M 100 0 0 0 +sequence gi|5524211|gb|AAD44166.1| 99.6 284 0 1 1 283 1 284 1.44e-205 550 100 0 0 0 +sequence gi|5524212|gb|AAD44167.1| 79.6 284 57 1 1 283 1 284 5.77e-150 409 100 0 0 0 +shuffled * -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 * * *
--- a/test-data/protein.fasta Mon Mar 22 13:21:23 2021 +0000 +++ b/test-data/protein.fasta Sat Nov 27 09:48:10 2021 +0000 @@ -4,3 +4,9 @@ LLILILLLLLLALLSPDMLGDPDNHMPADPLNTPLHIKPEWYFLFAYAILRSVPNKLGGVLALFLSIVIL GLMPFLHTSKHRSMMLRPLSQALFWTLTMDLLTLTWIGSQPVEYPYTIIGQMASILYFSIILAFLPIAGX IENY +>shuffled sequence that should go to unaligned +XLPLILMLLGISPGSFEHTVAGGIWTSLMLFLPGYPGVGFLMLLVITVPALNFKFGFMLL +LKPTTNIIKTLVLALTHADDPLSFPWLNYMPPAADFNGLFTNAGATTTLYQIPYEGSFYL +AAIYGSMLHENHYLYRSMTPVGWLHLGDSGLRFMLLPIYYARITYDNVPAGWFLSVNTIL +GLTAILLEAIKALMANYSESQEPFCFSTGMKHSFIISDILGWDMSLYIILLIPHTNPFVL +TFLTLILWLDILSRYTLLQVNLIIFMTRHGHFQIADIWYWLKS