Mercurial > repos > iuc > seqtk
changeset 10:a019807f4e67 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seqtk commit 75d5141110314addc160504e0a1f9cd66443de66
| author | iuc | 
|---|---|
| date | Wed, 16 Oct 2024 09:08:52 +0000 | 
| parents | 4b494533146a | 
| children | 8511b6d85fc7 | 
| files | seqtk_mergefa.xml seqtk_seq.xml test-data/seqtk_seq_A.fasta test-data/seqtk_seq_A.fasta.gz | 
| diffstat | 4 files changed, 65 insertions(+), 26 deletions(-) [+] | 
line wrap: on
 line diff
--- a/seqtk_mergefa.xml Sat Dec 09 11:14:21 2023 +0000 +++ b/seqtk_mergefa.xml Wed Oct 16 09:08:52 2024 +0000 @@ -1,6 +1,6 @@ <?xml version="1.0"?> -<tool id="seqtk_mergefa" name="seqtk_mergefa" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05"> - <description>merge two FASTA/Q files</description> +<tool id="seqtk_mergefa" name="seqtk_mergefa" version="@TOOL_VERSION@+galaxy1" profile="22.05"> + <description>Merge two FASTA/Q files into a FASTA file output</description> <macros> <import>macros.xml</import> </macros> @@ -18,17 +18,28 @@ '$in_fa2' #echo "| pigz -p ${GALAXY_SLOTS:-1} --no-name --no-time" if $in_fa1.is_of_type('fasta.gz', 'fastq.gz') else "" # > '$default' ]]></command> + <configfiles> + <configfile filename="outputs.json"> +#set $ext = None +#if $in_fa1.is_of_type('fasta.gz', 'fastq.gz') + #set $ext = "fasta.gz" +#else + #set $ext = "fasta" +#end if +{"default": {"ext": "$ext"}} + </configfile> + </configfiles> <inputs> <param name="in_fa1" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Input FASTA/Q file #1"/> <param name="in_fa2" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Input FASTA/Q file #2"/> - <param argument="-q" type="integer" value="0" label="Quality threshold"/> + <param argument="-q" type="integer" value="0" label="Quality threshold (for FASTQ)"/> <param argument="-i" type="boolean" truevalue="-i" falsevalue="" checked="false" label="Take intersection" /> - <param argument="-m" type="boolean" truevalue="-m" falsevalue="" checked="false" label="Convert to lowercase when one of the input base is N" /> + <param argument="-m" type="boolean" truevalue="-m" falsevalue="" checked="false" label="Pick least ambiguous, mask conflicts and uncertainties" help="Tries to pick the least ambiguous symbol from the two inputs, but masks contradictory bases in the inputs as x in the merged result and converts the merged base to lowercase where one of the input bases is an N." /> <param argument="-r" type="boolean" truevalue="-r" falsevalue="" checked="false" label="Pick a random allele from het" /> <param argument="-h" type="boolean" truevalue="-h" falsevalue="" checked="false" label="Suppress hets in the input" /> </inputs> - <outputs> - <data name="default" format_source="in_fa1" label="${tool.name} on ${on_string}"/> + <outputs provided_metadata_file="outputs.json"> + <data name="default" format="auto" label="${tool.name} on ${on_string}" /> </outputs> <tests> <test> @@ -52,24 +63,27 @@ <help><![CDATA[ **What it does** -Merges two fasta files, using ambiguity codes +This tool merges two FASTA or FASTQ files into a single FASTA file using IUPAC ambiguity codes where appropriate. +When differences occur between the sequences, ambiguity codes are used to represent possible variations. -:: +Example:: - # seq1.fa - >test0 - ACTGACTGAAA + >seq1 + ACTGACTGAAA + + >seq2 + ACTGAMTGCGN - # seq2.fa - >test0 - ACTGAMTGCGN +will result in:: + + >seq1 + ACTGAMTGMRN -In the following the `-m` option has been set to highlight seqtk-mergefa's features. +If the `-m` option is in use, however, the tool will pick the least ambiguous base if there is no contradiction between the symbols in the inputs. Conflicts are indicated by using x in the merged sequence and the picked base is converted to lowercase if the less specific symbol is an N to express uncertainty. +With this logic the input sequences above will result in the merge result:: -:: - - >test0 - ACTGACTGxxa + >seq1 + ACTGACTGxxa @ATTRIBUTION@ ]]></help>
--- a/seqtk_seq.xml Sat Dec 09 11:14:21 2023 +0000 +++ b/seqtk_seq.xml Wed Oct 16 09:08:52 2024 +0000 @@ -1,5 +1,5 @@ <?xml version="1.0"?> -<tool id="seqtk_seq" name="seqtk_seq" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05"> +<tool id="seqtk_seq" name="seqtk_seq" version="@TOOL_VERSION@+galaxy1" profile="22.05"> <description>common transformation of FASTA/Q</description> <macros> <import>macros.xml</import> @@ -34,13 +34,25 @@ '$in_file' @CONDITIONAL_GZIP_OUT@ ]]></command> + <configfiles> + <configfile filename="outputs.json"> +#if $A and $in_file.is_of_type('fasta.gz', 'fastq.gz') + #set $ext = "fasta.gz" +#elif $A + #set $ext = "fasta" +#else + #set $ext = $in_file.ext +#end if +{"default": {"ext": "$ext"}} + </configfile> + </configfiles> <inputs> <expand macro="in_faq"/> <param argument="-q" type="integer" value="0" label="Mask bases with quality lower than INT" /> <param argument="-X" type="integer" value="255" label="Mask bases with quality higher than INT" /> - <param argument="-n" type="text" value="0" label="Masked bases converted to CHAR; 0 for lowercase" /> + <param argument="-n" type="text" value="" label="Masked bases converted to CHAR; leave empty for lowercase masking" /> <param argument="-l" type="integer" value="0" label="Number of residues per line; 0 for 2^32-1" /> - <param argument="-Q" type="integer" value="33" label="Quality shift: ASCII-INT gives base quality" /> + <param argument="-Q" type="integer" value="33" label="Quality shift: ASCII-INT gives base quality" help="Only applied during comparison to quality thresholds for masking" /> <param argument="-s" type="integer" value="11" label="Random seed" help="Effective with -f" /> <param argument="-f" type="float" value="1" label="Sample fraction of sequences" /> <param argument="-M" type="data" format="bed,txt" optional="true" label="Mask regions in BED or name list file" /> @@ -53,26 +65,37 @@ <param name="x1" argument="-1" type="boolean" truevalue="-1" falsevalue="" checked="false" label="Output the 2n-1 reads only" /> <param name="x2" argument="-2" type="boolean" truevalue="-2" falsevalue="" checked="false" label="Output the 2n reads only" /> </inputs> - <outputs> - <data name="default" format_source="in_file" label="${tool.name} on ${on_string}" /> + <outputs provided_metadata_file="outputs.json"> + <data name="default" format="auto" label="${tool.name} on ${on_string}" /> </outputs> + <tests> <!-- This is a sorry excuse for a test for a tool which does way more than it should, but upstream decided to put a TON of functionality into a single tool rather than using the single responsibility principle. --> - <test> + <test expect_num_outputs="1"> <param name="in_file" value="seqtk_seq.fa"/> <param name="r" value="True"/> <param name="n" value=""/> <output name="default" file="seqtk_seq_revcom.fa" ftype="fasta"/> </test> - <test> + <test expect_num_outputs="1"> <param name="in_file" value="seqtk_seq.fa.gz" ftype="fasta.gz"/> <param name="r" value="True"/> <param name="n" value=""/> <output name="default" file="seqtk_seq_revcom.fa.gz" ftype="fasta.gz"/> </test> + <test expect_num_outputs="1"> + <param name="in_file" value="seqtk_trimfq.fq" ftype="fastq"/> + <param name="A" value="True" /> + <output name="default" file="seqtk_seq_A.fasta" ftype="fasta"/> + </test> + <test expect_num_outputs="1"> + <param name="in_file" value="seqtk_trimfq.fq.gz" ftype="fastq.gz"/> + <param name="A" value="True" /> + <output name="default" file="seqtk_seq_A.fasta.gz" ftype="fasta.gz"/> + </test> </tests> <help><