Mercurial > repos > iuc > seqtk

<?xml version="1.0"?>
<tool id="seqtk_mergefa" name="seqtk_mergefa" version="@TOOL_VERSION@+galaxy1" profile="22.05">
    <!-- One-line summary of what the tool does. -->
    <description>Merge two FASTA/Q files into a FASTA file output</description>
    <!-- Shared definitions are imported from macros.xml, which is not shown here.
         The macros expanded below are presumably defined in that file (TODO confirm). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- NOTE(review): judging by their names these expand to the bio.tools reference,
         the package requirements and the stdio/exit-code handling; verify against macros.xml. -->
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <command><![CDATA[
## Merge the two inputs. Each boolean flag renders as its option string or as nothing.
seqtk mergefa
    -q $q
    $i $m $r $h
    '$in_fa1'
    '$in_fa2'
## Recompress the merged FASTA only when the first input is gzip-compressed.
#if $in_fa1.is_of_type('fasta.gz', 'fastq.gz')
    | pigz -p \${GALAXY_SLOTS:-1} --no-name --no-time
#end if
    > '$default'
    ]]></command>
    <configfiles>
        <!-- Rendered to outputs.json, the file named by provided_metadata_file on the
             outputs element. It sets the datatype of the "default" output to gzipped
             FASTA when the first input is gzipped, and to plain FASTA otherwise. -->
        <configfile filename="outputs.json">
#set $ext = "fasta.gz" if $in_fa1.is_of_type('fasta.gz', 'fastq.gz') else "fasta"
{"default": {"ext": "$ext"}}
        </configfile>
    </configfiles>
    <inputs>
        <!-- The two sequence files to merge: FASTA or FASTQ, plain or gzip-compressed. -->
        <param name="in_fa1"
               type="data"
               format="fasta,fastq,fasta.gz,fastq.gz"
               label="Input FASTA/Q file #1"/>
        <param name="in_fa2"
               type="data"
               format="fasta,fastq,fasta.gz,fastq.gz"
               label="Input FASTA/Q file #2"/>

        <!-- Value passed after -q in the command template. -->
        <param argument="-q"
               type="integer"
               value="0"
               label="Quality threshold (for FASTQ)"/>

        <!-- Optional switches. Each one renders as its flag when checked and as an
             empty string otherwise, and all are off by default. -->
        <param argument="-i"
               type="boolean"
               truevalue="-i"
               falsevalue=""
               checked="false"
               label="Take intersection"/>
        <param argument="-m"
               type="boolean"
               truevalue="-m"
               falsevalue=""
               checked="false"
               label="Pick least ambiguous, mask conflicts and uncertainties"
               help="Tries to pick the least ambiguous symbol from the two inputs, but masks contradictory bases in the inputs as x in the merged result and converts the merged base to lowercase where one of the input bases is an N."/>
        <param argument="-r"
               type="boolean"
               truevalue="-r"
               falsevalue=""
               checked="false"
               label="Pick a random allele from het"/>
        <param argument="-h"
               type="boolean"
               truevalue="-h"
               falsevalue=""
               checked="false"
               label="Suppress hets in the input"/>
    </inputs>
    <outputs provided_metadata_file="outputs.json">
        <!-- Single merged dataset, written by the command's redirect to $default.
             Its datatype is declared as "auto" here and supplied by outputs.json. -->
        <data name="default"
              format="auto"
              label="${tool.name} on ${on_string}"/>
    </outputs>
    <tests>
        <!-- Plain FASTA inputs with all options left at their defaults. -->
        <test>
            <param name="in_fa1" value="seqtk_mergefa1.fa"/>
            <param name="in_fa2" value="seqtk_mergefa2.fa"/>
            <output name="default" file="seqtk_mergefa.out" ftype="fasta"/>
        </test>
        <!-- Plain FASTA inputs with the -m option switched on. -->
        <test>
            <param name="in_fa1" value="seqtk_mergefa1.fa"/>
            <param name="in_fa2" value="seqtk_mergefa2.fa"/>
            <param name="m" value="True"/>
            <output name="default" file="seqtk_mergefa2.out" ftype="fasta"/>
        </test>
        <!-- Gzipped FASTA inputs with -m. The output is expected to be gzipped FASTA. -->
        <test>
            <param name="in_fa1" value="seqtk_mergefa1.fa.gz" ftype="fasta.gz"/>
            <param name="in_fa2" value="seqtk_mergefa2.fa.gz" ftype="fasta.gz"/>
            <param name="m" value="True"/>
            <output name="default" file="seqtk_mergefa2.out.gz" ftype="fasta.gz"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

This tool merges two FASTA or FASTQ files into a single FASTA file using IUPAC ambiguity codes where appropriate.
When differences occur between the sequences, ambiguity codes are used to represent possible variations.

Example::

  >seq1
  ACTGACTGAAA

  >seq2
  ACTGAMTGCGN

will result in::

  >seq1
  ACTGAMTGMRN

If the ``-m`` option is in use, however, the tool will pick the least ambiguous base if there is no contradiction between the symbols in the inputs. Conflicts are indicated by an ``x`` in the merged sequence, and the picked base is converted to lowercase to express uncertainty if the less specific symbol is an ``N``.
With this logic, the input sequences above will produce the merged result::

  >seq1
  ACTGACTGxxa

@ATTRIBUTION@
    ]]></help>
    <expand macro="citation" />
</tool>
author	iuc
date	Wed, 16 Oct 2024 09:08:52 +0000
parents	4b494533146a
children