view bealign.xml @ 2:d8b6f0adaa79 draft

"planemo upload for repository https://github.com/davebx/bioext-gx/ commit b8438e86497e5f1084cca1c9b2fdc82c80a7e1b0"
author iuc
date Fri, 09 Jul 2021 15:19:05 +0000
parents f9b72a376ec9
children fb4975b507c6
line wrap: on
line source

<?xml version="1.0"?>
<tool id="bioext_bealign" name="Align sequences" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!-- Shown next to the tool name, completing the phrase "Align sequences ...". -->
    <description>to a reference using a codon alignment algorithm</description>
    <!-- @VERSION_SUFFIX@ is the wrapper revision that follows "+galaxy" in the tool's
         version attribute. @TOOL_VERSION@ and @PROFILE@ are not defined in this file;
         presumably they come from macros.xml (TODO confirm). -->
    <macros>
        <import>macros.xml</import>
        <token name="@VERSION_SUFFIX@">0</token>
    </macros>
    <!-- Expands the "requirements" macro; its definition is not visible here
         (presumably in macros.xml). -->
    <expand macro="requirements" />
    <!-- Command Galaxy runs to record the version of the underlying bealign program. -->
    <version_command>bealign --version</version_command>
    <command detect_errors="exit_code">
    <![CDATA[
    ## Step 1: sanitise the input FASTA into reads.fa.
    ## Some downstream tools, such as the TN-93 clustering tool and RAxML, might
    ## break if there are non-standard characters in the sequences or text other
    ## than alphanumerics in the sequence names, so we run the input dataset
    ## through a simple awk script to remove any non-IUPAC-standard nucleotides
    ## and replace any unwanted characters in the sequence names with underscores.
    ## This should not affect the actual alignment, since any non-standard character
    ## in the sequences is already ignored, but the possibility remains.
    ##
    ## NOTE(review): gensub is a GNU awk extension, so the requirements macro
    ## presumably has to provide gawk - confirm.
    ## NOTE(review): the sed expression is meant to collapse runs of underscores.
    ## Cheetah only unescapes a backslash that precedes a dollar or hash sign, so
    ## the doubled backslash likely reaches sed unchanged, in which case the
    ## expression matches nothing useful. Left as is because changing it could
    ## alter sequence names in existing outputs - confirm and fix separately.
    cat '$input' | awk '{ if (\$0 ~ "^[^>]") {a = gensub(/[^ACGTURYKMSWBDHVNacgturykmswbdhvn?-]/, "", "g"); } else {a=gensub(/[^>A-Za-z0-9_]/, "_", "g"); }; print a } ' |
        sed 's,_\\+,_,g' > reads.fa &&
    ## Step 2: align the sanitised reads. Both branches of the select_reference
    ## conditional expose a parameter named "reference", so one expression covers
    ## the preset and the history dataset case.
    bealign --reference '$select_reference.reference' --alphabet $advanced.alphabet
        #if $advanced.expected_identity:
            --expected-identity $advanced.expected_identity
        #end if
        --score-matrix $advanced.score_matrix $advanced.reverse_complement $advanced.keep_reference
        #if $advanced.discard:
            ## discarded_reads is a top-level output dataset, not a member of the
            ## "advanced" section, so it is referenced without the section prefix.
            $advanced.discard '$discarded_reads'
        #end if
        reads.fa alignment.bam
    ]]>
    </command>
    <inputs>
        <!-- Reads to align; the command sanitises this dataset into reads.fa before running bealign. -->
        <param name="input" type="data" format="fasta" label="Input reads" help="For the benefit of certain tools that depend on this aligner, such as the TN-93 clustering tool, this dataset's sequence names will have non-alphanumeric characters replaced with underscores, and the sequences will be restricted to the set of IUPAC nucleotide characters." />
        <!-- Reference selection. Both branches define a parameter named "reference"
             (derived from the argument attribute), so the command can always read
             select_reference.reference regardless of the branch chosen. -->
        <conditional name="select_reference">
            <param name="reference_type" type="select" label="Reference source">
                <option value="preset">Select preset</option>
                <option value="dataset">Use a history dataset</option>
            </param>
            <when value="preset">
                <!-- The first option is the default, so the option order is kept as is. -->
                <param argument="--reference" type="select" label="Preset reference">
                    <option value="HXB2_tat">HXB2 tat</option>
                    <option value="HXB2_gag">HXB2 gag</option>
                    <option value="HXB2_pol">HXB2 polymerase</option>
                    <option value="HXB2_int">HXB2 integrase</option>
                    <option value="HXB2_vif">HXB2 vif</option>
                    <option value="HXB2_pr">HXB2 protease</option>
                    <option value="HXB2_vpr">HXB2 vpr</option>
                    <option value="NL4-3_prrt">NL4-3 protease and reverse transcriptase</option>
                    <option value="HXB2_nef">HXB2 nef</option>
                    <option value="HXB2_env">HXB2 envelope</option>
                    <option value="HXB2_rt">HXB2 reverse transcriptase</option>
                    <option value="HXB2_prrt">HXB2 protease and reverse transcriptase</option>
                    <option value="HXB2_rev">HXB2 rev</option>
                    <option value="HXB2_vpu">HXB2 vpu</option>
                    <option value="CoV2-3C">SARS-CoV-2: 3C</option>
                    <option value="CoV2-S">SARS-CoV-2: Spike</option>
                    <option value="CoV2-E">SARS-CoV-2: Envelope</option>
                    <option value="CoV2-M">SARS-CoV-2: Membrane</option>
                    <option value="CoV2-N">SARS-CoV-2: Nucleoprotein</option>
                    <option value="CoV2-endornase">SARS-CoV-2: endornase</option>
                    <option value="CoV2-exonuclease">SARS-CoV-2: exonuclease</option>
                    <option value="CoV2-helicase">SARS-CoV-2: helicase</option>
                    <option value="CoV2-leader">SARS-CoV-2: leader</option>
                    <option value="CoV2-methyltransferase">SARS-CoV-2: methyltransferase</option>
                    <option value="CoV2-nsp2">SARS-CoV-2: nsp2</option>
                    <option value="CoV2-nsp3">SARS-CoV-2: nsp3</option>
                    <option value="CoV2-nsp4">SARS-CoV-2: nsp4</option>
                    <option value="CoV2-nsp6">SARS-CoV-2: nsp6</option>
                    <option value="CoV2-nsp7">SARS-CoV-2: nsp7</option>
                    <option value="CoV2-nsp8">SARS-CoV-2: nsp8</option>
                    <option value="CoV2-nsp9">SARS-CoV-2: nsp9</option>
                    <option value="CoV2-nsp10">SARS-CoV-2: nsp10</option>
                    <option value="CoV2-ORF1a">SARS-CoV-2: ORF1a</option>
                    <option value="CoV2-ORF1b">SARS-CoV-2: ORF1b</option>
                    <option value="CoV2-ORF3a">SARS-CoV-2: ORF3a</option>
                    <option value="CoV2-ORF5">SARS-CoV-2: ORF5</option>
                    <option value="CoV2-ORF6">SARS-CoV-2: ORF6</option>
                    <option value="CoV2-ORF7a">SARS-CoV-2: ORF7a</option>
                    <option value="CoV2-ORF7b">SARS-CoV-2: ORF7b</option>
                    <option value="CoV2-ORF8">SARS-CoV-2: ORF8</option>
                    <option value="CoV2-ORF10">SARS-CoV-2: ORF10</option>
                    <option value="CoV2-RdRp">SARS-CoV-2: RNA-dependent RNA polymerase</option>
                </param>
            </when>
            <when value="dataset">
                <param argument="--reference" type="data" format="fasta" label="Reference sequences" />
            </when>
        </conditional>
        <!-- Parameter names in this section are derived from the argument attribute
             (leading dashes dropped, remaining dashes turned into underscores), e.g.
             expected_identity, score_matrix, reverse_complement, keep_reference. -->
        <section name="advanced" title="Advanced options" expanded="False">
            <!-- Optional: the command only passes this flag when a value is set. -->
            <param argument="--expected-identity" type="float" min="0" max="1" optional="True" label="Discard sequences that are insufficiently identical to the reference" />
            <param argument="--alphabet" type="select" label="Alphabet to use for alignment">
                <option value="codon" selected="True">Codon</option>
                <option value="dna">DNA</option>
                <option value="amino">Amino acids</option>
            </param>
            <param argument="--score-matrix" type="select" label="Parametrize using score matrix">
                <option value="BLOSUM62" selected="True">Blocks substitution</option>
                <option value="DNA65">DNA, 65% expected identity</option>
                <option value="DNA70">DNA, 70% expected identity</option>
                <option value="DNA80">DNA, 80% expected identity</option>
                <option value="DNA88">DNA, 88% expected identity</option>
                <option value="DNA95">DNA, 95% expected identity</option>
                <option value="PAM200">PAM 200 substitution</option>
                <option value="PAM250">PAM 250 substitution</option>
                <option value="HIV_BETWEEN_F">HIV between+F</option>
            </param>
            <!-- Also gates the discarded_reads output through its filter. -->
            <param argument="--discard" type="boolean" checked="False" truevalue="--discard" falsevalue="" label="Output discarded sequences to a separate dataset" />
            <param argument="--reverse-complement" type="boolean" checked="False" truevalue="--reverse-complement" falsevalue="" label="Also try to align against reverse complement of reference" />
            <param argument="--keep-reference" type="boolean" checked="False" truevalue="--keep-reference" falsevalue="" label="Include reference as first sequence in aligned BAM" />
        </section>
    </inputs>
    <outputs>
        <!-- Main result, collected from alignment.bam in the job working directory. -->
        <data name="output" format="bam" from_work_dir="alignment.bam" />
        <!-- Only created when the "discard" option of the advanced section is enabled.
             The explicit label keeps it distinguishable from the main output, which
             would otherwise share the same default history name. -->
        <data name="discarded_reads" format="fasta" label="${tool.name} on ${on_string}: discarded reads">
            <filter>advanced['discard']</filter>
        </data>
    </outputs>
    <tests>
        <!-- History-supplied reference, scored with the HIV between+F matrix. -->
        <test>
            <param name="input" value="bealign-in1.fa" ftype="fasta" />
            <conditional name="select_reference">
                <param name="reference_type" value="dataset" />
                <param name="reference" value="bealign-in-ref-1.fa" ftype="fasta" />
            </conditional>
            <section name="advanced">
                <param name="score_matrix" value="HIV_BETWEEN_F" />
            </section>
            <output name="output" file="bealign-out1.bam" ftype="bam" lines_diff="2" />
        </test>
        <!-- History-supplied reference, scored with BLOSUM62. -->
        <test>
            <param name="input" value="bealign-in2.fa" ftype="fasta" />
            <conditional name="select_reference">
                <param name="reference_type" value="dataset" />
                <param name="reference" value="bealign-in-ref-2.fa" ftype="fasta" />
            </conditional>
            <section name="advanced">
                <param name="score_matrix" value="BLOSUM62" />
            </section>
            <output name="output" file="bealign-out2.bam" ftype="bam" lines_diff="2" />
        </test>
        <!-- Same inputs as the previous test, with an expected identity of 0.9. -->
        <test>
            <param name="input" value="bealign-in2.fa" ftype="fasta" />
            <conditional name="select_reference">
                <param name="reference_type" value="dataset" />
                <param name="reference" value="bealign-in-ref-2.fa" ftype="fasta" />
            </conditional>
            <section name="advanced">
                <param name="expected_identity" value="0.9" />
                <param name="score_matrix" value="BLOSUM62" />
            </section>
            <output name="output" file="bealign-out3.bam" ftype="bam" lines_diff="2" />
        </test>
    </tests>
    <help>
    <![CDATA[
bealign
-------

Align sequences to a reference using a codon alignment algorithm.

NOTES
-----

The reference can be one of the built-in presets or a FASTA dataset from your history.
    ]]></help>
    <!-- Citation entries are supplied by the "citations" macro; its definition is not
         visible in this file (presumably in macros.xml). -->
    <expand macro="citations"/>
</tool>