Mercurial > repos > iuc > bioext_bealign
view bealign.xml @ 9:396cc0c65700 draft
planemo upload for repository https://github.com/davebx/bioext-gx/ commit c8457ba9ed581df0c2cfeb53a6cd4d93da3beba6
author | iuc |
---|---|
date | Mon, 10 Jun 2024 06:12:47 +0000 |
parents | 8a7462c0ac14 |
children |
line wrap: on
line source
<tool id="bioext_bealign" name="Align sequences" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>to a reference using a codon alignment algorithm</description>
    <!--
        Galaxy wrapper around bealign: aligns the input reads (and, optionally,
        a background set) to a reference and writes coordinate-sorted BAM output.
        The @TOOL_VERSION@, @VERSION_SUFFIX@, @PROFILE@ and @SANITIZE@ tokens and
        the "requirements" / "citations" macros are not defined in this file;
        they are expected to be provided by macros.xml.
    -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements">
        <requirement type="package" version="5.3.0">gawk</requirement>
        <requirement type="package" version="1.20">samtools</requirement>
    </expand>
    <version_command>bealign --version</version_command>
    <command detect_errors="exit_code"><![CDATA[
        ## Some downstream tools, such as the TN-93 clustering tool and RAxML, might
        ## break if there are non-standard characters in the sequences or text other
        ## than alphanumerics in the sequence names, so we run the input dataset
        ## through a simple awk script to remove any non-IUPAC-standard nucleotides
        ## and replace any unwanted characters in the sequence names with underscores.
        ## This should not affect the actual alignment, since any non-standard character
        ## in the sequences is already ignored, but the possibility remains.
        set -o pipefail;
        cat '$input' @SANITIZE@ reads.fa &&

        ## bealign's parallel job executor uses the NCPU env variable
        NCPU=\${GALAXY_SLOTS:-2} bealign
            --reference '$select_reference.reference'
            --alphabet $advanced.alphabet
            #if $advanced.expected_identity:
                --expected-identity $advanced.expected_identity
            #end if
            #if $advanced.discard:
                ## 'discarded_reads' is an output dataset, so it is referenced as a
                ## top-level template variable and not as a member of the 'advanced' section.
                $advanced.discard '$discarded_reads'
            #end if
            --score-matrix $advanced.score_matrix
            $advanced.reverse_complement
            $advanced.keep_reference
            ## bypass bealign's internal pysam-based, single-threaded BAM sorting
            --no-sort
            reads.fa
            bealign_out.bam &&
        samtools sort -@\${GALAXY_SLOTS:-2} -T "\${TMPDIR:-.}" -O bam -o '$output' bealign_out.bam

        ## Resolve the optional background sequences to a file path: a history
        ## dataset is used directly, a data table entry through its 'path' field.
        ## $input_background stays False when nothing was selected.
        #set $input_background = False
        #if $background_source.selection == 'history':
            #if $background_source.sequences:
                #set $input_background = $background_source.sequences
            #end if
        #else:
            #if $background_source.sequences:
                #set $input_background = $background_source.sequences.fields.path
            #end if
        #end if

        #if $input_background:
            ## The background is sanitized and aligned the same way as the reads,
            ## except that --keep-reference is always passed and nothing is discarded.
            && cat '$input_background' @SANITIZE@ background.fa &&
            NCPU=\${GALAXY_SLOTS:-2} bealign
                --reference '$select_reference.reference'
                --alphabet $advanced.alphabet
                #if $advanced.expected_identity:
                    --expected-identity $advanced.expected_identity
                #end if
                --keep-reference
                --score-matrix $advanced.score_matrix
                $advanced.reverse_complement
                ## bypass bealign's internal pysam-based, single-threaded BAM sorting
                --no-sort
                background.fa
                bealign_background.bam &&
            samtools sort -@\${GALAXY_SLOTS:-2} -T "\${TMPDIR:-.}" -O bam -o '$background' bealign_background.bam
        #end if

        ## Optionally export the selected preset reference as a history dataset.
        #set $reference_name = str($select_reference.reference)
        #if $select_reference.reference_type == 'preset' and $select_reference.save_reference:
            && python '$__tool_directory__/copy_reference.py'
                --reference '$reference_name'
                --dataset '$saved_reference'
        #end if
    ]]></command>
    <inputs>
        <param name="input" type="data" format="fasta" label="Input reads" help="For the benefit of certain tools that depend on this aligner, such as the TN-93 clustering tool, this dataset's sequence names will have non-alphanumeric characters replaced with underscores, and the sequences will be restricted to the set of IUPAC nucleotide characters." />
        <conditional name="select_reference">
            <param name="reference_type" type="select" label="Reference source">
                <option value="preset">Select preset</option>
                <option value="dataset">Use a history dataset</option>
            </param>
            <when value="preset">
                <param argument="--reference" type="select" label="Preset reference">
                    <option value="HXB2_tat">HXB2 tat</option>
                    <option value="HXB2_gag">HXB2 gag</option>
                    <option value="HXB2_pol">HXB2 polymerase</option>
                    <option value="HXB2_int">HXB2 integrase</option>
                    <option value="HXB2_vif">HXB2 vif</option>
                    <option value="HXB2_pr">HXB2 protease</option>
                    <option value="HXB2_vpr">HXB2 vpr</option>
                    <option value="NL4-3_prrt">NL4-3 protease and reverse transcriptase</option>
                    <option value="HXB2_nef">HXB2 nef</option>
                    <option value="HXB2_env">HXB2 envelope</option>
                    <option value="HXB2_rt">HXB2 reverse transcriptase</option>
                    <option value="HXB2_prrt">HXB2 protease and reverse transcriptase</option>
                    <option value="HXB2_rev">HXB2 rev</option>
                    <option value="HXB2_vpu">HXB2 vpu</option>
                    <option value="CoV2-3C">SARS-CoV-2: 3C</option>
                    <option value="CoV2-S">SARS-CoV-2: Spike</option>
                    <option value="CoV2-E">SARS-CoV-2: Envelope</option>
                    <option value="CoV2-M">SARS-CoV-2: Membrane</option>
                    <option value="CoV2-N">SARS-CoV-2: Nucleoprotein</option>
                    <option value="CoV2-endornase">SARS-CoV-2: endornase</option>
                    <option value="CoV2-exonuclease">SARS-CoV-2: exonuclease</option>
                    <option value="CoV2-helicase">SARS-CoV-2: helicase</option>
                    <option value="CoV2-leader">SARS-CoV-2: leader</option>
                    <option value="CoV2-methyltransferase">SARS-CoV-2: methyltransferase</option>
                    <option value="CoV2-nsp2">SARS-CoV-2: nsp2</option>
                    <option value="CoV2-nsp3">SARS-CoV-2: nsp3</option>
                    <option value="CoV2-nsp4">SARS-CoV-2: nsp4</option>
                    <option value="CoV2-nsp6">SARS-CoV-2: nsp6</option>
                    <option value="CoV2-nsp7">SARS-CoV-2: nsp7</option>
                    <option value="CoV2-nsp8">SARS-CoV-2: nsp8</option>
                    <option value="CoV2-nsp9">SARS-CoV-2: nsp9</option>
                    <option value="CoV2-nsp10">SARS-CoV-2: nsp10</option>
                    <option value="CoV2-ORF1a">SARS-CoV-2: ORF1a</option>
                    <option value="CoV2-ORF1b">SARS-CoV-2: ORF1b</option>
                    <option value="CoV2-ORF3a">SARS-CoV-2: ORF3a</option>
                    <option value="CoV2-ORF5">SARS-CoV-2: ORF5</option>
                    <option value="CoV2-ORF6">SARS-CoV-2: ORF6</option>
                    <option value="CoV2-ORF7a">SARS-CoV-2: ORF7a</option>
                    <option value="CoV2-ORF7b">SARS-CoV-2: ORF7b</option>
                    <option value="CoV2-ORF8">SARS-CoV-2: ORF8</option>
                    <option value="CoV2-ORF10">SARS-CoV-2: ORF10</option>
                    <option value="CoV2-RdRp">SARS-CoV-2: RNA-dependent RNA polymerase</option>
                </param>
                <param name="save_reference" type="boolean" display="radio" label="Save this reference to your history" />
            </when>
            <when value="dataset">
                <param argument="--reference" type="data" format="fasta" label="Reference sequences" />
            </when>
        </conditional>
        <conditional name="background_source">
            <param name="selection" type="select" label="Source for the background" help="You can use a predefined background cached on this Galaxy server or select a dataset from your history">
                <option value="data_table">Use a predefined background</option>
                <option value="history">Select a dataset from your history</option>
            </param>
            <when value="data_table">
                <param name="sequences" type="select" optional="true" label="Select sequences from data table">
                    <options from_data_table="bealign_selection" />
                </param>
            </when>
            <when value="history">
                <param name="sequences" type="data" format="fasta" optional="true" label="Select dataset with sequences" />
            </when>
        </conditional>
        <section name="advanced" title="Advanced options" expanded="false">
            <param argument="--expected-identity" type="float" min="0" max="1" optional="true" label="Discard sequences that are insufficiently identical to the reference" />
            <param argument="--alphabet" type="select" label="Alphabet to use for alignment">
                <option value="codon" selected="true">Codon</option>
                <option value="dna">DNA</option>
                <option value="amino">Amino acids</option>
            </param>
            <param argument="--score-matrix" type="select" label="Parametrize using score matrix">
                <option value="BLOSUM62" selected="true">Blocks substitution</option>
                <option value="DNA65">DNA, 65% expected identity</option>
                <option value="DNA70">DNA, 70% expected identity</option>
                <option value="DNA88">DNA, 88% expected identity</option>
                <option value="DNA80">DNA, 80% expected identity</option>
                <option value="DNA95">DNA, 95% expected identity</option>
                <option value="PAM200">PAM 200 substitution</option>
                <option value="PAM250">PAM 250 substitution</option>
                <option value="HIV_BETWEEN_F">HIV between+F</option>
            </param>
            <param argument="--discard" type="boolean" checked="false" truevalue="--discard" falsevalue="" label="Output discarded sequences to a separate dataset" />
            <param argument="--reverse-complement" type="boolean" checked="false" truevalue="--reverse-complement" falsevalue="" label="Also try to align against reverse complement of reference" />
            <param argument="--keep-reference" type="boolean" checked="false" truevalue="--keep-reference" falsevalue="" label="Include reference as first sequence in aligned BAM" />
        </section>
    </inputs>
    <outputs>
        <data name="output" format="bam" label="${tool.name} on ${on_string} - Aligned Sequences" />
        <!-- Only created when a background (history dataset or data table entry) was selected. -->
        <data name="background" format="bam" label="${tool.name} on ${on_string} - Background">
            <filter>background_source['sequences']</filter>
        </data>
        <!-- Only created when a preset reference is used and the user asked to save it. -->
        <data name="saved_reference" format="fasta" label="${tool.name} on ${on_string} - Reference">
            <filter>select_reference['reference_type'] == 'preset' and select_reference['save_reference']</filter>
        </data>
        <!-- Only created when the advanced "discard" option is enabled. -->
        <data name="discarded_reads" format="fasta" label="${tool.name} on ${on_string} - Discarded Sequences">
            <filter>advanced['discard']</filter>
        </data>
    </outputs>
    <tests>
        <!-- Reference supplied as a history dataset. -->
        <test expect_num_outputs="1">
            <param name="input" ftype="fasta" value="query.fa" />
            <param name="reference_type" value="dataset" />
            <param name="score_matrix" value="HIV_BETWEEN_F" />
            <param name="reference" ftype="fasta" value="reference.fa" />
            <output name="output" file="bealign-out1.bam" ftype="bam" lines_diff="2" />
        </test>
        <!-- Preset reference. -->
        <test expect_num_outputs="1">
            <param name="input" ftype="fasta" value="query.fa" />
            <param name="reference_type" value="preset" />
            <param name="reference" value="CoV2-nsp8" />
            <param name="score_matrix" value="HIV_BETWEEN_F" />
            <output name="output" file="bealign-out2.bam" ftype="bam" lines_diff="2" />
        </test>
        <!-- Preset reference plus a background taken from the data table. -->
        <test expect_num_outputs="2">
            <param name="input" ftype="fasta" value="query.fa" />
            <param name="reference_type" value="preset" />
            <param name="reference" value="CoV2-nsp8" />
            <conditional name="background_source">
                <param name="selection" value="data_table" />
                <param name="sequences" value="CoV2-nsp8" />
            </conditional>
            <param name="alphabet" value="codon" />
            <param name="score_matrix" value="HIV_BETWEEN_F" />
            <output name="output" file="bealign-out3.bam" ftype="bam" lines_diff="2" />
            <output name="background" file="bealign-out3-background.bam" ftype="bam" lines_diff="2" />
        </test>
        <!-- Preset reference that is also saved to the history. -->
        <test expect_num_outputs="2">
            <param name="input" ftype="fasta" value="query.fa" />
            <param name="reference_type" value="preset" />
            <param name="reference" value="CoV2-nsp8" />
            <param name="save_reference" value="true" />
            <param name="alphabet" value="codon" />
            <param name="score_matrix" value="HIV_BETWEEN_F" />
            <output name="output" file="bealign-out4.bam" ftype="bam" lines_diff="2" />
            <output name="saved_reference" file="reference.fa" ftype="fasta" />
        </test>
    </tests>
    <help><![CDATA[
bealign
-------

Align sequences to a reference using a codon alignment algorithm.

NOTES
-----

Reference can be one of the presets or a custom history reference.
    ]]></help>
    <expand macro="citations"/>
</tool>