Mercurial > repos > iuc > bioext_bealign
changeset 1:f9b72a376ec9 draft
"planemo upload for repository https://github.com/davebx/bioext-gx/ commit 9a163dd8880c14f371e2603389f4951881a74b25"
author | iuc |
---|---|
date | Thu, 13 May 2021 16:57:26 +0000 |
parents | 6ef10b28e967 |
children | d8b6f0adaa79 |
files | bealign.xml macros.xml |
diffstat | 2 files changed, 18 insertions(+), 8 deletions(-) |
line wrap: on
line diff
--- a/bealign.xml Wed May 16 17:34:42 2018 -0400 +++ b/bealign.xml Thu May 13 16:57:26 2021 +0000 @@ -8,6 +8,15 @@ <version_command>bealign --version</version_command> <command detect_errors="exit_code"> <![CDATA[ + ## Some downstream tools, such as the TN-93 clustering tool and RAxML, might + ## break if there are non-standard characters in the sequences or text other + ## than alphanumerics in the sequence names, so we run the input dataset + ## through a simple awk script to remove any non-IUPAC-standard nucleotides + ## and replace any unwanted characters in the sequence names with underscores. + ## This should not affect the actual alignment, since any non-standard character + ## in the sequences is already ignored, but the possibility remains. + cat '$input' | awk '{ if (\$0 ~ "^[^>]") {a = gensub(/[^ACGTURYKMSWBDHVNacgturykmswbdhvn?-]/, "", "g"); } else {a=gensub(/[^>A-Za-z0-9_]/, "_", "g"); }; print a } ' | + sed 's,_\\+,_,g' > reads.fa && bealign --reference '$select_reference.reference' --alphabet $advanced.alphabet #if $advanced.expected_identity: --expected-identity $advanced.expected_identity @@ -16,11 +25,11 @@ #if $advanced.discard: $advanced.discard '$advanced.discarded_reads' #end if - '$input' '$output' + reads.fa alignment.bam ]]> </command> <inputs> - <param name="input" type="data" format="fasta" label="Input reads" /> + <param name="input" type="data" format="fasta" label="Input reads" help="For the benefit of certain tools that depend on this aligner, such as the TN-93 clustering tool, this dataset's sequence names will have non-alphanumeric characters replaced with underscores, and the sequences will be restricted to the set of IUPAC nucleotide characters." 
/> <conditional name="select_reference"> <param name="reference_type" type="select"> <option value="preset">Select preset</option> @@ -72,7 +81,7 @@ </section> </inputs> <outputs> - <data name="output" format="bam" /> + <data name="output" format="bam" from_work_dir="alignment.bam" /> <data name="discarded_reads" format="fasta"> <filter>advanced['discard']</filter> </data> @@ -83,14 +92,14 @@ <param name="reference_type" value="dataset" /> <param name="score_matrix" value="HIV_BETWEEN_F" /> <param name="reference" ftype="fasta" value="bealign-in-ref-1.fa" /> - <output name="output" file="bealign-out1.bam" /> + <output name="output" file="bealign-out1.bam" ftype="bam" /> </test> <test> <param name="input" ftype="fasta" value="bealign-in2.fa" /> <param name="reference_type" value="dataset" /> <param name="score_matrix" value="BLOSUM62" /> <param name="reference" ftype="fasta" value="bealign-in-ref-2.fa" /> - <output name="output" file="bealign-out2.bam" /> + <output name="output" file="bealign-out2.bam" ftype="bam" /> </test> <test> <param name="input" ftype="fasta" value="bealign-in2.fa" /> @@ -98,7 +107,7 @@ <param name="expected_identity" value="0.9" /> <param name="score_matrix" value="BLOSUM62" /> <param name="reference" ftype="fasta" value="bealign-in-ref-2.fa" /> - <output name="output" file="bealign-out3.bam" /> + <output name="output" file="bealign-out3.bam" ftype="bam" /> </test> </tests> <help>
--- a/macros.xml Wed May 16 17:34:42 2018 -0400 +++ b/macros.xml Thu May 13 16:57:26 2021 +0000 @@ -1,9 +1,10 @@ <?xml version="1.0"?> <macros> - <token name="@VERSION@">0.18.6</token> + <token name="@VERSION@">0.19.7</token> <xml name="requirements"> <requirements> <requirement type="package" version="@VERSION@">python-bioext</requirement> + <yield /> </requirements> </xml> <xml name="citations"> @@ -18,4 +19,4 @@ </citation> </citations> </xml> -</macros> \ No newline at end of file +</macros>