Mercurial > repos > iuc > bioext_bealign
comparison bealign.xml @ 3:fb4975b507c6 draft
"planemo upload for repository https://github.com/davebx/bioext-gx/ commit af3bfbbd3f1236bf96a25bcb8483f2889295ec0c"
author | iuc |
---|---|
date | Fri, 20 Aug 2021 21:04:17 +0000 |
parents | d8b6f0adaa79 |
children | a287431cdf4f |
comparison
equal
deleted
inserted
replaced
2:d8b6f0adaa79 | 3:fb4975b507c6 |
---|---|
1 <?xml version="1.0"?> | 1 <?xml version="1.0"?> |
2 <tool id="bioext_bealign" name="Align sequences" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | 2 <tool id="bioext_bealign" name="Align sequences" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> |
3 <description>to a reference using a codon alignment algorithm</description> | 3 <description>to a reference using a codon alignment algorithm</description> |
4 <macros> | 4 <macros> |
5 <import>macros.xml</import> | 5 <import>macros.xml</import> |
6 <token name="@VERSION_SUFFIX@">0</token> | 6 <token name="@VERSION_SUFFIX@">1</token> |
7 </macros> | 7 </macros> |
8 <expand macro="requirements" /> | 8 <expand macro="requirements"> |
9 <requirement type="package" version="5.1.0">gawk</requirement> | |
10 </expand> | |
9 <version_command>bealign --version</version_command> | 11 <version_command>bealign --version</version_command> |
10 <command detect_errors="exit_code"> | 12 <command detect_errors="exit_code"> |
11 <![CDATA[ | 13 <![CDATA[ |
12 ## Some downstream tools, such as the TN-93 clustering tool and RAxML, might | 14 ## Some downstream tools, such as the TN-93 clustering tool and RAxML, might |
13 ## break if there are non-standard characters in the sequences or text other | 15 ## break if there are non-standard characters in the sequences or text other |
14 ## than alphanumerics in the sequence names, so we run the input dataset | 16 ## than alphanumerics in the sequence names, so we run the input dataset |
15 ## through a simple awk script to remove any non-IUPAC-standard nucleotides | 17 ## through a simple awk script to remove any non-IUPAC-standard nucleotides |
16 ## and replace any unwanted characters in the sequence names with underscores. | 18 ## and replace any unwanted characters in the sequence names with underscores. |
17 ## This should not affect the actual alignment, since any non-standard character | 19 ## This should not affect the actual alignment, since any non-standard character |
18 ## in the sequences is already ignored, but the possibility remains. | 20 ## in the sequences is already ignored, but the possibility remains. |
19 cat '$input' | awk '{ if (\$0 ~ "^[^>]") {a = gensub(/[^ACGTURYKMSWBDHVNacgturykmswbdhvn?-]/, "", "g"); } else {a=gensub(/[^>A-Za-z0-9_]/, "_", "g"); }; print a } ' | | 21 cat '$input' @SANITIZE@ reads.fa && |
20 sed 's,_\\+,_,g' > reads.fa && | |
21 bealign --reference '$select_reference.reference' --alphabet $advanced.alphabet | 22 bealign --reference '$select_reference.reference' --alphabet $advanced.alphabet |
22 #if $advanced.expected_identity: | 23 #if $advanced.expected_identity: |
23 --expected-identity $advanced.expected_identity | 24 --expected-identity $advanced.expected_identity |
24 #end if | 25 #end if |
25 --score-matrix $advanced.score_matrix $advanced.reverse_complement $advanced.keep_reference | |
26 #if $advanced.discard: | 26 #if $advanced.discard: |
27 $advanced.discard '$advanced.discarded_reads' | 27 $advanced.discard '$advanced.discarded_reads' |
28 #end if | 28 #end if |
29 reads.fa alignment.bam | 29 --score-matrix $advanced.score_matrix |
30 $advanced.reverse_complement | |
31 $advanced.keep_reference | |
32 reads.fa '$output' | |
33 #set $input_background = False | |
34 #if $background_source.selection == 'history': | |
35 #if $background_source.sequences: | |
36 #set $input_background = $background_source.sequences | |
37 #end if | |
38 #else: | |
39 #if $background_source.sequences: | |
40 #set $input_background = $background_source.sequences.fields.path | |
41 #end if | |
42 #end if | |
43 #if $input_background: | |
44 && cat '$input_background' @SANITIZE@ background.fa && | |
45 bealign --reference '$select_reference.reference' --alphabet $advanced.alphabet | |
46 #if $advanced.expected_identity: | |
47 --expected-identity $advanced.expected_identity | |
48 #end if | |
49 --keep-reference --score-matrix $advanced.score_matrix $advanced.reverse_complement | |
50 background.fa '$background' | |
51 #end if | |
52 #set $reference_name = str($select_reference.reference) | |
53 #if $select_reference.reference_type == 'preset' and $select_reference.save_reference: | |
54 && python '$__tool_directory__/copy_reference.py' --reference '$reference_name' --dataset '$saved_reference' | |
55 #end if | |
30 ]]> | 56 ]]> |
31 </command> | 57 </command> |
32 <inputs> | 58 <inputs> |
33 <param name="input" type="data" format="fasta" label="Input reads" help="For the benefit of certain tools that depend on this aligner, such as the TN-93 clustering tool, this dataset's sequence names will have non-alphanumeric characters replaced with underscores, and the sequences will be restricted to the set of IUPAC nucleotide characters." /> | 59 <param name="input" type="data" format="fasta" label="Input reads" help="For the benefit of certain tools that depend on this aligner, such as the TN-93 clustering tool, this dataset's sequence names will have non-alphanumeric characters replaced with underscores, and the sequences will be restricted to the set of IUPAC nucleotide characters." /> |
34 <conditional name="select_reference"> | 60 <conditional name="select_reference"> |
79 <option value="CoV2-ORF7b">SARS-CoV-2: ORF7b</option> | 105 <option value="CoV2-ORF7b">SARS-CoV-2: ORF7b</option> |
80 <option value="CoV2-ORF8">SARS-CoV-2: ORF8</option> | 106 <option value="CoV2-ORF8">SARS-CoV-2: ORF8</option> |
81 <option value="CoV2-ORF10">SARS-CoV-2: ORF10</option> | 107 <option value="CoV2-ORF10">SARS-CoV-2: ORF10</option> |
82 <option value="CoV2-RdRp">SARS-CoV-2: RNA-dependent RNA polymerase</option> | 108 <option value="CoV2-RdRp">SARS-CoV-2: RNA-dependent RNA polymerase</option> |
83 </param> | 109 </param> |
110 <param name="save_reference" type="boolean" display="radio" label="Save this reference to your history" /> | |
84 </when> | 111 </when> |
85 <when value="dataset"> | 112 <when value="dataset"> |
86 <param argument="--reference" type="data" format="fasta" label="Reference sequences" /> | 113 <param argument="--reference" type="data" format="fasta" label="Reference sequences" /> |
87 </when> | 114 </when> |
88 </conditional> | 115 </conditional> |
116 <conditional name="background_source"> | |
117 <param name="selection" type="select" label="Source for the background" help="You can use a predefined background cached on this Galaxy server or select a dataset from your history"> | |
118 <option value="data_table">Use a predefined background</option> | |
119 <option value="history">Select a dataset from your history</option> | |
120 </param> | |
121 <when value="data_table"> | |
122 <param name="sequences" type="select" optional="true" label="Select sequences from data table"> | |
123 <options from_data_table="bealign_selection" /> | |
124 </param> | |
125 </when> | |
126 <when value="history"> | |
127 <param name="sequences" type="data" format="fasta" optional="true" label="Select dataset with sequences" /> | |
128 </when> | |
129 </conditional> | |
89 <section name="advanced" title="Advanced options" expanded="False"> | 130 <section name="advanced" title="Advanced options" expanded="False"> |
90 <param name="expected_identity" argument="--expected-identity" type="float" min="0" max="1" optional="True" label="Discard sequences that are insufficiently identical to the reference" /> | 131 <param argument="--expected-identity" type="float" min="0" max="1" optional="True" label="Discard sequences that are insufficiently identical to the reference" /> |
91 <param argument="--alphabet" type="select" label="Alphabet to use for alignment"> | 132 <param argument="--alphabet" type="select" label="Alphabet to use for alignment"> |
92 <option value="codon" selected="True">Codon</option> | 133 <option value="codon" selected="True">Codon</option> |
93 <option value="dna">DNA</option> | 134 <option value="dna">DNA</option> |
94 <option value="amino">Amino acids</option> | 135 <option value="amino">Amino acids</option> |
95 </param> | 136 </param> |
108 <param name="reverse_complement" argument="--reverse-complement" type="boolean" checked="False" truevalue="--reverse-complement" falsevalue="" label="Also try to align against reverse complement of reference" /> | 149 <param name="reverse_complement" argument="--reverse-complement" type="boolean" checked="False" truevalue="--reverse-complement" falsevalue="" label="Also try to align against reverse complement of reference" /> |
109 <param name="keep_reference" argument="--keep-reference" type="boolean" checked="False" truevalue="--keep-reference" falsevalue="" label="Include reference as first sequence in aligned BAM" /> | 150 <param name="keep_reference" argument="--keep-reference" type="boolean" checked="False" truevalue="--keep-reference" falsevalue="" label="Include reference as first sequence in aligned BAM" /> |
110 </section> | 151 </section> |
111 </inputs> | 152 </inputs> |
112 <outputs> | 153 <outputs> |
113 <data name="output" format="bam" from_work_dir="alignment.bam" /> | 154 <data name="output" format="bam" label="${tool.name} on ${on_string} - Aligned Sequences" /> |
155 <data name="background" format="bam" label="${tool.name} on ${on_string} - Background" > | |
156 <filter>background_source['sequences']</filter> | |
157 </data> | |
158 <data name="saved_reference" format="fasta" label="${tool.name} on ${on_string} - Reference" > | |
159 <filter>select_reference['save_reference']</filter> | |
160 </data> | |
114 <data name="discarded_reads" format="fasta"> | 161 <data name="discarded_reads" format="fasta"> |
115 <filter>advanced['discard']</filter> | 162 <filter>advanced['discard']</filter> |
116 </data> | 163 </data> |
117 </outputs> | 164 </outputs> |
118 <tests> | 165 <tests> |
119 <test> | 166 <test> |
120 <param name="input" ftype="fasta" value="bealign-in1.fa" /> | 167 <param name="input" ftype="fasta" value="query.fa" /> |
121 <param name="reference_type" value="dataset" /> | 168 <param name="reference_type" value="dataset" /> |
122 <param name="score_matrix" value="HIV_BETWEEN_F" /> | 169 <param name="score_matrix" value="HIV_BETWEEN_F" /> |
123 <param name="reference" ftype="fasta" value="bealign-in-ref-1.fa" /> | 170 <param name="reference" ftype="fasta" value="reference.fa" /> |
124 <output name="output" file="bealign-out1.bam" ftype="bam" lines_diff="2" /> | 171 <output name="output" file="bealign-out1.bam" ftype="bam" lines_diff="2" /> |
125 </test> | 172 </test> |
126 <test> | 173 <test> |
127 <param name="input" ftype="fasta" value="bealign-in2.fa" /> | 174 <param name="input" ftype="fasta" value="query.fa" /> |
128 <param name="reference_type" value="dataset" /> | 175 <param name="reference_type" value="preset" /> |
129 <param name="score_matrix" value="BLOSUM62" /> | 176 <param name="reference" value="CoV2-nsp8" /> |
130 <param name="reference" ftype="fasta" value="bealign-in-ref-2.fa" /> | 177 <param name="score_matrix" value="HIV_BETWEEN_F" /> |
131 <output name="output" file="bealign-out2.bam" ftype="bam" lines_diff="2"/> | 178 <output name="output" file="bealign-out2.bam" ftype="bam" lines_diff="2"/> |
132 </test> | 179 </test> |
133 <test> | 180 <test expect_num_outputs="2"> |
134 <param name="input" ftype="fasta" value="bealign-in2.fa" /> | 181 <param name="input" ftype="fasta" value="query.fa" /> |
135 <param name="reference_type" value="dataset" /> | 182 <param name="reference_type" value="preset" /> |
136 <param name="expected_identity" value="0.9" /> | 183 <param name="reference" value="CoV2-nsp8" /> |
137 <param name="score_matrix" value="BLOSUM62" /> | 184 <param name="add_background" value="Yes" /> |
138 <param name="reference" ftype="fasta" value="bealign-in-ref-2.fa" /> | 185 <param name="background_source" value="data_table" /> |
186 <param name="sequences" value="CoV2-nsp8" /> | |
187 <param name="alphabet" value="codon" /> | |
188 <param name="score_matrix" value="HIV_BETWEEN_F" /> | |
139 <output name="output" file="bealign-out3.bam" ftype="bam" lines_diff="2"/> | 189 <output name="output" file="bealign-out3.bam" ftype="bam" lines_diff="2"/> |
190 <output name="background" file="bealign-out3-background.bam" ftype="bam" lines_diff="2"/> | |
191 </test> | |
192 <test expect_num_outputs="2"> | |
193 <param name="input" ftype="fasta" value="query.fa" /> | |
194 <param name="reference_type" value="preset" /> | |
195 <param name="reference" value="CoV2-nsp8" /> | |
196 <param name="save_reference" value="true" /> | |
197 <param name="add_background" value="No" /> | |
198 <param name="alphabet" value="codon" /> | |
199 <param name="score_matrix" value="HIV_BETWEEN_F" /> | |
200 <output name="output" file="bealign-out4.bam" ftype="bam" lines_diff="2"/> | |
201 <output name="saved_reference" file="reference.fa" ftype="fasta"/> | |
140 </test> | 202 </test> |
141 </tests> | 203 </tests> |
142 <help> | 204 <help> |
143 <![CDATA[ | 205 <![CDATA[ |
144 bealign | 206 bealign |