Mercurial > repos > rnateam > rnabob
comparison rnabob.xml @ 1:5a4b00c84f50 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/rnabob commit 1527e05bcd748a2b3cef22e0e356697066a55635
author | rnateam |
---|---|
date | Sat, 11 Nov 2017 15:08:06 -0500 |
parents | cd00b4fe6552 |
children |
comparison
equal
deleted
inserted
replaced
0:cd00b4fe6552 | 1:5a4b00c84f50 |
---|---|
1 <tool id="rbc_rnabob" name="RNABOB" version="2.2.1.0"> | 1 <tool id="rbc_rnabob" name="RNABOB" version="2.2.1.0"> |
2 <description>Fast Pattern searching for RNA secondary structures</description> | 2 <description>Fast Pattern searching for RNA secondary structures</description> |
3 | |
3 <requirements> | 4 <requirements> |
4 <requirement type="package" version="2.2.1">rnabob</requirement> | 5 <requirement type="package" version="2.2.1">rnabob</requirement> |
5 </requirements> | 6 </requirements> |
6 <version_command>echo "2.2.1"</version_command> | |
7 <command> | |
8 <![CDATA[ | |
9 rnabob | |
10 -q | |
11 $fancy | |
12 $compStrands | |
13 $skipOverlapping | |
14 $descriptorFile | |
15 $sequenceFile > $stdout | |
16 ]]> | |
17 </command> | |
18 <stdio> | 7 <stdio> |
19 <exit_code range="1:" level="fatal" description="Error occurred. Please check Tool Standard Error" /> | 8 <exit_code range="1:" level="fatal" description="Error occurred. Please check Tool Standard Error" /> |
20 <exit_code range=":-1" level="fatal" description="Error occurred. Please check Tool Standard Error" /> | 9 <exit_code range=":-1" level="fatal" description="Error occurred. Please check Tool Standard Error" /> |
21 </stdio> | 10 </stdio> |
11 <version_command>echo "2.2.1"</version_command> | |
12 <command> | |
13 <![CDATA[ | |
14 rnabob | |
15 -q | |
16 $fancy | |
17 $compStrands | |
18 $skipOverlapping | |
19 $descriptorFile | |
20 $sequenceFile > $stdout | |
21 ]]> | |
22 </command> | |
22 <inputs> | 23 <inputs> |
23 <param name="descriptorFile" type="data" format="txt" multiple="false" label="Motif Descriptor File" help="This file contains the description of the motif for which to search"/> | 24 <param name="descriptorFile" type="data" format="txt" multiple="false" label="Motif Descriptor File" help="This file contains the description of the motif for which to search"/> |
24 <param name="sequenceFile" type="data" format="fasta" multiple="false" label="Sequence File" help="This file specifies the sequence in which the motif will be searched"/> | 25 <param name="sequenceFile" type="data" format="fasta" multiple="false" label="Sequence File" help="This file specifies the sequence in which the motif will be searched"/> |
25 <param name="compStrands" type="boolean" truevalue="-c" falsevalue="" checked="false" label="Also search on complementary strands" help="-c : Search both strands of the supplied sequence"/> | 26 <param name="compStrands" type="boolean" truevalue="-c" falsevalue="" checked="false" label="Also search on complementary strands" help="-c : Search both strands of the supplied sequence"/> |
26 <param name="skipOverlapping" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Skip overlapping matches" help="-s : This is a workaround to avoid a problem in the DNABANK, overlapping matches will be ignored"/> | 27 <param name="skipOverlapping" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Skip overlapping matches" help="-s : This is a workaround to avoid a problem in the DNABANK, overlapping matches will be ignored"/> |
27 <param name="fancy" type="boolean" checked="false" truevalue="-F" falsevalue="" label="Show Alignments" help="Display full alignments to pattern"/> | 28 <param name="fancy" type="boolean" checked="false" truevalue="-F" falsevalue="" label="Show Alignments" help="Display full alignments to pattern"/> |
28 </inputs> | 29 </inputs> |
29 <outputs> | 30 <outputs> |
30 <data format="txt" name="stdout" label="${tool.name} on ${on_string}" /> | 31 <data format="txt" name="stdout" label="${tool.name} on ${on_string}" /> |
31 </outputs> | 32 </outputs> |
32 <tests> | 33 <tests> |
46 <param name="fancy" value="False" /> | 47 <param name="fancy" value="False" /> |
47 <output name="stdout" file="trna.bob" /> | 48 <output name="stdout" file="trna.bob" /> |
48 </test> | 49 </test> |
49 </tests> | 50 </tests> |
50 <help> | 51 <help> |
52 <![CDATA[ | |
51 **What RNABOB does** | 53 **What RNABOB does** |
52 | 54 |
53 RNABOB allows searching a sequence database for RNA structural motifs. | 55 RNABOB allows searching a sequence database for RNA structural motifs. |
54 The probe motif is specified in a *descriptor* file, | 56 The probe motif is specified in a *descriptor* file, |
55 which describes its primary sequence, secondary structure, and tertiary constraints. | 57 which describes its primary sequence, secondary structure, and tertiary constraints. |
57 | 59 |
58 ----- | 60 ----- |
59 | 61 |
60 **Sequence database format** | 62 **Sequence database format** |
61 | 63 |
62 RNABOB is currently restricted to reading sequence files in FASTA format. | 64 RNABOB is currently restricted to reading sequence files in FASTA format. |
63 The command line version of RNABOB can also read sequence files in GCG, EMBL, GenBank and other formats. | 65 The command line version of RNABOB can also read sequence files in GCG, EMBL, GenBank and other formats. |
64 | 66 |
65 ----- | 67 ----- |
66 | 68 |
67 **Descriptor file syntax** | 69 **Descriptor file syntax** |
68 | 70 |
69 The descriptor file syntax is fairly powerful, and allows a great deal of freedom for specifying | 71 The descriptor file syntax is fairly powerful, and allows a great deal of freedom for specifying |
70 RNA motifs. The syntax is therefore a bit complicated. | 72 RNA motifs. The syntax is therefore a bit complicated. |
71 | 73 |
72 The descriptor file has two parts: a **topology** description and an **explicit** description. | 74 The descriptor file has two parts: a **topology** description and an **explicit** description. |
73 | 75 |
74 The first non-blank, non-comment line of the file is the topology description. It defines the | 76 The first non-blank, non-comment line of the file is the topology description. It defines the |
75 order of occurrence of a series of single-stranded, double-stranded and related elements. Each | 77 order of occurrence of a series of single-stranded, double-stranded and related elements. Each |
76 element must be given a unique name (a number, typically) and must be prefixed with '**s**', | 78 element must be given a unique name (a number, typically) and must be prefixed with '**s**', |
77 '**h**', or '**r**', indicating single-strand, helical, or a relational element. Helical and | 79 '**h**', or '**r**', indicating single-strand, helical, or a relational element. Helical and |
78 relational elements are paired to other elements, which are suffixed by a prime, **\'**. | 80 relational elements are paired to other elements, which are suffixed by a prime, **\'**. |
79 | 81 |
80 For example:: | 82 For example:: |
81 | 83 |
82 \ | 84 \ |
83 h1 s1 h1' | 85 h1 s1 h1' |
84 | 86 |
85 describes a hairpin loop structure with a simple helix and single-stranded loop. If the helix | 87 describes a hairpin loop structure with a simple helix and single-stranded loop. If the helix |
86 always contained a non-canonical base pair at one position, the topology could be described as:: | 88 always contained a non-canonical base pair at one position, the topology could be described as:: |
87 | 89 |
88 \ | 90 \ |
89 h1 r1 h2 s1 h2' r1' h1' | 91 h1 r1 h2 s1 h2' r1' h1' |
90 | 92 |
91 where r1,r1' indicate a correlation, where the sequence r1 constrains the sequence of r1'. | 93 where r1,r1' indicate a correlation, where the sequence r1 constrains the sequence of r1'. |
92 (Helices are a special case of this.) | 94 (Helices are a special case of this.) |
93 | 95 |
94 The remaining non-comment, non-blank lines are explicit descriptions of each element in turn. Each | 96 The remaining non-comment, non-blank lines are explicit descriptions of each element in turn. Each |
95 line contains 3 or 4 fields, separated by tabs or blank space. The first field is the name of the | 97 line contains 3 or 4 fields, separated by tabs or blank space. The first field is the name of the |
96 element, from the topology description. The second field is the number of mismatches allowed in | 98 element, from the topology description. The second field is the number of mismatches allowed in |
97 this element. The third field is the primary sequence constraint to apply to this element. | 99 this element. The third field is the primary sequence constraint to apply to this element. |
98 | 100 |
99 Helices and relational element pairs are specified on a single line rather than two. Mismatches | 101 Helices and relational element pairs are specified on a single line rather than two. Mismatches |
100 and primary sequence constraints are given as pairs, separated by a colon '**:**'. The left side | 102 and primary sequence constraints are given as pairs, separated by a colon '**:**'. The left side |
101 is the constraint applied to the upstream element, and the right side is applied to the downstream | 103 is the constraint applied to the upstream element, and the right side is applied to the downstream |
102 element. | 104 element. |
103 | 105 |
104 The primary sequence constraint is given as a sequence of nucleotides. Any IUPAC single-letter | 106 The primary sequence constraint is given as a sequence of nucleotides. Any IUPAC single-letter |
105 code is recognized, including N if the position can have any base identity. Allowed length | 107 code is recognized, including N if the position can have any base identity. Allowed length |
106 variations are specified with asterisks ``'*'``, where each ``*`` will allow either 0 or 1 N at | 108 variations are specified with asterisks ``'*'``, where each ``*`` will allow either 0 or 1 N at |
107 that position. | 109 that position. |
108 | 110 |
109 For example:: | 111 For example:: |
110 | 112 |
111 \ | 113 \ |
112 GGAGG******NNNAUG | 114 GGAGG******NNNAUG |
113 | 115 |
114 specifies a GGAGG Shine/Dalgarno site and an AUG initiation codon, separated by a spacer of 3 to 9 | 116 specifies a GGAGG Shine/Dalgarno site and an AUG initiation codon, separated by a spacer of 3 to 9 |
115 nucleotides of any sequence. | 117 nucleotides of any sequence. |
116 | 118 |
117 An alternative syntax can be used for very long gaps:: | 119 An alternative syntax can be used for very long gaps:: |
118 | 120 |
119 \ | 121 \ |
120 GGAGG[10]NNNAUG is the same as GGAGG**********NNNAUG | 122 GGAGG[10]NNNAUG is the same as GGAGG**********NNNAUG |
121 | 123 |
122 Be careful defining variable length helices and relational elements; if the number and type (gap | 124 Be careful defining variable length helices and relational elements; if the number and type (gap |
123 or identity) of positions do not match on left and right sides, the program will refuse to accept | 125 or identity) of positions do not match on left and right sides, the program will refuse to accept |
124 the descriptor. | 126 the descriptor. |
125 | 127 |
126 Relational elements have an additional field which specifies a "transformation matrix" of four | 128 Relational elements have an additional field which specifies a "transformation matrix" of four |
127 nucleotides, specifying the rule for making the ``r'`` pattern from the ``r`` sequence in order | 129 nucleotides, specifying the rule for making the ``r'`` pattern from the ``r`` sequence in order |
128 ``A-C-G-T``. For example, the transformation matrix for a simple helix is ``TGCA``; if you allow | 130 ``A-C-G-T``. For example, the transformation matrix for a simple helix is ``TGCA``; if you allow |
129 ``G-U`` pairs, it is ``TGYR``. RNABOB allows ``G-U`` pairing by default and uses the ``TGYR`` | 131 ``G-U`` pairs, it is ``TGYR``. RNABOB allows ``G-U`` pairing by default and uses the ``TGYR`` |
130 matrix for helical elements. | 132 matrix for helical elements. |
131 | 133 |
132 For example, the explicit description of our hairpin might be: | 134 For example, the explicit description of our hairpin might be: |
133 | 135 |
134 :: | 136 :: |
135 | 137 |
136 \ | 138 \ |
137 h1 0:0 NNN:NNN | 139 h1 0:0 NNN:NNN |
138 r1 0:0 R:N GNAN | 140 r1 0:0 R:N GNAN |
139 h2 0:0 **NC:GN** | 141 h2 0:0 **NC:GN** |
140 s1 0 UUCG | 142 s1 0 UUCG |
141 | 143 |
142 This describes a stem of 6 to 8 base pairs, in which the 4th pair from the bottom of the stem must | 144 This describes a stem of 6 to 8 base pairs, in which the 4th pair from the bottom of the stem must |
143 be a non-canonical GA pair. Note that, in general, the left side of the primary constraint for | 145 be a non-canonical GA pair. Note that, in general, the left side of the primary constraint for |
144 helices and relational elements is redundant, and should be given as all N. In some cases it is | 146 helices and relational elements is redundant, and should be given as all N. In some cases it is |
145 convenient to constrain the right side to require a particular base pair (GU, for instance) at one | 147 convenient to constrain the right side to require a particular base pair (GU, for instance) at one |
146 position. | 148 position. |
147 | 149 |
148 A note on mismatches: The split format for helices and relational elements works like this. The | 150 A note on mismatches: The split format for helices and relational elements works like this. The |
149 number on the left constrains the primary sequence match of the left side of the primary | 151 number on the left constrains the primary sequence match of the left side of the primary |
150 constraint. The number on the right constrains the match of the right side of the primary | 152 constraint. The number on the right constrains the match of the right side of the primary |
151 constraint, *after* that side has been constructed according to the sequence on the left. In other | 153 constraint, *after* that side has been constructed according to the sequence on the left. In other |
152 words, the number on the left constrains the mismatches in primary sequence only, while the number | 154 words, the number on the left constrains the mismatches in primary sequence only, while the number |
153 on the right will constrain the number of mispaired positions in the helix. | 155 on the right will constrain the number of mispaired positions in the helix. |
154 | 156 |
155 Finally: any line that begins with a pound sign '#' is a comment line, and will not be interpreted | 157 Finally: any line that begins with a pound sign '#' is a comment line, and will not be interpreted |
156 by the pattern compiler. | 158 by the pattern compiler. |
157 | 159 |
158 **Options** | 160 **Options** |
159 | 161 |
160 The behavior of RNABOB can be modified by use of the following options: | 162 The behavior of RNABOB can be modified by use of the following options: |
161 | 163 |
162 *Complement*: Selecting this option will cause RNABOB to search for the pattern also on the | 164 *Complement*: Selecting this option will cause RNABOB to search for the pattern also on the |
163 complementary strands. | 165 complementary strands. |
164 | 166 |
165 *Skip*: This is a workaround to avoid a problem in the DNABANK. There are some sequences in the | 167 *Skip*: This is a workaround to avoid a problem in the DNABANK. There are some sequences in the |
166 database which have long stretches of ambiguous sequence (N's). Descriptors with no primary | 168 database which have long stretches of ambiguous sequence (N's). Descriptors with no primary |
167 sequence constraints will match these garbage sequences at many, many positions, and generate huge | 169 sequence constraints will match these garbage sequences at many, many positions, and generate huge |
168 outputs. This option toggles a search strategy that skips forward a pattern-length rather than a | 170 outputs. This option toggles a search strategy that skips forward a pattern-length rather than a |
169 single base when a match is found, thus printing out only a single match when overlapping matches | 171 single base when a match is found, thus printing out only a single match when overlapping matches |
170 are found. | 172 are found. |
171 | 173 |
172 **Examples** | 174 **Examples** |
173 | 175 |
174 The following example descriptors are included in the source distribution | 176 The following example descriptors are included in the source distribution |
175 (http://selab.janelia.org/software/rnabob/rnabob.tar.gz): | 177 (http://selab.janelia.org/software/rnabob/rnabob.tar.gz): |
176 | 178 |
177 - trna.des - a general descriptor of a tRNA structure | 179 - trna.des - a general descriptor of a tRNA structure |
178 - r17.des - descriptor of the consensus binding site for the r17 phage coat protein | 180 - r17.des - descriptor of the consensus binding site for the r17 phage coat protein |
179 - pseudoknot.des - description of a simple pseudoknotted structure | 181 - pseudoknot.des - description of a simple pseudoknotted structure |
180 | 182 |
181 An example cosmid ``F22B7.fa`` from the *C. elegans* genome sequencing project is also provided | 183 An example cosmid ``F22B7.fa`` from the *C. elegans* genome sequencing project is also provided |
182 for running these descriptors against. | 184 for running these descriptors against. |
183 | 185 |
184 :: | 186 :: |
185 | 187 |
186 \ | 188 \ |
187 # trna.des | 189 # trna.des |
188 # | 190 # |
189 # Generalized descriptor of a tRNA cloverleaf. Doesn't | 191 # Generalized descriptor of a tRNA cloverleaf. Doesn't |
190 # find them all though. | 192 # find them all though. |
191 # | 193 # |
192 | 194 |
193 h1 s1 h2 s2 h2' s3 h3 s4 h3' s5 h4 s6 h4' h1' s8 | 195 h1 s1 h2 s2 h2' s3 h3 s4 h3' s5 h4 s6 h4' h1' s8 |
194 | 196 |
195 h1 0:2 NNNNNNN:NNNNNNN | 197 h1 0:2 NNNNNNN:NNNNNNN |
196 h2 0:1 *NNN:NNN* | 198 h2 0:1 *NNN:NNN* |
197 h3 0:1 NNNNN:NNNNN | 199 h3 0:1 NNNNN:NNNNN |
198 h4 0:1 NNNNN:NNNNN | 200 h4 0:1 NNNNN:NNNNN |
199 s1 0 TN | 201 s1 0 TN |
200 s2 0 NNNN********** | 202 s2 0 NNNN********** |
201 s3 0 N | 203 s3 0 N |
202 s4 0 NNNNNN* | 204 s4 0 NNNNNN* |
203 s5 0 NN******************** | 205 s5 0 NN******************** |
204 s6 0 TTC**** | 206 s6 0 TTC**** |
205 s8 0 NCCA | 207 s8 0 NCCA |
206 | 208 |
207 Running RNABOB with ``trna.des`` against ``F22B7.fa`` searches the top strand of the cosmid for | 209 Running RNABOB with ``trna.des`` against ``F22B7.fa`` searches the top strand of the cosmid for |
208 the above motif. ``trna.des`` hits twice, once on each strand. (F22B7 has several other tRNA genes | 210 the above motif. ``trna.des`` hits twice, once on each strand. (F22B7 has several other tRNA genes |
209 in it which the pattern fails to detect - this is *not* a pattern to use for tRNA genefinding!). | 211 in it which the pattern fails to detect - this is *not* a pattern to use for tRNA genefinding!). |
210 </help> | 212 ]]> |
213 </help> | |
211 <citations> | 214 <citations> |
212 <citation type="doi">10.1093/bioinformatics/6.4.325</citation> | 215 <citation type="doi">10.1093/bioinformatics/6.4.325</citation> |
213 <citation type="bibtex">@UNPUBLISHED{rnabob, | 216 <citation type="bibtex">@UNPUBLISHED{rnabob, |
214 author = {Eddy S.R}, | 217 author = {Eddy S.R}, |
215 title = {RNABOB: a program to search for RNA secondary structure motifs in sequence databases}, | 218 title = {RNABOB: a program to search for RNA secondary structure motifs in sequence databases}, |
216 note = {}}</citation> | 219 note = {}}</citation> |
217 </citations> | 220 </citations> |
218 </tool> | 221 </tool> |