comparison rnabob.xml @ 1:5a4b00c84f50 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/rnabob commit 1527e05bcd748a2b3cef22e0e356697066a55635
author rnateam
date Sat, 11 Nov 2017 15:08:06 -0500
parents cd00b4fe6552
children
comparison
equal deleted inserted replaced
0:cd00b4fe6552 1:5a4b00c84f50
1 <tool id="rbc_rnabob" name="RNABOB" version="2.2.1.0"> 1 <tool id="rbc_rnabob" name="RNABOB" version="2.2.1.0">
2 <description>Fast Pattern searching for RNA secondary structures</description> 2 <description>Fast Pattern searching for RNA secondary structures</description>
3
3 <requirements> 4 <requirements>
4 <requirement type="package" version="2.2.1">rnabob</requirement> 5 <requirement type="package" version="2.2.1">rnabob</requirement>
5 </requirements> 6 </requirements>
6 <version_command>echo "2.2.1"</version_command>
7 <command>
8 <![CDATA[
9 rnabob
10 -q
11 $fancy
12 $compStrands
13 $skipOverlapping
14 $descriptorFile
15 $sequenceFile > $stdout
16 ]]>
17 </command>
18 <stdio> 7 <stdio>
19 <exit_code range="1:" level="fatal" description="Error occurred. Please check Tool Standard Error" /> 8 <exit_code range="1:" level="fatal" description="Error occurred. Please check Tool Standard Error" />
20 <exit_code range=":-1" level="fatal" description="Error occurred. Please check Tool Standard Error" /> 9 <exit_code range=":-1" level="fatal" description="Error occurred. Please check Tool Standard Error" />
21 </stdio> 10 </stdio>
11 <version_command>echo "2.2.1"</version_command>
12 <command>
13 <![CDATA[
14 rnabob
15 -q
16 $fancy
17 $compStrands
18 $skipOverlapping
19 $descriptorFile
20 $sequenceFile > $stdout
21 ]]>
22 </command>
22 <inputs> 23 <inputs>
23 <param name="descriptorFile" type="data" format="txt" multiple="false" label="Motif Descriptor File" help="This file contains the description of the motif for which to search"/> 24 <param name="descriptorFile" type="data" format="txt" multiple="false" label="Motif Descriptor File" help="This file contains the description of the motif for which to search"/>
24 <param name="sequenceFile" type="data" format="fasta" multiple="false" label="Sequence File" help="This file specifies the sequence in which the motif will be searched"/> 25 <param name="sequenceFile" type="data" format="fasta" multiple="false" label="Sequence File" help="This file specifies the sequence in which the motif will be searched"/>
25 <param name="compStrands" type="boolean" truevalue="-c" falsevalue="" checked="false" label="Also search on complementary strands" help="-c : Search both strands of the supplied sequence"/> 26 <param name="compStrands" type="boolean" truevalue="-c" falsevalue="" checked="false" label="Also search on complementary strands" help="-c : Search both strands of the supplied sequence"/>
26 <param name="skipOverlapping" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Skip overlapping matches" help="-s : This is a workaround to avoid a problem in the DNABANK, overlapping matches will be ignored"/> 27 <param name="skipOverlapping" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Skip overlapping matches" help="-s : This is a workaround to avoid a problem in the DNABANK, overlapping matches will be ignored"/>
27 <param name="fancy" type="boolean" checked="false" truevalue="-F" falsevalue="" label="Show Alignments" help="Display full alignments to pattern"/> 28 <param name="fancy" type="boolean" checked="false" truevalue="-F" falsevalue="" label="Show Alignments" help="Display full alignments to pattern"/>
28 </inputs> 29 </inputs>
29 <outputs> 30 <outputs>
30 <data format="txt" name="stdout" label="${tool.name} on ${on_string}" /> 31 <data format="txt" name="stdout" label="${tool.name} on ${on_string}" />
31 </outputs> 32 </outputs>
32 <tests> 33 <tests>
46 <param name="fancy" value="False" /> 47 <param name="fancy" value="False" />
47 <output name="stdout" file="trna.bob" /> 48 <output name="stdout" file="trna.bob" />
48 </test> 49 </test>
49 </tests> 50 </tests>
50 <help> 51 <help>
52 <![CDATA[
51 **What RNABOB does** 53 **What RNABOB does**
52 54
53 RNABOB allows searching a sequence database for RNA structural motifs. 55 RNABOB allows searching a sequence database for RNA structural motifs.
54 The probe motif is specified in a *descriptor* file, 56 The probe motif is specified in a *descriptor* file,
55 which describes its primary sequence, secondary structure, and tertiary constraints. 57 which describes its primary sequence, secondary structure, and tertiary constraints.
57 59
58 ----- 60 -----
59 61
60 **Sequence database format** 62 **Sequence database format**
61 63
62 RNABOB is currently restricted to reading sequence files in FASTA format. 64 RNABOB is currently restricted to reading sequence files in FASTA format.
63 The command line version of RNABOB can also read sequence files in GCG, EMBL, GenBank and other formats. 65 The command line version of RNABOB can also read sequence files in GCG, EMBL, GenBank and other formats.
64 66
65 ----- 67 -----
66 68
67 **Descriptor file syntax** 69 **Descriptor file syntax**
68 70
69 The descriptor file syntax is fairly powerful, and allows a great deal of freedom for specifying 71 The descriptor file syntax is fairly powerful, and allows a great deal of freedom for specifying
70 RNA motifs. The syntax is therefore a bit complicated. 72 RNA motifs. The syntax is therefore a bit complicated.
71 73
72 The descriptor file has two parts: a **topology** description and an **explicit** description. 74 The descriptor file has two parts: a **topology** description and an **explicit** description.
73 75
74 The first non-blank, non-comment line of the file is the topology description. It defines the 76 The first non-blank, non-comment line of the file is the topology description. It defines the
75 order of occurrence of a series of single-stranded, double-stranded and related elements. Each 77 order of occurrence of a series of single-stranded, double-stranded and related elements. Each
76 element must be given a unique name (a number, typically) and must be prefixed with '**s**', 78 element must be given a unique name (a number, typically) and must be prefixed with '**s**',
77 '**h**', or '**r**', indicating single-strand, helical, or a relational element. Helical and 79 '**h**', or '**r**', indicating single-strand, helical, or a relational element. Helical and
78 relational elements are paired to other elements, which are suffixed by a prime, **\'**. 80 relational elements are paired to other elements, which are suffixed by a prime, **\'**.
79 81
80 For example:: 82 For example::
81 83
82 \ 84 \
83 h1 s1 h1' 85 h1 s1 h1'
84 86
85 describes a hairpin loop structure with a simple helix and single-stranded loop. If the helix 87 describes a hairpin loop structure with a simple helix and single-stranded loop. If the helix
86 always contained a non-canonical base pair at one position, the topology could be described as:: 88 always contained a non-canonical base pair at one position, the topology could be described as::
87 89
88 \ 90 \
89 h1 r1 h2 s1 h2' r1' h1' 91 h1 r1 h2 s1 h2' r1' h1'
90 92
91 where r1,r1' indicate a correlation, where the sequence r1 constrains the sequence of r1'. 93 where r1,r1' indicate a correlation, where the sequence r1 constrains the sequence of r1'.
92 (Helices are a special case of this.) 94 (Helices are a special case of this.)
93 95
94 The remaining non-comment, non-blank lines are explicit descriptions of each element in turn. Each 96 The remaining non-comment, non-blank lines are explicit descriptions of each element in turn. Each
95 line contains 3 or 4 fields, separated by tabs or blank space. The first field is the name of the 97 line contains 3 or 4 fields, separated by tabs or blank space. The first field is the name of the
96 element, from the topology description. The second field is the number of mismatches allowed in 98 element, from the topology description. The second field is the number of mismatches allowed in
97 this element. The third field is the primary sequence constraint to apply to this element. 99 this element. The third field is the primary sequence constraint to apply to this element.
98 100
99 Helices and relational element pairs are specified on a single line rather than two. Mismatches 101 Helices and relational element pairs are specified on a single line rather than two. Mismatches
100 and primary sequence constraints are given as pairs, separated by a colon '**:**'. The left side 102 and primary sequence constraints are given as pairs, separated by a colon '**:**'. The left side
101 is the constraint applied to the upstream element, and the right side is applied to the downstream 103 is the constraint applied to the upstream element, and the right side is applied to the downstream
102 elements. 104 elements.
103 105
104 The primary sequence constraint is given as a sequence of nucleotides. Any IUPAC single-letter 106 The primary sequence constraint is given as a sequence of nucleotides. Any IUPAC single-letter
105 code is recognized, including N if the position can have any base identity. Allowed length 107 code is recognized, including N if the position can have any base identity. Allowed length
106 variations are specified with asterisks ``'*'``, where each ``*`` will allow either 0 or 1 N at 108 variations are specified with asterisks ``'*'``, where each ``*`` will allow either 0 or 1 N at
107 that position. 109 that position.
108 110
109 For example:: 111 For example::
110 112
111 \ 113 \
112 GGAGG******NNNAUG 114 GGAGG******NNNAUG
113 115
114 specifies a GGAGG Shine/Dalgarno site and an AUG initiation codon, separated by a spacer of 3 to 9 116 specifies a GGAGG Shine/Dalgarno site and an AUG initiation codon, separated by a spacer of 3 to 9
115 nucleotides of any sequence. 117 nucleotides of any sequence.
116 118
117 An alternative syntax can be used for very long gaps:: 119 An alternative syntax can be used for very long gaps::
118 120
119 \ 121 \
120 GGAGG[10]NNNAUG is the same as GGAGG**********NNNAUG 122 GGAGG[10]NNNAUG is the same as GGAGG**********NNNAUG
121 123
122 Be careful defining variable length helices and relational elements; if the number and type (gap 124 Be careful defining variable length helices and relational elements; if the number and type (gap
123 or identity) of positions do not match on the left and right sides, the program will refuse to accept 125 or identity) of positions do not match on the left and right sides, the program will refuse to accept
124 the descriptor. 126 the descriptor.
125 127
126 Relational elements have an additional field which specifies a "transformation matrix" of four 128 Relational elements have an additional field which specifies a "transformation matrix" of four
127 nucleotides, specifying the rule for making the ``r'`` pattern from the ``r`` sequence in order 129 nucleotides, specifying the rule for making the ``r'`` pattern from the ``r`` sequence in order
128 ``A-C-G-T``. For example, the transformation matrix for a simple helix is ``TGCA``; if you allow 130 ``A-C-G-T``. For example, the transformation matrix for a simple helix is ``TGCA``; if you allow
129 ``G-U`` pairs, it is ``TGYR``. RNABOB allows ``G-U`` pairing by default and uses the ``TGYR`` 131 ``G-U`` pairs, it is ``TGYR``. RNABOB allows ``G-U`` pairing by default and uses the ``TGYR``
130 matrix for helical elements. 132 matrix for helical elements.
131 133
132 For example, the explicit description of our hairpin might be: 134 For example, the explicit description of our hairpin might be:
133 135
134 :: 136 ::
135 137
136 \ 138 \
137 h1 0:0 NNN:NNN 139 h1 0:0 NNN:NNN
138 r1 0:0 R:N GNAN 140 r1 0:0 R:N GNAN
139 h2 0:0 **NC:GN** 141 h2 0:0 **NC:GN**
140 s1 0 UUCG 142 s1 0 UUCG
141 143
142 This describes a stem of 6 to 8 base pairs, in which the 4th pair from the bottom of the stem must 144 This describes a stem of 6 to 8 base pairs, in which the 4th pair from the bottom of the stem must
143 be a non-canonical GA pair. Note that, in general, the left side of the primary constraint for 145 be a non-canonical GA pair. Note that, in general, the left side of the primary constraint for
144 helices and relational elements is redundant, and should be given as all N. In some cases it is 146 helices and relational elements is redundant, and should be given as all N. In some cases it is
145 convenient to constrain the right side to require a particular base pair (GU, for instance) at one 147 convenient to constrain the right side to require a particular base pair (GU, for instance) at one
146 position. 148 position.
147 149
148 A note on mismatches: The split format for helices and relational elements works like this. The 150 A note on mismatches: The split format for helices and relational elements works like this. The
149 number on the left constrains the primary sequence match of the left side of the primary 151 number on the left constrains the primary sequence match of the left side of the primary
150 constraint. The number on the right constrains the match of the right side of the primary 152 constraint. The number on the right constrains the match of the right side of the primary
151 constraint, *after* that side has been constructed according to the sequence on the left. In other 153 constraint, *after* that side has been constructed according to the sequence on the left. In other
152 words, the number on the left constrains the mismatches in primary sequence only, while the number 154 words, the number on the left constrains the mismatches in primary sequence only, while the number
153 on the right will constrain the number of mispaired positions in the helix. 155 on the right will constrain the number of mispaired positions in the helix.
154 156
155 Finally: any line that begins with a pound sign '#' is a comment line, and will not be interpreted 157 Finally: any line that begins with a pound sign '#' is a comment line, and will not be interpreted
156 by the pattern compiler. 158 by the pattern compiler.
157 159
158 **Options** 160 **Options**
159 161
160 The behavior of RNABOB can be modified by use of the following options: 162 The behavior of RNABOB can be modified by use of the following options:
161 163
162 *Complement*: Selecting this option will cause RNABOB to search for the pattern also on the 164 *Complement*: Selecting this option will cause RNABOB to search for the pattern also on the
163 complementary strands. 165 complementary strands.
164 166
165 *Skip*: This is a workaround to avoid a problem in the DNABANK. There are some sequences in the 167 *Skip*: This is a workaround to avoid a problem in the DNABANK. There are some sequences in the
166 database which have long stretches of ambiguous sequence (N's). Descriptors with no primary 168 database which have long stretches of ambiguous sequence (N's). Descriptors with no primary
167 sequence constraints will match these garbage sequences at many, many positions, and generate huge 169 sequence constraints will match these garbage sequences at many, many positions, and generate huge
168 outputs. This option toggles a search strategy that skips forward a pattern-length rather than a 170 outputs. This option toggles a search strategy that skips forward a pattern-length rather than a
169 single base when a match is found, thus printing out only a single match when overlapping matches 171 single base when a match is found, thus printing out only a single match when overlapping matches
170 are found. 172 are found.
171 173
172 **Examples** 174 **Examples**
173 175
174 The following example descriptors are included in the source distribution 176 The following example descriptors are included in the source distribution
175 (http://selab.janelia.org/software/rnabob/rnabob.tar.gz): 177 (http://selab.janelia.org/software/rnabob/rnabob.tar.gz):
176 178
177 - trna.des - a general descriptor of a tRNA structure 179 - trna.des - a general descriptor of a tRNA structure
178 - r17.des - descriptor of the consensus binding site for the r17 phage coat protein 180 - r17.des - descriptor of the consensus binding site for the r17 phage coat protein
179 - pseudoknot.des - description of a simple pseudoknotted structure 181 - pseudoknot.des - description of a simple pseudoknotted structure
180 182
181 An example cosmid ``F22B7.fa`` from the *C. elegans* genome sequencing project is also provided 183 An example cosmid ``F22B7.fa`` from the *C. elegans* genome sequencing project is also provided
182 for running these descriptors against. 184 for running these descriptors against.
183 185
184 :: 186 ::
185 187
186 \ 188 \
187 # trna.des 189 # trna.des
188 # 190 #
189 # Generalized descriptor of a tRNA cloverleaf. Doesn't 191 # Generalized descriptor of a tRNA cloverleaf. Doesn't
190 # find them all though. 192 # find them all though.
191 # 193 #
192 194
193 h1 s1 h2 s2 h2' s3 h3 s4 h3' s5 h4 s6 h4' h1' s8 195 h1 s1 h2 s2 h2' s3 h3 s4 h3' s5 h4 s6 h4' h1' s8
194 196
195 h1 0:2 NNNNNNN:NNNNNNN 197 h1 0:2 NNNNNNN:NNNNNNN
196 h2 0:1 *NNN:NNN* 198 h2 0:1 *NNN:NNN*
197 h3 0:1 NNNNN:NNNNN 199 h3 0:1 NNNNN:NNNNN
198 h4 0:1 NNNNN:NNNNN 200 h4 0:1 NNNNN:NNNNN
199 s1 0 TN 201 s1 0 TN
200 s2 0 NNNN********** 202 s2 0 NNNN**********
201 s3 0 N 203 s3 0 N
202 s4 0 NNNNNN* 204 s4 0 NNNNNN*
203 s5 0 NN******************** 205 s5 0 NN********************
204 s6 0 TTC**** 206 s6 0 TTC****
205 s8 0 NCCA 207 s8 0 NCCA
206 208
207 Running RNABOB with ``trna.des`` against ``F22B7.fa`` searches the top strand of the cosmid for 209 Running RNABOB with ``trna.des`` against ``F22B7.fa`` searches the top strand of the cosmid for
208 the above motif. ``trna.des`` hits twice, once on each strand. (F22B7 has several other tRNA genes 210 the above motif. ``trna.des`` hits twice, once on each strand. (F22B7 has several other tRNA genes
209 in it which the pattern fails to detect - this is *not* a pattern to use for tRNA genefinding!). 211 in it which the pattern fails to detect - this is *not* a pattern to use for tRNA genefinding!).
210 </help> 212 ]]>
213 </help>
211 <citations> 214 <citations>
212 <citation type="doi">10.1093/bioinformatics/6.4.325</citation> 215 <citation type="doi">10.1093/bioinformatics/6.4.325</citation>
213 <citation type="bibtex">@UNPUBLISHED{rnabob, 216 <citation type="bibtex">@UNPUBLISHED{rnabob,
214 author = {Eddy S.R}, 217 author = {Eddy S.R},
215 title = {RNABOB: a program to search for RNA secondary structure motifs in sequence databases}, 218 title = {RNABOB: a program to search for RNA secondary structure motifs in sequence databases},
216 note = {}}</citation> 219 note = {}}</citation>
217 </citations> 220 </citations>
218 </tool> 221 </tool>