diff rnabob.xml @ 1:5a4b00c84f50 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/rnabob commit 1527e05bcd748a2b3cef22e0e356697066a55635
author rnateam
date Sat, 11 Nov 2017 15:08:06 -0500
parents cd00b4fe6552
children
line wrap: on
line diff
--- a/rnabob.xml	Mon Dec 22 09:08:31 2014 -0500
+++ b/rnabob.xml	Sat Nov 11 15:08:06 2017 -0500
@@ -1,30 +1,31 @@
 <tool id="rbc_rnabob" name="RNABOB" version="2.2.1.0">
     <description>Fast Pattern searching for RNA secondary structures</description>
+
     <requirements>
         <requirement type="package" version="2.2.1">rnabob</requirement>
     </requirements>
-    <version_command>echo "2.2.1"</version_command>
-    <command>
-<![CDATA[
-    rnabob 
-    	-q
-    	$fancy
-    	$compStrands
-    	$skipOverlapping
-    	$descriptorFile
-    	$sequenceFile > $stdout
-]]>
-    </command>
     <stdio>
         <exit_code range="1:" level="fatal" description="Error occurred. Please check Tool Standard Error" />
         <exit_code range=":-1" level="fatal" description="Error occurred. Please check Tool Standard Error" />
     </stdio>
+    <version_command>echo "2.2.1"</version_command>
+    <command>
+<![CDATA[
+    rnabob
+        -q
+        $fancy
+        $compStrands
+        $skipOverlapping
+        $descriptorFile
+        $sequenceFile > $stdout
+]]>
+    </command>
     <inputs>
         <param name="descriptorFile" type="data" format="txt" multiple="false" label="Motif Descriptor File" help="This file contains the description of the motif for which to search"/>
-	    <param name="sequenceFile" type="data" format="fasta" multiple="false" label="Sequence File" help="This file specifies the sequence in which the motif will be searched"/>
-	    <param name="compStrands" type="boolean" truevalue="-c" falsevalue="" checked="false" label="Also search on complementary strands" help="-c : Search both strands of the supplied sequence"/>
-	    <param name="skipOverlapping" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Skip overlapping matches" help="-s : This is a workaround to avoid a problem in the DNABANK, overlapping matches will be ignored"/>
-	    <param name="fancy" type="boolean" checked="false" truevalue="-F" falsevalue="" label="Show Alignments" help="Display full alignments to pattern"/>
+        <param name="sequenceFile" type="data" format="fasta" multiple="false" label="Sequence File" help="This file specifies the sequence in which the motif will be searched"/>
+        <param name="compStrands" type="boolean" truevalue="-c" falsevalue="" checked="false" label="Also search on complementary strands" help="-c : Search both strands of the supplied sequence"/>
+        <param name="skipOverlapping" type="boolean" truevalue="-s" falsevalue="" checked="false" label="Skip overlapping matches" help="-s : This is a workaround to avoid a problem in the DNABANK, overlapping matches will be ignored"/>
+        <param name="fancy" type="boolean" checked="false" truevalue="-F" falsevalue="" label="Show Alignments" help="Display full alignments to pattern"/>
     </inputs>
     <outputs>
         <data format="txt" name="stdout" label="${tool.name} on ${on_string}" />
@@ -48,6 +49,7 @@
         </test>
     </tests>
     <help>
+<![CDATA[
 **What RNABOB does**
 
 RNABOB allows searching a sequence database for RNA structural motifs.
@@ -59,158 +61,159 @@
 
 **Sequence database format**
 
-RNABOB is currently restricted to reading sequence files in FASTA format. 
+RNABOB is currently restricted to reading sequence files in FASTA format.
 The command line version of RNABOB can also read sequence files in GCG, EMBL, GenBank and other formats.
 
 -----
 
 **Descriptor file syntax**
 
-The descriptor file syntax is fairly powerful, and allows a great deal of freedom for specifying 
+The descriptor file syntax is fairly powerful, and allows a great deal of freedom for specifying
 RNA motifs. The syntax is therefore a bit complicated.
 
 The descriptor file has two parts: a **topology** description and an **explicit** description.
 
-The first non-blank, non-comment line of the file is the topology description. It defines the 
-order of occurrence of a series of single-stranded, double-stranded and related elements. Each 
-element must be given a unique name (a number, typically) and must be prefixed with '**s**', 
-'**h**', or '**r**', indicating single-strand, helical, or a relational element. Helical and 
+The first non-blank, non-comment line of the file is the topology description. It defines the
+order of occurrence of a series of single-stranded, double-stranded and related elements. Each
+element must be given a unique name (a number, typically) and must be prefixed with '**s**',
+'**h**', or '**r**', indicating single-strand, helical, or a relational element. Helical and
 relational elements are paired to other elements, which are suffixed by a prime, **\'**.
 
 For example::
 
-	\
-			h1 s1 h1'
+    \
+            h1 s1 h1'
 
-describes a hairpin loop structure with a simple helix and single-stranded loop. If the helix 
+describes a hairpin loop structure with a simple helix and single-stranded loop. If the helix
 always contained a non-canonical base pair at one position, the topology coud be described as::
 
-	\
-			h1 r1 h2 s1 h2' r1' h1'
+    \
+            h1 r1 h2 s1 h2' r1' h1'
 
-where r1,r1' indicate a correlation, where the sequence r1 constrains the sequence of r1'. 
+where r1,r1' indicate a correlation, where the sequence r1 constrains the sequence of r1'.
 (Helices are a special case of this.)
 
-The remaining non-comment, non-blank lines are explicit descriptions of each element in turn. Each 
-line contains 3 or 4 fields, separated by tabs or blank space. The first field is the name of the 
-element, from the topology description. The second field is the number of mismatches allowed in 
+The remaining non-comment, non-blank lines are explicit descriptions of each element in turn. Each
+line contains 3 or 4 fields, separated by tabs or blank space. The first field is the name of the
+element, from the topology description. The second field is the number of mismatches allowed in
 this element. The third field is the primary sequence constraint to apply to this element.
 
-Helices and relational element pairs are specified on a single line rather than two. Mismatches 
-and primary sequence constraints are given as pairs, separated by a colon '**:**'. The left side 
-is the constraint applied to the upstream element, and the right side is applied to the downstream 
+Helices and relational element pairs are specified on a single line rather than two. Mismatches
+and primary sequence constraints are given as pairs, separated by a colon '**:**'. The left side
+is the constraint applied to the upstream element, and the right side is applied to the downstream
 elements.
 
-The primary sequence constraint is given as a sequence of nucleotides. Any IUPAC single-letter 
-code is recognized, including N if the position can have any base identity. Allowed length 
-variations are specified with asterisks ``'*'``, where each ``*`` will allow either 0 or 1 N at 
+The primary sequence constraint is given as a sequence of nucleotides. Any IUPAC single-letter
+code is recognized, including N if the position can have any base identity. Allowed length
+variations are specified with asterisks ``'*'``, where each ``*`` will allow either 0 or 1 N at
 that position.
 
 For example::
 
-	\
-			GGAGG******NNNAUG
+    \
+            GGAGG******NNNAUG
 
-specifies a GGAGG Shine/Dalgarno site and an AUG initiation codon, separated by a spacer of 3 to 9 
+specifies a GGAGG Shine/Dalgarno site and an AUG initiation codon, separated by a spacer of 3 to 9
 nucleotides of any sequence.
 
 An alternative syntax can be used for very long gaps::
 
-	\
-			GGAGG[10]NNNAUG is the same as GGAGG**********NNNAUG
+    \
+            GGAGG[10]NNNAUG is the same as GGAGG**********NNNAUG
 
-Be careful defining variable length helices and relational elements; if the number and type (gap 
-or identity) of position do not match on left and right sides, the program will refuse to accept 
+Be careful defining variable length helices and relational elements; if the number and type (gap
+or identity) of position do not match on left and right sides, the program will refuse to accept
 the descriptor.
 
-Relational elements have an additional field which specifies a "transformation matrix" of four 
-nucleotides, specifying the rule for making the ``r'`` pattern from the ``r`` sequence in order 
-``A-C-G-T``. For example, the transformation matrix for a simple helix is ``TGCA``; if you allow 
-``G-U`` pairs, it is ``TGYR``. RNABOB allows ``G-U`` pairing by default and uses the ``TGYR`` 
+Relational elements have an additional field which specifies a "transformation matrix" of four
+nucleotides, specifying the rule for making the ``r'`` pattern from the ``r`` sequence in order
+``A-C-G-T``. For example, the transformation matrix for a simple helix is ``TGCA``; if you allow
+``G-U`` pairs, it is ``TGYR``. RNABOB allows ``G-U`` pairing by default and uses the ``TGYR``
 matrix for helical elements.
 
 For example, the explicit description of our hairpin might be:
 
 ::
 
-	\
-	 		h1 0:0 NNN:NNN
-	 		r1 0:0 R:N GNAN
-	 		h2 0:0 **NC:GN**
-		 	s1 0 UUCG
+    \
+             h1 0:0 NNN:NNN
+             r1 0:0 R:N GNAN
+             h2 0:0 **NC:GN**
+             s1 0 UUCG
 
-This describes a stem of 6 to 8 base pairs, in which the 4th pair from the bottom of the stem must 
-be a non-canonical GA pair. Note that, in general, the left side of the primary constraint for 
-helices and relational elements is redundant, and should be given as all N. In some cases it is 
-convenient to constrain the right side to require a particular base pair (GU, for instance) at one 
+This describes a stem of 6 to 8 base pairs, in which the 4th pair from the bottom of the stem must
+be a non-canonical GA pair. Note that, in general, the left side of the primary constraint for
+helices and relational elements is redundant, and should be given as all N. In some cases it is
+convenient to constrain the right side to require a particular base pair (GU, for instance) at one
 position.
 
-A note on mismatches: The split format for helices and relational elements works like this. The 
-number on the left constrains the primary sequence match of the left side of the primary 
-constraint. The number on the right constrains the match of the right side of the primary 
-constraint, *after* that side has been constructed according to the sequence on the left. In other 
-words, the number on the left constrains the mismatches in primary sequence only, while the number 
+A note on mismatches: The split format for helices and relational elements works like this. The
+number on the left constrains the primary sequence match of the left side of the primary
+constraint. The number on the right constrains the match of the right side of the primary
+constraint, *after* that side has been constructed according to the sequence on the left. In other
+words, the number on the left constrains the mismatches in primary sequence only, while the number
 on the right will constrain the number of mispaired positions in the helix.
 
-Finally: any line that begins with a pound sign '#' is a comment line, and will not be interpreted 
+Finally: any line that begins with a pound sign '#' is a comment line, and will not be interpreted
 by the pattern compiler.
 
 **Options**
 
 The behavior of RNABOB can be modified by use of the following options:
 
-*Complement*: Selecting this option will cause RNABOB to search for the pattern also on the 
+*Complement*: Selecting this option will cause RNABOB to search for the pattern also on the
 complementary strands.
 
-*Skip*: This is a workaround to avoid a problem in the DNABANK. There are some sequences in the 
-database which have long stretches of ambiguous sequence (N's). Descriptors with no primary 
-sequence constraints will match these garbage sequences at many, many positions, and generate huge 
-outputs. This option toggles a search strategy that skips forward a pattern-length rather than a 
-single base when a match is found, thus printing out only a single match when overlapping matches 
-are found. 
+*Skip*: This is a workaround to avoid a problem in the DNABANK. There are some sequences in the
+database which have long stretches of ambiguous sequence (N's). Descriptors with no primary
+sequence constraints will match these garbage sequences at many, many positions, and generate huge
+outputs. This option toggles a search strategy that skips forward a pattern-length rather than a
+single base when a match is found, thus printing out only a single match when overlapping matches
+are found.
 
 **Examples**
 
-The following example descriptors included in the source distribution 
+The following example descriptors included in the source distribution
 (http://selab.janelia.org/software/rnabob/rnabob.tar.gz):
 
-	- trna.des - a general descriptor of a tRNA structure
-	- r17.des - descriptor of the consensus binding site for the r17 phage coat protein
-	- pseudoknot.des - description of a simple pseudoknotted structure
+    - trna.des - a general descriptor of a tRNA structure
+    - r17.des - descriptor of the consensus binding site for the r17 phage coat protein
+    - pseudoknot.des - description of a simple pseudoknotted structure
 
-An example cosmid ``F22B7.fa`` from the *C. elegans* genome sequencing project is also provided 
+An example cosmid ``F22B7.fa`` from the *C. elegans* genome sequencing project is also provided
 for running these descriptors against.
 
 ::
 
-	\
-		# trna.des
-		#
-		# Generalized descriptor of a tRNA cloverleaf. Doesn't
-		# find them all though. 
-		#
+    \
+        # trna.des
+        #
+        # Generalized descriptor of a tRNA cloverleaf. Doesn't
+        # find them all though.
+        #
 
-		h1 s1 h2 s2 h2' s3 h3 s4 h3' s5 h4 s6 h4' h1' s8
+        h1 s1 h2 s2 h2' s3 h3 s4 h3' s5 h4 s6 h4' h1' s8
 
-		h1 0:2 NNNNNNN:NNNNNNN
-		h2 0:1 *NNN:NNN*
-		h3 0:1 NNNNN:NNNNN
-		h4 0:1 NNNNN:NNNNN
-		s1 0 TN
-		s2 0 NNNN**********
-		s3 0 N
-		s4 0 NNNNNN*
-		s5 0 NN********************
-		s6 0 TTC****
-		s8 0 NCCA
+        h1 0:2 NNNNNNN:NNNNNNN
+        h2 0:1 *NNN:NNN*
+        h3 0:1 NNNNN:NNNNN
+        h4 0:1 NNNNN:NNNNN
+        s1 0 TN
+        s2 0 NNNN**********
+        s3 0 N
+        s4 0 NNNNNN*
+        s5 0 NN********************
+        s6 0 TTC****
+        s8 0 NCCA
 
-Running RNABOB with ``trna.des`` against ``F22B7.fa`` searches the top strand of the cosmid for 
-the above motif. ``trna.des`` hits twice, once on each strand. (F22B7 has several other tRNA genes 
+Running RNABOB with ``trna.des`` against ``F22B7.fa`` searches the top strand of the cosmid for
+the above motif. ``trna.des`` hits twice, once on each strand. (F22B7 has several other tRNA genes
 in it which the pattern fails to detect - this is *not* a pattern to use for tRNA genefinding!).
-    </help> 
+]]>
+    </help>
     <citations>
-	<citation type="doi">10.1093/bioinformatics/6.4.325</citation>
-	<citation type="bibtex">@UNPUBLISHED{rnabob,
+        <citation type="doi">10.1093/bioinformatics/6.4.325</citation>
+        <citation type="bibtex">@UNPUBLISHED{rnabob,
 author = {Eddy S.R},
 title = {RNABOB: a program to search for RNA secondary structure motifs in sequence databases},
 note = {}}</citation>