changeset 4:51261dff08a5 draft

planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/LocARNAGraphClust commit f971832d2b34a182314e5201ea6895dd207c5923
author rnateam
date Mon, 13 Mar 2017 18:02:55 -0400
parents 606440a3852d
children 859ee7d8f207
files locarna_best_subtree.xml
diffstat 1 files changed, 78 insertions(+), 193 deletions(-) [+]
line wrap: on
line diff
--- a/locarna_best_subtree.xml	Mon Feb 27 12:02:38 2017 -0500
+++ b/locarna_best_subtree.xml	Mon Mar 13 18:02:55 2017 -0400
@@ -1,6 +1,6 @@
 <tool id="locarna_best_subtree" name="locarna_graphclust" version="0.1.0" >
   <requirements>
-    <requirement type="package" version="0.1.9">graphclust-wrappers</requirement>
+    <requirement type="package" version="0.1.10">graphclust-wrappers</requirement>
     <requirement type="package" version='1.8.10'>locarna</requirement>
     <requirement type="package" version='2.1'>rnaz</requirement>
     <requirement type="package" version='0.07'>perl-math-round</requirement>
@@ -11,9 +11,16 @@
   <command>
     <![CDATA[
 
-        'locARNAGraphClust.pl'  '$center_fa_file' '$tree_file' '$tree_matrix' '$data_map' $allow_overlap
+        locARNAGraphClust.pl 
+            '$center_fa_file'
+            '$tree_file'
+            '$tree_matrix'
+            '$data_map'
+            $allow_overlap
+            $free_endgaps
+
         #if str($param_type.param_type_selector) == "gclust"
-             $param_type.p
+            $param_type.p
             $param_type.max_diff_am
             $param_type.max_diff
             $param_type.tau
@@ -34,76 +41,74 @@
     <param type="data" name="data_map" label="data_map" format="txt" help="text format" />
     <param name="allow_overlap" type="boolean"  truevalue="1" falsevalue="0" label="Allow overlap in subtrees" help="otherwise ignore subtree if it contains overlapping sequences"/>
 
-    <conditional name="param_type">
-    <param name="param_type_selector" type="select" label="Choose the type of parameters">
-        <option value="locarna">LocARNA defaults</option>
-        <option value="gclust" selected="True">GrapClust defaults(changeable)</option>
+    <param name="free_endgaps" type="select" label="Free endgaps"
+            help="Specify whether gaps at the ends (all, 5', or 3' ends)
+                  of the sequences should be penalized or allowed for free.">
+        <option value="0">No free endgaps</option>
+        <option value="--free-endgaps">Free endgaps</option>
+        <option value="--free-endgaps-5">Free endgaps, only 5'</option>
+        <option value="--free-endgaps-3">Free endgaps, only 3'</option>
     </param>
-    <when value="gclust">
-
-      <param name="p" type="float" value="0.001" size="5" label="minimal probability" help="-p"/>
-      <param name="max_diff_am" type="integer" value="50" size="5" label=" maximal difference for sizes of matched arcs" help="--max-diff-am"/>
-      <param argument="tau" type="integer" value="50" min="0" max="200" label="Sequence contribution at structure match in percent"/>
-      <param name="max_diff" type="integer" value="100" size="5" label="maximal difference for alignment traces" help="--max-diff"/>
-      <param name="plfold_minlen" type="integer" value="210" size="5" label="Minimal length of a sequences for which RNAplfold is used" />
-
-      <param name="struct_weight" argument="struct-weight"
-              label="Structure weight" type="integer"
-              value="180" min="0" max="800" />
-       <param name="indel_opening" argument="indel-opening"
-              label="Indel opening score" type="integer"
-              value="-400" max="0" min="-1500" />
-       <param argument="indel" label="Indel score" type="integer"
-              value="-200" min="-1000" max="0" />
-
-       <param  name="alifold_consensus_dp"
-               type="boolean" checked="True"
-               truevalue="--alifold-consensus-dp" falsevalue=" "
-               label="Compute consensus dot plot by alifold" />
-
-       <param name="plfold_span"
-                type="integer" value="150" min="-1" max="400"
-                label="Maximum basepair span by RNAplfold (local folding); -1 for global folding" />
-
-         <param name="plfold_winsize"
+    <conditional name="param_type">
+        <param name="param_type_selector" type="select" label="Choose the type of parameters">
+            <option value="locarna">LocARNA defaults</option>
+            <option value="gclust" selected="True">GraphClust defaults (changeable)</option>
+        </param>
+        <when value="gclust">
+          <param name="p" type="float" value="0.001" size="5" label="minimal probability" help="-p"/>
+          <param name="max_diff_am" type="integer" value="50" size="5" label="maximal difference for sizes of matched arcs" help="--max-diff-am"/>
+          <param argument="tau" type="integer" value="50" min="0" max="200" label="Sequence contribution at structure match in percent"/>
+          <param name="max_diff" type="integer" value="100" size="5" label="maximal difference for alignment traces" help="--max-diff"/>
+          <param name="plfold_minlen" type="integer" value="210" size="5" label="Minimal length of a sequence for which RNAplfold is used" />
+          <param name="struct_weight" argument="struct-weight"
+                  label="Structure weight" type="integer"
+                  value="180" min="0" max="800" />
+          <param name="indel_opening" argument="indel-opening"
+                  label="Indel opening score" type="integer"
+                  value="-400" max="0" min="-1500" />
+          <param argument="indel" label="Indel score" type="integer"
+                  value="-200" min="-1000" max="0" />
+          <param  name="alifold_consensus_dp"
+                   type="boolean" checked="True"
+                   truevalue="--alifold-consensus-dp" falsevalue=" "
+                   label="Compute consensus dot plot by alifold" />
+          <param name="plfold_span"
+                    type="integer" value="150" min="-1" max="400"
+                    label="Maximum basepair span by RNAplfold (local folding); -1 for global folding" />
+          <param name="plfold_winsize"
                 type="integer" value="300"  min="-1" max="800"
                 label="Window size for local folding" />
-
-
-    </when>
-    <when value="locarna">
-    </when>
-</conditional>
-
-  </inputs>
-  <outputs>
-    <data name="model_tree_stk" format="stockholm" label="model.tree.stk" from_work_dir="MODEL/best_subtree.aln" />
-  </outputs>
-  <tests>
-    <test>
-      <param name="tree_file" value="1.1.tree"/>
-      <param name="center_fa_file" value="1.1.center.fa"/>
-      <param name="data_map" value="data.map"/>
-      <param name="tree_matrix" value="1.1.matrix.tree"/>
-      <param name="allow_overlap" value="0"/>
-      <conditional name="param_type">
-        <param name="iteration_num_selector" value="gclust"/>
-        <param name="p" value="0.001"/>
-        <param name="max_diff_am" value="50"/>
-        <param name="tau" value="50"/>
-        <param name="max_diff" value="100"/>
-        <param name="plfold_minlen" value="210"/>
-        <param name="struct_weight" value="180"/>
-        <param name="indel_opening" value="-400"/>
-        <param name="indel" value="-200"/>
-        <param name="alifold_consensus_dp" value="--alifold-consensus-dp"/>
-        <param name="plfold_span" value="150"/>
-        <param name="plfold_winsize" value="300"/>
-      </conditional>
-      <output name="model_tree_stk" file="best_subtree.aln"/>
-    </test>
-  </tests>
-  <help>
+        </when>
+        <when value="locarna" />
+    </conditional>
+    </inputs>
+    <outputs>
+        <data name="model_tree_stk" format="stockholm" label="model.tree.stk" from_work_dir="MODEL/best_subtree.aln" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="tree_file" value="1.1.tree"/>
+            <param name="center_fa_file" value="1.1.center.fa"/>
+            <param name="data_map" value="data.map"/>
+            <param name="tree_matrix" value="1.1.matrix.tree"/>
+            <param name="allow_overlap" value="false"/>
+            <param name="free_endgaps" value="0"/>
+            <param name="param_type_selector" value="gclust"/>
+            <param name="p" value="0.001"/>
+            <param name="max_diff_am" value="50"/>
+            <param name="tau" value="50"/>
+            <param name="max_diff" value="100"/>
+            <param name="plfold_minlen" value="210"/>
+            <param name="struct_weight" value="180"/>
+            <param name="indel_opening" value="-400"/>
+            <param name="indel" value="-200"/>
+            <param name="alifold_consensus_dp" value="true"/>
+            <param name="plfold_span" value="150"/>
+            <param name="plfold_winsize" value="300"/>
+            <output name="model_tree_stk" file="best_subtree.aln"/>
+        </test>
+    </tests>
+    <help>
     <![CDATA[
 **What it does**
 
@@ -112,7 +117,6 @@
 This saves the calculation of pairwise all-vs-all similarities and construction of the guide tree.
 
 
-
 ]]>
   </help>
   <citations>
@@ -125,127 +129,8 @@
         organization={Omnipress}
       }
       </citation>
-      <citation type="bibtex">@Article{Will_Joshi_Hofacker-LocAR_Accur_bound-2012,
-  author =   {Will, Sebastian and Joshi, Tejal and Hofacker, Ivo L. and
-                  Stadler, Peter F. and Backofen, Rolf},
-  title =   {{LocARNA}-{P}: {Accurate} boundary prediction and improved
-                  detection of structural {RNAs}},
-  journal =   {RNA},
-  year =   {2012},
-  volume =   {18},
-  number =   {5},
-  pages =   {900-14},
-  user =   {will},
-  pmid =   {22450757},
-  doi =    {10.1261/rna.029041.111},
-  issn =    {1469-9001},
-  issn =    {1355-8382},
-  abstract =   {Current genomic screens for noncoding RNAs (ncRNAs) predict
-                  a large number of genomic regions containing potential
-                  structural ncRNAs. The analysis of these data requires
-                  highly accurate prediction of ncRNA boundaries and
-                  discrimination of promising candidate ncRNAs from weak
-                  predictions. Existing methods struggle with these goals
-                  because they rely on sequence-based multiple sequence
-                  alignments, which regularly misalign RNA structure and
-                  therefore do not support identification of structural
-                  similarities. To overcome this limitation, we compute
-                  columnwise and global reliabilities of alignments based on
-                  sequence and structure similarity; we refer to these
-                  structure-based alignment reliabilities as STARs. The
-                  columnwise STARs of alignments, or STAR profiles, provide a
-                  versatile tool for the manual and automatic analysis of
-                  ncRNAs. In particular, we improve the boundary prediction of
-                  the widely used ncRNA gene finder RNAz by a factor of 3 from
-                  a median deviation of 47 to 13 nt. Post-processing RNAz
-                  predictions, LocARNA-P's STAR score allows much stronger
-                  discrimination between true- and false-positive predictions
-                  than RNAz's own evaluation. The improved accuracy, in this
-                  scenario increased from AUC 0.71 to AUC 0.87, significantly
-                  reduces the cost of successive analysis steps. The
-                  ready-to-use software tool LocARNA-P produces
-                  structure-based multiple RNA alignments with associated
-                  columnwise STARs and predicts ncRNA boundaries. We provide
-                  additional results, a web server for LocARNA/LocARNA-P, and
-                  the software package, including documentation and a pipeline
-                  for refining screens for structural ncRNA, at
-                  http://www.bioinf.uni-freiburg.de/Supplements/LocARNA-P/.}
-}
-        </citation>
-        <citation type="bibtex">@Article{Will:etal:_infer_non_codin_rna_famil:PLOS2007,
-  author =   {Sebastian Will and Kristin Reiche and Ivo L. Hofacker and
-                  Peter F. Stadler and Rolf Backofen},
-  title =   {Inferring Non-Coding {RNA} Families and Classes by Means of
-                  Genome-Scale Structure-Based Clustering},
-  journal =   {PLoS Comput Biol},
-  year =   2007,
-  volume =       {3},
-  number =       {4},
-  pages =        {e65},
-  issn =         {1553-7358},
-  issn =         {1553-734X},
-  pmid =         {17432929},
-  doi =          {10.1371/journal.pcbi.0030065},
-  user =   {will},
-  abstract =   {The RFAM database defines families of ncRNAs by means of
-                  sequence similarities that are sufficientto establish
-                  homology. In some cases, such as microRNAs, box H/ACA
-                  snoRNAs, functional commonalities define classes of RNAs
-                  that are characterized by structural similarities, and
-                  typically consist ofmultiple RNA families. Recent advances
-                  in high-throughput transcriptomics and comparative genomics
-                  have produced very large sets of putative non-coding RNAs
-                  and regulatory RNA signals. For many ofthem, evidence for
-                  stabilizing selection acting on their secondary structures
-                  has been derived, and at least approximate models of their
-                  structures have been computed. The overwhelming majority of
-                  these hypo-thetical RNAs cannot be assigned to established
-                  families or classes. We present here a structure-based
-                  clustering approach that is capable of extracting putative
-                  RNA classesfrom genome-wide surveys for structured RNAs. The
-                  LocARNA tool implements a novel variant of theSankoff
-                  algorithm that is sufficiently fast to deal with several
-                  thousand candidate sequences. The method is also robust
-                  against false positive predictions, i.e., a contamination of
-                  the input data with unstructured ornon-conserved
-                  sequences. We have successfully tested the LocARNA-based
-                  clustering approach on the sequences of the
-                  RFAM-seedalignments. Furthermore, we have applied it to a
-                  previously published set of 3332 predicted structured
-                  elements in the Ciona intestinalis genomes (Missal et al.,
-                  Bioinformatics 21(S2), i77-i78). In addition torecovering
-                  e.g. tRNAs as a structure-based class, the method identifies
-                  several RNA families, including microRNA and snoRNA
-                  candidates, and suggests several novel classes of ncRNAs for
-                  which to-date norepresentative has been experimentally
-                  characterized.}
-}
-
-          </citation>
-          <citation type="bibtex">@Article{Smith:Heyne:Richter:Freib_RNA_Tools:NAR2010,
-  author =   {Smith, Cameron and Heyne, Steffen and Richter, Andreas S.
-                  and Will, Sebastian and Backofen, Rolf},
-  title =   {Freiburg {RNA} {Tools}: a web server integrating {IntaRNA},
-                  {ExpaRNA} and {LocARNA}},
-  journal =   NAR,
-  year =   {2010},
-  volume =   {38 Suppl},
-  number =   {},
-  pages =   {W373-7},
-  user =   {arichter},
-  pmid =   {20444875},
-  doi =    {10.1093/nar/gkq316},
-  issn =    {0305-1048},
-  issn =   {1362-4962},
-  abstract =   {The Freiburg RNA tools web server integrates three tools
-                  for the advanced analysis of RNA in a common web-based user
-                  interface. The tools IntaRNA, ExpaRNA and LocARNA support
-                  the prediction of RNA-RNA interaction, exact RNA matching
-                  and alignment of RNA, respectively. The Freiburg RNA tools
-                  web server and the software packages of the stand-alone
-                  tools are freely accessible at
-                  http://rna.informatik.uni-freiburg.de.}
-}
-            </citation>
+      <citation type="doi">10.1261/rna.029041.111</citation>
+      <citation type="doi">10.1371/journal.pcbi.0030065</citation>
+      <citation type="doi">10.1093/nar/gkq316</citation>
   </citations>
 </tool>