view RNAShapes.xml @ 3:7adb2518f6e9 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/rna_shapes commit 1527e05bcd748a2b3cef22e0e356697066a55635
author rnateam
date Sat, 11 Nov 2017 15:07:41 -0500
parents f33190f18ee6
children
line wrap: on
line source

<tool id="RNAshapes" name="RNAshapes" version="3.3.0">
  <description>RNA Secondary structure prediction</description>
  <macros>
    <!-- Name of the wrapped executable; substituted wherever @EXECUTABLE@ appears. -->
    <token name="@EXECUTABLE@">RNAshapes</token>

    <!-- Each macro below wraps a single parameter definition so that it can be
         pulled into the inputs section via <expand macro="..."/>. The option
         each parameter maps to is named in its help attribute. -->

    <!-- Shape abstraction level, 1 to 5; 5 is preselected. -->
    <macro name="macro_param_shape_level">
      <param name="param_shape_level"
             type="select"
             label="Shape Level"
             help="(--shapeLevel) 1: most concrete shape, ... 5: most abstract shape.">
        <option value="1">1 most concrete</option>
        <option value="2">2</option>
        <option value="3">3</option>
        <option value="4">4</option>
        <option value="5" selected="True">5 most abstract</option>
      </param>
    </macro>

    <!-- Grammar selection; macrostate is preselected. -->
    <macro name="macro_param_grammar">
      <param name="param_grammar"
             type="select"
             label="Dangling End Energies"
             help="(--grammar) See below for details.">
        <option value="macrostate" selected="True">macrostate</option>
        <option value="microstate">microstate</option>
        <option value="nodangle">nodangle</option>
        <option value="overdangle">overdangle</option>
      </param>
    </macro>

    <!-- Probability thresholds, each restricted to the interval [0, 1]. -->
    <macro name="macro_param_low_prob_filter">
      <param name="param_low_prob_filter"
             type="float"
             value="1e-6"
             min="0"
             max="1"
             label="Low Probability Filter"
             help="(--lowProbFilter) This option sets a barrier for filtering out results with very low probabilities during calculation."/>
    </macro>
    <macro name="macro_param_bppm_threshold">
      <param name="param_bppm_threshold"
             type="float"
             value="1e-5"
             min="0"
             max="1"
             label="BPPM Threshold"
             help="(--bppmThreshold) Set the threshold for base pair probabilities included in the postscripts output."/>
    </macro>
    <macro name="macro_param_out_low_prob_filter">
      <param name="param_out_low_prob_filter"
             type="float"
             value="0"
             min="0"
             max="1"
             label="Output Low Probability Filter"
             help="(--outputLowProbFilter) This option sets a filter for omitting low probability results during output."/>
    </macro>

    <!-- Boolean switches; they render as the literal strings 1 and 0. -->
    <macro name="macro_param_show_samples">
      <param name="param_show_samples"
             type="boolean"
             truevalue="1"
             falsevalue="0"
             label="Show Samples"
             help="(--showSamples) Inspect the samples drawn by stochastic backtrace."/>
    </macro>
    <macro name="macro_param_allow_lp">
      <param name="param_allow_lp"
             type="boolean"
             truevalue="1"
             falsevalue="0"
             label="Lonely Base-Pairs"
             help="(--allowLP) Allow lonely base pairs."/>
    </macro>

    <!-- Non-negative integers with a default value. -->
    <macro name="macro_param_prob_decimals">
      <param name="param_prob_decimals"
             type="integer"
             value="7"
             min="0"
             label="Number of Printed Decimals"
             help="(--probDecimals) Number of used digits to print probabilities."/>
    </macro>
    <macro name="macro_param_num_samples">
      <param name="param_num_samples"
             type="integer"
             value="1000"
             min="0"
             label="Number of Samples"
             help="(--numSamples) Sets the number of samples that are drawn to estimate shape probabilities."/>
    </macro>

    <!-- Optional energy ranges: no default value, lower bound 0 when given. -->
    <macro name="macro_param_absolute_deviation">
      <param name="param_absolute_deviation"
             type="float"
             optional="True"
             label="Absolute Deviation"
             help="(--absoluteDeviation) This sets the energy range as an absolute value of the minimum free energy. Cannot be combined with --relativeDeviation.">
        <validator type="in_range" min="0"/>
      </param>
    </macro>
    <macro name="macro_param_relative_deviation">
      <param name="param_relative_deviation"
             type="float"
             optional="True"
             label="Relative Deviation"
             help="(--relativeDeviation) This sets the energy range as percentage of the minimum free energy. Cannot be combined with --absoluteDeviation.">
        <validator type="in_range" min="0"/>
      </param>
    </macro>

    <!-- Optional window settings: no default value, lower bound 1 when given. -->
    <macro name="macro_param_window_size">
      <param name="param_window_size"
             type="integer"
             optional="True"
             label="Window Size"
             help="(--windowSize) Activates window mode and computes substrings of this size. See below for details.">
        <validator type="in_range" min="1"/>
      </param>
    </macro>
    <macro name="macro_param_window_increment">
      <param name="param_window_increment"
             type="integer"
             optional="True"
             label="Window Increment"
             help="(--windowIncrement) Increment Size. See below for details.">
        <validator type="in_range" min="1"/>
      </param>
    </macro>

    <!-- Boolean switch; renders as the literal string 1 or 0. -->
    <macro name="macro_param_structure_probs">
      <param name="param_structure_probs"
             type="boolean"
             truevalue="1"
             falsevalue="0"
             label="Structure Probabilities"
             help="(--structureProbs) In addition to free energy also the probability of structures is calculated."/>
    </macro>
  </macros>
  <requirements>
    <!-- The executable, named through the @EXECUTABLE@ token. -->
    <requirement type="binary">@EXECUTABLE@</requirement>
    <!-- The rnashapes package, pinned to version 3.3.0. -->
    <requirement version="3.3.0" type="package">rnashapes</requirement>
  </requirements>
  <command detect_errors="aggressive">
<![CDATA[
  ## Assemble the RNAshapes call.
  ##
  ## Mode-specific parameters are defined inside the param_cond_mode
  ## conditional, so only those of the selected mode exist at run time;
  ## varExists() guards every one of them.
  RNAshapes
  --mode $param_cond_mode.param_mode

  ## Parameters that always carry a value when their mode offers them.
  #if $varExists('param_cond_mode.param_shape_level'):
    --shapeLevel $param_cond_mode.param_shape_level
  #end if
  #if $varExists('param_cond_mode.param_low_prob_filter'):
    --lowProbFilter $param_cond_mode.param_low_prob_filter
  #end if
  #if $varExists('param_cond_mode.param_bppm_threshold'):
    --bppmThreshold $param_cond_mode.param_bppm_threshold
  #end if
  #if $varExists('param_cond_mode.param_out_low_prob_filter'):
    --outputLowProbFilter $param_cond_mode.param_out_low_prob_filter
  #end if
  #if $varExists('param_cond_mode.param_show_samples'):
    --showSamples $param_cond_mode.param_show_samples
  #end if
  #if $varExists('param_cond_mode.param_prob_decimals'):
    --probDecimals $param_cond_mode.param_prob_decimals
  #end if
  #if $varExists('param_cond_mode.param_num_samples'):
    --numSamples $param_cond_mode.param_num_samples
  #end if

  ## Optional numeric parameters. They are compared as strings against the
  ## "unset" renderings instead of being tested for truthiness, so that an
  ## explicitly entered 0 (accepted by the deviation validators) is passed on
  ## rather than silently dropped.
  ## The parameter help states that the two deviation options cannot be
  ## combined; that is not enforced here.
  #if $varExists('param_cond_mode.param_absolute_deviation') and str($param_cond_mode.param_absolute_deviation) not in ('', 'None'):
    --absoluteDeviation $param_cond_mode.param_absolute_deviation
  #end if
  #if $varExists('param_cond_mode.param_relative_deviation') and str($param_cond_mode.param_relative_deviation) not in ('', 'None'):
    --relativeDeviation $param_cond_mode.param_relative_deviation
  #end if
  #if $varExists('param_cond_mode.param_window_size') and str($param_cond_mode.param_window_size) not in ('', 'None'):
    --windowSize $param_cond_mode.param_window_size
  #end if
  #if $varExists('param_cond_mode.param_window_increment') and str($param_cond_mode.param_window_increment) not in ('', 'None'):
    --windowIncrement $param_cond_mode.param_window_increment
  #end if

  #if $varExists('param_cond_mode.param_structure_probs'):
    --structureProbs $param_cond_mode.param_structure_probs
  #end if

  ## Parameters shared by all modes.
  --grammar $param_grammar
  --allowLP $param_allow_lp

  ## Dataset paths are quoted so the shell treats each as a single word.
  '$param_in'

  ## Standard output is captured as the text result for every mode except
  ## "outside", whose outputs are the *.ps files collected by the outputs
  ## section.
  #if $param_cond_mode.param_mode != 'outside':
    > '$param_out'
  #end if
  ]]>
  </command>

  <inputs>
    <!-- Mandatory FASTA dataset handed to the program. -->
    <param name="param_in"
           type="data"
           format="fasta"
           optional="False"
           label="Input: Fasta file"/>

    <!-- The selected calculation mode decides which further parameters are shown. -->
    <conditional name="param_cond_mode">
      <param name="param_mode"
             type="select"
             label="Calculation Mode"
             help="(--mode) For a description of the different modes see below.">
        <option value="shapes" selected="True">shapes</option>
        <option value="mfe">mfe</option>
        <option value="subopt">subopt</option>
        <option value="probs">probs</option>
        <option value="sample">sample</option>
        <option value="cast">cast</option>
        <option value="eval">eval</option>
        <option value="abstract">abstract</option>
        <option value="outside">outside</option>
        <option value="mea">mea</option>
      </param>

      <!-- shapes: structure probabilities, energy deviations, shape level, window settings -->
      <when value="shapes">
        <expand macro="macro_param_structure_probs"/>
        <expand macro="macro_param_absolute_deviation"/>
        <expand macro="macro_param_relative_deviation"/>
        <expand macro="macro_param_shape_level"/>
        <expand macro="macro_param_window_size"/>
        <expand macro="macro_param_window_increment"/>
      </when>

      <!-- mfe: structure probabilities, shape level, window settings -->
      <when value="mfe">
        <expand macro="macro_param_structure_probs"/>
        <expand macro="macro_param_shape_level"/>
        <expand macro="macro_param_window_size"/>
        <expand macro="macro_param_window_increment"/>
      </when>

      <!-- subopt: same parameter set as shapes -->
      <when value="subopt">
        <expand macro="macro_param_structure_probs"/>
        <expand macro="macro_param_absolute_deviation"/>
        <expand macro="macro_param_relative_deviation"/>
        <expand macro="macro_param_shape_level"/>
        <expand macro="macro_param_window_size"/>
        <expand macro="macro_param_window_increment"/>
      </when>

      <!-- probs: probability filters, window settings, printed decimals -->
      <when value="probs">
        <expand macro="macro_param_structure_probs"/>
        <expand macro="macro_param_low_prob_filter"/>
        <expand macro="macro_param_out_low_prob_filter"/>
        <expand macro="macro_param_window_size"/>
        <expand macro="macro_param_window_increment"/>
        <expand macro="macro_param_prob_decimals"/>
      </when>

      <!-- sample: sampling controls, output filter, shape level, window settings, printed decimals -->
      <when value="sample">
        <expand macro="macro_param_structure_probs"/>
        <expand macro="macro_param_num_samples"/>
        <expand macro="macro_param_show_samples"/>
        <expand macro="macro_param_out_low_prob_filter"/>
        <expand macro="macro_param_shape_level"/>
        <expand macro="macro_param_window_size"/>
        <expand macro="macro_param_window_increment"/>
        <expand macro="macro_param_prob_decimals"/>
      </when>

      <!-- cast: structure probabilities, energy deviations, shape level; no window settings -->
      <when value="cast">
        <expand macro="macro_param_structure_probs"/>
        <expand macro="macro_param_absolute_deviation"/>
        <expand macro="macro_param_relative_deviation"/>
        <expand macro="macro_param_shape_level"/>
      </when>

      <!-- eval: shape level only -->
      <when value="eval">
        <expand macro="macro_param_shape_level"/>
      </when>

      <!-- abstract: shape level only -->
      <when value="abstract">
        <expand macro="macro_param_shape_level"/>
      </when>

      <!-- outside: base pair probability threshold only -->
      <when value="outside">
        <expand macro="macro_param_bppm_threshold"/>
      </when>

      <!-- mea: structure probabilities, shape level, window settings, printed decimals -->
      <when value="mea">
        <expand macro="macro_param_structure_probs"/>
        <expand macro="macro_param_shape_level"/>
        <expand macro="macro_param_window_size"/>
        <expand macro="macro_param_window_increment"/>
        <expand macro="macro_param_prob_decimals"/>
      </when>
    </conditional>

    <!-- Parameters offered regardless of the selected mode. -->
    <expand macro="macro_param_grammar"/>
    <expand macro="macro_param_allow_lp"/>
  </inputs>
  <outputs>
    <!-- Text result; present for every mode except "outside". -->
    <data name="param_out" format="txt">
      <filter>param_cond_mode['param_mode'] != "outside"</filter>
    </data>
    <!-- "outside" mode only: files matching the .ps pattern are gathered into a
         list collection of rna_eps datasets; the part of the file name before
         ".ps" becomes the element designation. -->
    <collection name="param_out_dotplot"
                type="list"
                label="rna_eps outputs">
      <filter>param_cond_mode['param_mode'] == "outside"</filter>
      <discover_datasets pattern="(?P&lt;designation&gt;.*)\.ps"
                         ext="rna_eps"
                         visible="true"/>
    </collection>
  </outputs>
    <tests>
        <!-- mfe mode on the bundled FASTA input; the text output is compared
             with the stored reference result. -->
        <test>
            <param name="param_in" value="rnashapes_input1.fa"/>
            <param name="param_mode" value="mfe"/>
            <output name="param_out" file="rnashapes_result1.txt">
            </output>
        </test>
    </tests>

  <help>

**What it does**

This tool predicts RNA secondary structures. RNAshape abstraction maps structures to a tree-like domain of shapes, retaining adjacency and nesting of structural features, but disregarding helix lengths. Shape abstraction integrates well with dynamic programming algorithms, and hence it can be applied during structure prediction rather than afterwards. This avoids exponential explosion and can still give us a non-heuristic and complete account of properties of the molecule's folding space.


**Input**

- RNA sequence(s): A (multiple) FASTA file, containing RNA primary sequences.
- RNA secondary structure: A Vienna dot-bracket formatted string, representing a secondary RNA structure.
- RNA sequence: Exactly one RNA primary sequence.
- RNA family: A family of at least two potentially related RNA sequences. This is not an alignment, since sequences can have different lengths.


**Parameters**  

**Calculation Modes**

- **shapes**: Output of "subopt" mode is crowded by many very similar answers, which make it hard to focus on the "important" changes. The abstract shape concept groups similar answers together and reports only the best answer within such a group. Due to abstraction, suboptimal analyses can be done more thoroughly, by ignoring boring differences. (see parameter --shapeLevel)

- **mfe**: Computes the single energetically most stable secondary structure for the given RNA sequence. Co-optimal results will be suppressed, i.e. should different predictions have the same best energy value, just an arbitrary one out of them will be reported. This resembles the function of the program "RNAfold" of the Vienna group. If you only use "mfe" mode, consider switching to RNAfold, because their implementation is much faster, due to sophisticated low level C optimisations.

- **subopt**: Often, the biologically relevant structure is hidden among suboptimal predictions. In "subopt" mode, you can also inspect all suboptimal solutions up to a given threshold (see parameters --absoluteDeviation and --relativeDeviation). Duplicates might appear when using grammar "microstate", due to its semantic ambiguity with respect to Vienna-Dot-Bracket strings.

- **probs**: Structure probabilities are strictly correlated to their energy values. Grouped together into shape classes, their probabilities add up. Often a shape class with many members of worse energy becomes more probable than the shape containing the mfe structure but not many more members.

- **sample**: Probabilistic sampling based on partition function. This mode combines stochastic sampling with a-posteriori shape abstraction. A sample from the structure space holds M structures together with their shapes, on which classification is performed. The probability of a shape can then be approximated by its frequency in the sample.

- **cast**: This mode is the RNAcast approach. For a family of RNA sequences, this method independently enumerates the near-optimal abstract shape space, and predicts as the consensus an abstract shape common to all sequences. For each sequence, it delivers the thermodynamically best structure which has this common shape. Input is a multiple fasta file, which should contain at least two sequences. Output is sorted by "score" of common shapes, i.e. summed free energy of all sequences. R is the rank (= list position) of the shape in individual sequence analysis.

- **eval**: Evaluates the free energy of an RNA molecule in fixed secondary structure, similar to RNAeval from the Vienna group. Multiple answers stem from semantic ambiguity of the underlying grammar. It might happen that your given structure is not a structure for the sequence. Maybe your settings are too restrictive, e.g. not allowing lonely base-pairs (--allowLP). If you input a (multiple) FASTA file, RNAshapes assumes that exactly the first half of the contents of each entry is the RNA sequence and the second half is the corresponding structure. Whitespaces are ignored.

- **abstract**: Converts a Vienna-Dot-Bracket representation of a secondary structure into a shape string.

- **outside**: Applies the "outside"-algorithm to compute probabilities for all base pairs (i,j), based on the partition function. Output is a PostScript file, visualizing these probabilities as a "dot plot". The "dot plot" shows a matrix of squares with area proportional to the base pair probabilities in the upper right half. For each pair (i,j) with probability above --bppmThreshold there is a line of the form i j sqrt(p) ubox in the PostScript file, so that they can be easily extracted.

- **mea**: Finds the secondary structure with the maximal sum of base-pair probabilities (MEA=maximal expected accuracy). The equivalent Vienna Package name is the 'centroid secondary structure', defined as 'The centroid structure is the structure with the minimum total base-pair distance to all structures in the thermodynamic ensemble.'.


**Window Size**

Activates window mode and computes substrings of size i for the input. After computation for the first i bases is done, the window is pushed j bases to the right and the next computation is started. j is set by --windowIncrement. i must be a non-zero positive integer, smaller than the input length.


**Window Increment**

If --windowSize is given, this parameter sets the offset for the next window to j bases. j must be a non-zero positive integer, smaller than --windowSize.


**Dangling End Energies**

How to treat "dangling end" energies for bases adjacent to helices in free ends and multi-loops. 

- **nodangle**: (-d 0 in Vienna package) ignores dangling energies altogether.
- **overdangle**: (-d 2 in Vienna package) always dangles bases onto helices, even if they are part of neighboring helices themselves. Seems to be wrong, but could perform surprisingly well.
- **microstate**: (-d 1 in Vienna package) correct optimisation of all dangling possibilities, unfortunately this results in a semantically ambiguous search space regarding Vienna-Dot-Bracket notations.
- **macrostate**: (no correspondence in Vienna package) same as microstate, while staying unambiguous. Unfortunately, mfe computation violates Bellman's principle of optimality. Default is "macrostate".


For more information, visit http://bibiserv2.cebitec.uni-bielefeld.de/rnashapes?id=rnashapes_rnashapes_manual_manual

  </help>
  
  <citations>
    <!-- The element text is the bare DOI, without a "doi:" scheme prefix. -->
    <citation type="doi">10.1093/bioinformatics/btu649</citation>
  </citations>
</tool>