view rnafold.xml @ 7:86f517dcfdfb draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/vienna_rna commit d9f13fd859165e4280412ec73f8f3f8b258d351d-dirty
author rnateam
date Thu, 28 Sep 2017 16:24:39 -0400
parents b31dcdb31209
children bdb786715d28
line wrap: on
line source

<tool id="viennarna_rnafold" name="@EXECUTABLE@" version="@VERSION@.3">
    <!-- One-line summary of what the tool computes. -->
    <description>Calculate minimum free energy secondary structures and partition function of RNAs</description>
    <macros>
        <!-- Local token substituted wherever @EXECUTABLE@ appears; declared ahead of the macro file import. -->
        <token name="@EXECUTABLE@">RNAfold</token>
        <import>macros.xml</import>
    </macros>
    <!-- Expands the "requirements" macro (defined outside this file) with an additional Biopython package. -->
    <expand macro="requirements">
        <requirement version="1.65" type="package">biopython</requirement>
    </expand>
    <expand macro="stdio"/>
    <expand macro="version_command"/>
    <command>
 <![CDATA[
    ## A sequence typed into the text field is written to a local FASTA file;
    ## a ">Sequence" header line is prepended when the text does not start with
    ## one. startswith() is used instead of indexing so an empty field cannot
    ## raise while the template is rendered.
    #if str($input_source.select_fasta) == "false"
        #if str($input_source.input_sequence).lstrip().startswith(">")
            echo '${input_source.input_sequence}' > "input.fasta" &&
        #else
            echo ">Sequence"                      >  "input.fasta" &&
            echo '${input_source.input_sequence}' >> "input.fasta" &&
        #end if
    #end if

    ## Runs with SHAPE data go through the bundled Python script; every other
    ## run calls RNAfold directly. Both receive the same option list below.
    #if str($constraints.shapeOption.shapeSelector) == "isUsed"
      python  '$__tool_directory__/rnafold_SHAPE.py'
    #else
      RNAfold
    #end if

    -T $temperature
    --dangles=$dangling
    ## layout_type is a top-level select parameter: compare its string value
    ## and only emit the flag for the non-default simple radial layout (0).
    #if str($layout_type) == "0"
        --layout-type=$layout_type
    #end if
    #if $mea
        --MEA=$mea
    #end if
    ## The scaling factor is only passed along together with the partition function flag.
    #if $pf
        $pf
        #if $pfscale
            --pfScale=$pfscale
        #end if
    #end if
    ## Boolean switches expand to their flag or to an empty string.
    $advancedOptions.noconversion
    $advancedOptions.gquad
    $advancedOptions.nolp
    $advancedOptions.nops
    $advancedOptions.nogu
    $advancedOptions.noclosinggu
    $advancedOptions.notetra
    $advancedOptions.circular
    ## Numeric options are only emitted when they differ from the form defaults.
    ## "!=" is used because the "<>" spelling is rejected by Python 3.
    #if $advancedOptions.bppmThreshold != 1e-5
        --bppmThreshold=$advancedOptions.bppmThreshold
    #end if
    #if $advancedOptions.nsp
        --nsp='$advancedOptions.nsp'
    #end if
    #if $advancedOptions.betaScale != 1.0
        --betaScale=$advancedOptions.betaScale
    #end if
    #if $constraints.maxBPspan != -1
        --maxBPspan=$constraints.maxBPspan
    #end if
    #if str($constraints.constraintLocation.constraintSelector) == "fromFile"
        --constraint='$constraints.constraintLocation.constraintsFile'
        $constraints.constraintLocation.batch
        $constraints.constraintLocation.canonicalBPonly
        $constraints.constraintLocation.enforceConstraint
    #end if
    #if str($constraints.constraintLocation.constraintSelector) == "inFile"
        -C
    #end if
    ## SHAPE options: the method and conversion strings are assembled from the
    ## selected letter followed by its numeric parameters.
    #if str($constraints.shapeOption.shapeSelector) == "isUsed"
        --shape='$constraints.shapeOption.shapeFile'
        #if str($constraints.shapeOption.shapeMethod.methodSelector) == "W"
            #set $s="W"
            --shapeMethod=$s
        #else if str($constraints.shapeOption.shapeMethod.methodSelector) == "Z"
            #set $s="Zb"+str($constraints.shapeOption.shapeMethod.b)
            --shapeMethod=$s
            #if str($constraints.shapeOption.shapeMethod.shapeConversion.conversionSelector) == "C"
                #set $c="C"+str($constraints.shapeOption.shapeMethod.shapeConversion.c)
                --shapeConversion=$c
            #else if str($constraints.shapeOption.shapeMethod.shapeConversion.conversionSelector) == "L"
                #set $c="Ls"+str($constraints.shapeOption.shapeMethod.shapeConversion.s)+"i"+ str($constraints.shapeOption.shapeMethod.shapeConversion.i)
                --shapeConversion=$c
            #else if str($constraints.shapeOption.shapeMethod.shapeConversion.conversionSelector) == "O"
                #set $c="Os"+str($constraints.shapeOption.shapeMethod.shapeConversion.s)+"i"+ str($constraints.shapeOption.shapeMethod.shapeConversion.i)
                --shapeConversion=$c
            #else
                #set $c=str($constraints.shapeOption.shapeMethod.shapeConversion.conversionSelector)
                --shapeConversion=$c
            #end if
        #else if str($constraints.shapeOption.shapeMethod.methodSelector) == "D"
            #set $s="Dm"+str($constraints.shapeOption.shapeMethod.m)+"b"+str($constraints.shapeOption.shapeMethod.b)
            --shapeMethod=$s
        #end if
    #end if
    #if $constraints.motif
        --motif='$constraints.motif'
    #end if
    ## Sequences are read from stdin; the dot-bracket result goes to stdout.
    <
    #if str($input_source.select_fasta) == "false"
        "input.fasta"
    #else
        "${input_source.fasta_input}"
    #end if
    > '$tabular_file'
]]>
    </command>

    <inputs>
        <!-- Sequence source: either a FASTA dataset or a sequence typed into a text field. -->
        <conditional name="input_source">
            <param name="select_fasta" type="select" label="Input from FASTA file">
                <option value="true" selected="true">Use FASTA</option>
                <option value="false">Provide sequence as text-field</option>
            </param>
            <when value="true">
                <param name="fasta_input" type="data" format="fasta" label="Sequence to fold (FASTA file)"/>
            </when>
            <when value="false">
                <param name="input_sequence" type="text" label="Input sequence"/>
            </when>
        </conditional>
        <!-- Core folding parameters referenced by the command template. -->
        <param name="temperature" type="float" value="37.0" label="Temperature [°C]" argument="-T"/>
        <param name="dangling" type="select" label="How to treat dangling end energies" argument="-d">
            <option value="0">0: ignore dangling ends</option>
            <option value="1">1: unpaired bases participate in one dangling end only</option>
            <option value="2" selected="true">2: unpaired bases participate in all dangling ends</option>
            <option value="3">3: allow coaxial stacking</option>
        </param>
        <!-- The documented argument uses the same hyphenated spelling as the flag written by the command template. -->
        <param name="layout_type" type="select" label="Layout algorithm" argument="--layout-type">
            <option value="1" selected="true">Default: Naview layout</option>
            <option value="0">Simple radial layout</option>
        </param>
        <param argument="--MEA" name="mea" type="float" value="1.0" optional="true" label="Gamma Value" help="Used for the calculation of the Maximum Expected accuracy"/>
        <param name="pf" type="boolean" checked="false" truevalue="--partfunc" falsevalue="" label="Calculate Partition Function" help="--partfunc"/>
        <!-- Only forwarded by the command template when the partition function switch above is enabled. -->
        <param name="pfscale" type="float" value="1.07" optional="true" label="Scaling factor" help="In the calculation of the pf use scale*mfe as an estimate for the ensemble free energy (used to avoid overflows). The default is 1.07, useful values are 1.0 to 1.2. Occasionally needed for long sequences."/>
        <section name="advancedOptions" title="Advanced options">
            <!-- Boolean switches: each one expands to its flag or to an empty string. -->
            <param name="nops" type="boolean" truevalue="--noPS" falsevalue="" checked="false" label="Do not produce postscript drawing of the mfe structure (reduces run-time)" help="(--noPS)"/>
            <!-- The next five switches are inverted: the flag is emitted when the box is unchecked. -->
            <param name="nolp" type="boolean" truevalue="" falsevalue="--noLP" checked="true" label="Allow lonely base-pairs" help="(--noLP)"/>
            <param name="nogu" type="boolean" truevalue="" falsevalue="--noGU" checked="true" label="Allow GU pairing" help="--noGU"/>
            <param name="noclosinggu" type="boolean" truevalue="" falsevalue="--noClosingGU" checked="true" label="Allow GU pairing at the ends" help="Allow pairing of G and U at the ends of helices. --noClosingGU"/>
            <param name="notetra" type="boolean" truevalue="" falsevalue="--noTetra" checked="true" label="Allow stabilization for loops, hairpins etc." help=" Include special tabulated stabilizing energies for tri-, tetra- and hexaloop hairpins. Mostly for testing. (--noTetra)"/>
            <param name="noconversion" type="boolean" truevalue="" falsevalue="--noconv" checked="true" label="Convert Thymine to Uracil (T -> U)" help="Avoids confusion with DNA sequences (--noconv)"/>
            <param name="gquad"
                   type="boolean"
                   truevalue="--gquad"
                   falsevalue=""
                   checked="false"
                   label="Take G Quadruplex formation into account"
                   argument="-g"/>
            <param name="circular"
                   type="boolean"
                   truevalue="--circ"
                   falsevalue=""
                   checked="false"
                   label="Model as circular RNA structure"
                   argument="--circ"/>
            <!-- The command template only forwards this value when it differs from the 1e-5 default. -->
            <param name="bppmThreshold"
                   type="float"
                   value="1e-5"
                   label="Threshold for base pair probabilities"
                   help="By setting the threshold the base pair probabilities that are included in the output can be varied. By default only those exceeding 1e-5 in probability will be shown as squares in the dot plot. Changing the threshold to any other value allows for increase or decrease of data."
                   argument="--bppmThreshold"/>
            <param name="nsp"
                   type="text"
                   value=""
                   label="Allow other pairs in addition to the usual AU,GC,and GU pairs."
                   help="Its argument is a comma separated list of additionally allowed pairs. If the first character is '-' then AB will imply that AB and BA are allowed pairs. e.g. RNAfold -nsp -GA  will allow GA and AG pairs. Nonstandard pairs are given 0 stacking energy."
                   argument="--nsp"/>
            <!-- The command template only forwards this value when it differs from the 1.0 default. -->
            <param name="betaScale"
                   type="float"
                   value="1.0"
                   label="Scaling of Boltzmann factors"
                   help=" The argument provided with this option enables to scale the thermodynamic temperature used in the Boltzmann factors independently from the temperature used to scale the individual energy contributions of the loop types."
                   argument="--betaScale"/>
        </section>
        <section name="constraints" title="Structure constraints">
            <!-- The command template treats the default of -1 as "no limit" and omits the flag. -->
            <param name="maxBPspan" type="integer" value="-1" label="Set the maximum base pair span" help="" argument="--maxBPspan"/>
            <!-- Where structure constraints come from: a separate dataset, the FASTA input itself, or nowhere. -->
            <conditional name="constraintLocation">
                <param name="constraintSelector" type="select" label="Constraints">
                    <!-- <option value="fromInput">The constraints are included in the input file</option> -->
                    <option value="fromFile">The constraints are in a separate file</option>
                    <option value="inFile">The constraints are in the fasta input file</option>
                    <option value="none" selected="true">Don't use constraints</option>
                </param>
                <!-- <when value="fromInput"></when> -->
                <when value="none"></when>
                <when value="inFile"></when>
                <when value="fromFile">
                    <param name="constraintsFile" type="data" format="txt" label="Constraints file" argument="--constraint"/>
                    <param name="batch" type="boolean" checked="false" truevalue="--batch" falsevalue=""
                        label="Use constraints for all sequence records"
                        help="Usually, constraints provided from input file only apply to a single input sequence. Therefore, RNAfold will stop its computation and quit after the first input sequence was processed. Using this switch, RNAfold processes all sequences in the input and applies the same provided constraints to each of them."
                        argument="--batch"/>
                    <param argument="--canonicalBPonly" type="boolean" truevalue="--canonicalBPonly" falsevalue="" checked="false" label="Remove non-canonical base pairs from the structure constraint" />
                    <param argument="--enforceConstraint" type="boolean" truevalue="--enforceConstraint" falsevalue="" checked="false" label="Enforce base pair given by round brackets () in structure constraint" />
                </when>
            </conditional>
            <!-- Optional SHAPE reactivity data; selecting it also switches the command to the Python wrapper script. -->
            <conditional name="shapeOption">
                <param name="shapeSelector" type="select" label="Shape reactivity data">
                    <option value="isUsed">Use shape reactivity data</option>
                    <option value="notUsed" selected="true">Don't use shape reactivity data</option>
                </param>
                <when value="isUsed">
                    <param type="data" name="shapeFile" format="txt" label="Shape file" argument="--shape"/>
                    <conditional name="shapeMethod">
                        <param name="methodSelector" type="select" label="Shape reactivity data" argument="--shapeMethod">
                            <option value="D" selected="true">D: Convert by using a linear equation according to Deigan et al 2009</option>
                            <option value="Z">Z: Convert SHAPE reactivities to pseudo energies according to Zarringhalam et al 2012.</option>
                            <option value="W">W: Apply a given vector of perturbation energies to unpaired nucleotides according to Washietl et al 2012</option>
                        </param>
                        <when value="D">
                            <param name="m" type="float" value="1.8" label="Slope m"/>
                            <param name="b" type="float" value="-0.6" label="Intercept"/>
                        </when>
                        <when value="Z">
                            <param name="b" type="float" value="-0.6" label="Intercept"/>
                            <conditional name="shapeConversion">
                                <param name="conversionSelector" type="select" label="shape reactivity data">
                                    <option value="M">M: Use linear mapping according to Zarringhalam et al</option>
                                    <option value="C">C: Use a cutoff-approach to divide into paired and unpaired nucleotides</option>
                                    <option value="S">S: Skip the normalizing step since the input data already represents probabilities for being unpaired rather than raw reactivity values</option>
                                    <option value="L">L: Use a linear model to convert the reactivity into a probability for being unpaired</option>
                                    <option value="O" selected="true">O: Use a linear model to convert the log of the reactivity into a probability for being unpaired</option>
                                </param>
                                <when value="M">
                                </when>
                                <when value="C">
                                    <param name="c" type="float" value="0.25" label="Cutoff"/>
                                </when>
                                <when value="S">
                                </when>
                                <when value="L">
                                    <param name="s" type="float" value="0.68" label="Slope"/>
                                    <param name="i" type="float" value="0.2" label="Intercept"/>
                                </when>
                                <when value="O">
                                    <param name="s" type="float" value="1.6" label="Slope s"/>
                                    <param name="i" type="float" value="-2.29" label="Intercept"/>
                                </when>
                            </conditional>
                        </when>
                        <when value="W">
                        </when>
                    </conditional>
                </when>
                <when value="notUsed">
                </when>
            </conditional>
            <param argument="--motif" type="text" value="" label="Sequence structure energy" help="Specify stabilizing effect of ligand binding to a particular sequence/structure motif. Some ligands binding to RNAs require and/or induce particular sequence and structure motifs. For instance they bind to an interior loop, or small hairpin loop. If for such cases a binding free energy is known, the binding and therefore stabilizing effect of the ligand can be included in the folding recursions. Interior loop motifs are specified as concatenations of 5’ and 3’ motif, separated by an ’&amp;’ character. Energy contributions must be specified in kcal/mol.">
                <!-- The command template wraps this value in single quotes, so a literal single quote must never pass through unescaped. -->
                <sanitizer>
                    <valid initial="string.printable">
                        <remove value="&apos;"/>
                    </valid>
                </sanitizer>
            </param>
        </section>
        <section name="IDs" title="Naming Conventions">
            <!-- NOTE(review): the command template in this file never references the parameters of this section, so they currently have no effect on the RNAfold call; confirm whether they should be wired in. -->
            <param name="auto_id"
                type="boolean" truevalue="--auto-id" falsevalue="" checked="false"
                label="Automatically generate an ID for each sequence."
                help="If this flag is active, RNAfold ignores any IDs retrieved from the input and automatically generates an ID for each sequence."
                argument="--auto-id"/>
            <param name="id_prefix"
                type="text" value="sequence"
                label="Prefix for automatically generated IDs (as used in output file names)"
                help="If this parameter is set, each sequence will be prefixed with the provided string. Hence, the output files will obey the following naming scheme: 'prefix_xxxx_ss.ps' (secondary structure plot), 'prefix_xxxx_dp.ps' (dot-plot), 'prefix_xxxx_aln.ps' (annotated sequence), etc. where xxxx is the sequence number beginning with the second sequence in the input. Use this setting in conjunction with the --continuous-ids flag to assign IDs beginning with the first input sequence."
                argument="--id-prefix"/>
            <param name="id_digits"
                type="integer" value="4" min="1" max="18"
                label="The number of digits of the counter in automatically generated sequence IDs"
                help="When sequences IDs are automatically generated, they receive an increasing number, starting with 1. This number will always be left-padded by leading zeros, such that the number takes up a certain width. Using this parameter, the width can be specified to the users need. We allow numbers in the range [1:18]."
                argument="--id-digits"/>
            <param name="id_start"
                type="integer" value="1" min="0"
                label="First number in automatically generated sequence IDs"
                help="When sequence IDs are automatically generated, they receive an increasing number, usually starting with 1. Using this parameter, the first number can be specified to the users requirements. Note: negative numbers are not allowed. Note: Setting this parameter implies continuous sequence IDs, i.e. it activates the --continuous-ids flag."
                argument="--id-start"/>
        </section>
    </inputs>
    <outputs>
        <!-- Standard output of the run, captured by the command's final redirect. -->
        <data format="dbn" name="tabular_file"/>
        <!-- Structure drawings (*_ss.ps); skipped when drawing is switched off. -->
        <collection name="sequence_outputs" type="list" label="RNAfold structure plots">
            <filter>advancedOptions['nops'] is False</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)_ss\.ps" ext="eps" />
        </collection>
        <!-- Dot plots (*_dp.ps); collected when the partition function switch is on or an MEA gamma value is set.
             Both names are top-level parameters of this tool. -->
        <collection name="matrix_outputs" type="list" label="RNAfold dot plots">
            <filter>pf or mea not in (None, '')</filter>
            <discover_datasets pattern="(?P&lt;designation&gt;.+)_dp\.ps" ext="rna_eps" visible="true"/>
        </collection>
    </outputs>
    <tests>
        <!-- FASTA dataset input with default settings. -->
        <test>
            <param name="select_fasta" value="true"/>
            <param name="fasta_input" value="rnafold_input1.fa"/>
            <output name="tabular_file" file="rnafold_result1.txt"/>
        </test>
        <!-- FASTA dataset input folded at a non-default temperature. -->
        <test>
            <param name="select_fasta" value="true"/>
            <param name="fasta_input" value="rnafold_input2.fa"/>
            <param name="temperature" value="75"/>
            <output name="tabular_file" file="rnafold_result2.txt"/>
        </test>
        <!-- Sequence supplied through the text field instead of a dataset. -->
        <test>
            <param name="select_fasta" value="false"/>
            <param name="input_sequence" value="TGGGAATTAGCTCAAATGGTAGAGCGCTCGCTTAGCATGTGAGAGGTAGTGGGATCGATGCCCACATTCTCCA"/>
            <output name="tabular_file" file="rnafold_result3.txt"/>
        </test>
        <!-- Folding with SHAPE reactivity data; both SHAPE settings sit inside the conditional that declares them. -->
        <test>
            <param name="select_fasta" value="true"/>
            <param name="fasta_input" value="sample_3.fa"/>
            <conditional name="shapeOption">
                <param name="shapeSelector" value="isUsed"/>
                <param name="shapeFile" value="sample_3.react"/>
            </conditional>
            <output name="tabular_file" file="sample_3_result.txt"/>
        </test>
    </tests>
    <help>
<![CDATA[
**RNAfold**

The program reads RNA sequences, calculates their minimum free
energy (mfe) structure and reports the mfe structure in dot-bracket notation.

If the partition function option (--partfunc) is enabled, it also computes the
partition function (pf) and base pairing probability matrix.

The dot plot of the base pairing probability matrix shows a matrix of squares with area proportional to the pairing
probability in the upper right half, and one square for each pair in the
minimum free energy structure in the lower left half. For each pair i-j with
probability p>10E-6 there is a line of the form

i  j  sqrt(p)  ubox

in the PostScript file, so that the pair probabilities can be easily extracted.

The sequences have to be provided in FASTA format. The first word (max. 42 char) of the FASTA header will be used for output file names. PostScript files "name_ss.ps" and "name_dp.ps" are produced for the structure and dot plot, respectively.
The program will read the whole FASTA input file and provide output for each found sequence.


-----

**Input format**

RNAfold requires one input file or a plain RNA sequence

------

**Outputs**

- Secondary structures in dot-bracket notation

- PostScript images, gathered into dataset collections

  - a secondary structure plot for each sequence in the input file
  - if the partition function is calculated (--MEA or --partfunc is set), the pairing probability matrix (dot plot) for each sequence

]]>
    </help>
    <expand macro="citations" />
</tool>