Mercurial > repos > rnateam > graphclust_fasta_to_gspan
changeset 0:b6e6830ff06b draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/GSPAN commit 21aaee40723b5341b4236edeb0e72995c2054053
author | rnateam |
---|---|
date | Fri, 16 Dec 2016 07:35:04 -0500 |
parents | |
children | 358c52752305 |
files | fasta2shrep_gspan.xml test-data/GSPAN_Outputs/1.group.gspan.bz2 test-data/data.fasta |
diffstat | 3 files changed, 118 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fasta2shrep_gspan.xml Fri Dec 16 07:35:04 2016 -0500
@@ -0,0 +1,98 @@
+<tool id="gspan" name="fasta_to_gspan" version="0.1">
+    <requirements>
+        <requirement type="package" version="0.1">graphclust-wrappers</requirement>
+        <requirement type="package" version="2.1.6">rnashapes</requirement>
+    </requirements>
+    <stdio>
+        <exit_code range="1:" />
+    </stdio>
+    <command>
+        <![CDATA[
+        ## The window list is quoted so that an empty value still reaches -win as its own (empty) argument.
+        'fasta2shrep_gspan.pl' --fasta '$dataFasta' $i_crop_unpaired_ends $i_abstr $i_stacks -t "3=0,5=80" -M $M -c $rel_energy_range -win '$wins' -shift $shift $u $seq_graph_t --group $group
+
+]]>
+    </command>
+    <inputs>
+        <param type="data" name="dataFasta" format="fasta" />
+        <param name="i_stacks" truevalue="-stack" falsevalue="" checked="True" type="boolean" label="Add stacking information to graphs." help="-stack"/>
+        <param name="i_abstr" truevalue="-abstr" falsevalue="" type="boolean" label="Add abstract structure graphs to the single shrep graph instances." help="-abstr"/>
+        <param name="i_crop_unpaired_ends" truevalue="-cue" falsevalue="" type="boolean" label="Crop unpaired ends: ignore the unpaired ends of each single structure." help="-cue"/>
+        <param name="M" type="integer" value="5" size="7" label="Max number of shreps that should be taken per window." help="-M"/>
+        <param name="rel_energy_range" type="integer" value="20" size="7" label="Relative energy range, i.e. percentage (%) of MFE energy (RNAshapes)" help="-c"/>
+        <param name="wins" type="text" value="40,150" size="20" label="A list of window sizes to use" help="comma separated integers"/>
+        <param name="shift" type="integer" value="30" size="7" label="The shift of the window, relative to the window size given in percent." help="by default 30"/>
+        <param name="u" truevalue="-u" falsevalue="" checked="True" type="boolean" label="Ignore unstable structures (RNAshapes)" help="-u"/>
+        <param name="seq_graph_t" truevalue="--seq-graph-t" falsevalue="" checked="True" type="boolean" label="Add for each 't #' a graph which contains no structure" help="--seq-graph-t"/>
+        <param name="group" type="integer" value="10000" size="7" label="Group size." help="by default 10000"/>
+    </inputs>
+    <outputs>
+        <data name="gspan.zip" format="searchgui_archive" from_work_dir="GSPAN_Outputs/1.group.gspan.bz2" label="gspan zip" />
+    </outputs>
+    <tests>
+        <test>
+            <param name="dataFasta" value="data.fasta"/>
+            <output name="gspan.zip" file="GSPAN_Outputs/1.group.gspan.bz2" />
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+
+**What it does**
+
+For each fragment of input sequence we use RNAshapes to create a set of structures.
+The default parameters, for example, consider for each input fragment a
+window of size 40nt and 150nt with a window shift of 30%. This makes it possible to
+consider local structures as well as global structures for a fragment.
+From each such RNAshape window we take the top 5 shreps (suboptimal
+structures for the top 5 shapes) within 20% of the mfe energy of that window
+and convert them into graphs.
+As shape level (abstraction level) we use 3 for short sequences and 5 for
+sequences >= 80nt. Please see also RNAshapes documentation for all these
+terms.
+
+
+**Parameters**
+
++ **input** : Sequences in Fasta format.
+
++ **stack** : This adds an additional vertex (type P) for each pair of stacked base-pairs and four edges
+  (type p) from each of the involved bases to the new vertex.
+
++ **abstr** : Add abstract structure graphs to the single shrep graph instances.
+
++ **cue** : Crop unpaired ends. If you give this flag, then the unpaired ends of each single structure are ignored. E.g. the structure ``...(((...)))..`` becomes just ``(((...)))``
+
++ **M** : Max number of shreps that should be taken per window.
+
++ **c** : Relative energy range, i.e. percentage (%) of MFE energy (RNAshapes). Use only one of -e and -c!
+
++ **wins** : A list of window sizes to use. If none are given (empty string ''), then the entire sequence is taken with no windows. Each window > 1 required!
+
++ **shift** : The shift of the window, relative to the window size given in percent. So you give which percent of the window size shall be
+  used for the shift. Of course the shift is rounded down to the
+  nearest whole number.
+  Example: 20 % of a window 150 would result in a step size of 30 nt.
+  It is a relative parameter, as you can give different window sizes.
+  If you do not give this parameter there is a default shift of 1 nt.
+
++ **u** : Ignore unstable structures (RNAshapes). This option filters out closed structures with positive free energy.
+
++ **seq-graph-t** : Add for each 't #' a graph which contains no structure
+
++ **group** : Combine/group that number of input seqs into 1 gspan file. The output name is then '<INT>.group.gspan.bz2'
+
+    ]]>
+    </help>
+    <citations>
+        <citation type="doi">10.1093/bioinformatics/bts224</citation>
+        <citation type="bibtex">@article{jan:gie:2015,
+author={Janssen, Stefan and Giegerich, Robert},
+title={The RNA shapes studio},
+journal={Bioinformatics},
+year={2015},
+doi={10.1093/bioinformatics/btu649},
+url={http://bioinformatics.oxfordjournals.org/content/31/3/423.abstract}}</citation>
+    </citations>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/data.fasta Fri Dec 16 07:35:04 2016 -0500 @@ -0,0 +1,20 @@ +>1 SEQ1#1#120#+ ORIGID RF00001_rep.0_AL096764.11/46123-46004_1 ORIGHEAD RF00001_rep.0 +GUCUAUGGCCAUACCACCCUGAAUGUGCUUGAUCUCAUCUGAUCUCGUGAAGCCAAGCAGGGUGGGGCCUAGUUAGUACUUGGAUGGGAGACUUCCUGGGAAUAUAAGCUGCUGUUGGCU +>2 SEQ2#1#118#+ ORIGID RF00001_rep.1_U89919.1/939-1056_2 ORIGHEAD RF00001_rep.1 +CUUUACGGCCACACCACCCUGAACGCACCGGAUCUCGACUGACCUUGAAAGCUAAGCAGGAUCGGGCCUGGUUAGUAUUGGGAUGGCAGACCCCCUGGAAAUACAGGGUGCUGAAGGU +>3 SEQ3#1#104#+ ORIGID RF00001_rep.2_AJ508600.1/161-58_3 ORIGHEAD RF00001_rep.2 +GUCUACAGCCAUACCAUCCUGAACAUGCCAGAUCUUGUCUGACCUCUGAAGCUAAGCAGGGUCAAGCCUGGUUAGUACUUGGGAGAAGCUGGUGUGGCUAGACC +>4 SEQ4#1#73#+ ORIGID RF00005_rep.0_M15347.1/1040-968_4 ORIGHEAD RF00005_rep.0 +GGCUCCAUAGCUCAGGGGUUAGAGCACUGGUCUUGUAAACCAGGGGUCGCGAGUUCAAUUCUCGCUGGGGCUU +>5 SEQ5#1#72#+ ORIGID RF00005_rep.10_X58792.1/174-245_5 ORIGHEAD RF00005_rep.10 +GGUCCCAUGGUGUAAUGGUUAGCACUCUGGACUUUGAAUCCAGCGAUCCGAGUUCAAAUCUCGGUGGGACCU +>6 SEQ6#1#66#+ ORIGID RF00005_rep.11_AF346992.1/15890-15955_6 ORIGHEAD RF00005_rep.11 +GUCCUUGUAGUAUAAACUAAUACACCAGUCUUGUAAACCGGAGAUGAAAACCUUUUUCCAAGGACA +>7 SEQ7#1#83#+ ORIGID RF00005_rep.12_AC108081.2/59868-59786_7 ORIGHEAD RF00005_rep.12 +GUCAGGAUGGCCGAGCGGUCUAAGGCGCUGCGUUCAGGUCGCAGUCUCCCCUGGAGGCGUGGGUUCGAAUCCCACUUCUGACA +>8 SEQ8#1#70#+ ORIGID RF00005_rep.13_AC067849.6/4771-4840_8 ORIGHEAD RF00005_rep.13 +CACUGUAAAGCUAACUUAGCAUUAACCUUUUAAGUUAAAGAUUAAGAGAACCAACACCUCUUUACAGUGA +>9 SEQ9#1#73#+ ORIGID RF00005_rep.14_AL021808.2/65570-65498_9 ORIGHEAD RF00005_rep.14 +GCUUCUGUAGUGUAGUGGUUAUCACGUUCGCCUCACACGCGAAAGGUCCCCGGUUCGAAACCGGGCAGAAGCA +>10 SEQ10#1#73#+ ORIGID RF00005_rep.15_AC008443.10/42590-42518_10 ORIGHEAD RF00005_rep.15 +GCCCGGCUAGCUCAGUCGGUAGAGCAUGAGACUCUUAAUCUCAGGGUCGUGGGUUCGAGCCCCACGUUGGGCG