Mercurial > repos > earlhaminst > miranda
changeset 0:05bc31ccc323 draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/miranda commit e24b91d3051c422ee2a20caf8bb12f0896e2c84a-dirty
author | earlhaminst |
---|---|
date | Fri, 11 Nov 2016 07:03:25 -0500 |
parents | |
children | 89d595ffa9db |
files | miranda.xml miranda_parser.py test-data/microrna.fasta test-data/query.fasta test-data/raw_output.txt test-data/tabular_output.txt tool_dependencies.xml |
diffstat | 7 files changed, 236 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!--
    miranda.xml

    Galaxy tool wrapper for miRanda 3.3a.

    Runs `miranda` on a microRNA FASTA dataset against a query FASTA dataset,
    keeps the unmodified program output as `raw_output`, and then runs
    miranda_parser.py on that file to produce `tabular_output`.
-->
<tool id="miranda" name="miRanda" version="3.3a">
    <description>finds potential target sites for miRNAs in genomic sequences</description>
    <requirements>
        <requirement type="package" version="3.3a">miranda</requirement>
    </requirements>
    <version_command>miranda --version|grep 'miranda v'</version_command>
    <!--
        detect_errors="exit_code": a non-zero exit status from either command
        in the && chain marks the job as failed.
    -->
    <command detect_errors="exit_code">
<![CDATA[
miranda '$microrna' '$query'
## Both thresholds are optional; the flag is only passed when a value was entered.
#if str($sc)
    -sc $sc
#end if
#if str($en)
    -en $en
#end if
-out '$raw_output'
&&
python '$__tool_directory__/miranda_parser.py' '$raw_output' '$tabular_output'
]]>
    </command>
    <inputs>
        <param name="microrna" type="data" format="fasta" label="MicroRNA sequences" help="FASTA format" />
        <param name="query" type="data" format="fasta" label="Query sequences" help="FASTA format" />
        <param name="sc"
               type="float"
               min="0.0"
               value="140.0"
               optional="true"
               label="Alignment score threshold"
               help="Only alignments with score greater or equal to this value will be used for further analysis (-sc)" />
        <param name="en"
               type="float"
               value="1.0"
               optional="true"
               label="Set energy threshold to -E kcal/mol"
               help="Only alignments with energy less or equal to this value will be used for further analysis. A negative value is required for filtering to occur (-en)" />
    </inputs>
    <outputs>
        <data name="raw_output" format="txt" label="${tool.name} on ${on_string}: raw output" />
        <data name="tabular_output" format="tabular" label="${tool.name} on ${on_string}: tabular output" />
    </outputs>
    <tests>
        <test>
            <param name="microrna" ftype="fasta" value="microrna.fasta" />
            <param name="query" ftype="fasta" value="query.fasta" />
            <!--
                The raw output echoes the input dataset paths (the "Query Filename"
                and "Reference Filename" lines), which presumably is why a few
                differing lines are tolerated here.
            -->
            <output name="raw_output" file="raw_output.txt" lines_diff="4" />
            <!-- The parsed hit table contains no run-specific text, so it is compared exactly. -->
            <output name="tabular_output" file="tabular_output.txt" />
        </test>
    </tests>
    <help>
<![CDATA[
`miRanda`_ is an algorithm for the detection of potential microRNA target sites in genomic sequences.

One or more miRNA sequences are scanned against all query sequences and potential target sites are reported. Potential target sites are identified using a two-step strategy. First a dynamic programming local alignment is carried out between the query miRNA sequence and the reference sequence. This alignment procedure scores based on sequence complementarity and not on sequence identity. In other words we look for A:U and G:C matches instead of A:A, G:G, etc. The G:U wobble pair is also permitted, but generally scores less than the more optimal matches.

The second phase of the algorithm takes high-scoring alignments (i.e. those above the alignment score threshold) detected from phase 1 and estimates the thermodynamic stability of RNA duplexes based on these alignments. This second phase of the method utilizes folding routines from the RNAlib library, which is part of the ViennaRNA package written by Ivo Hofacker. At this stage we generate a constrained fictional single-stranded RNA composed of the query sequence, a linker and the reference sequence (reversed). This structure is then folded using RNAlib and the minimum free energy (DG kcal/mol) is calculated for that structure.

Finally, detected targets with energies less than a chosen energy threshold are selected as potential target sites. Target site alignments passing both thresholds and other information are produced as output.

View the original miRanda manual: http://cbio.mskcc.org/microrna_data/manual.html

.. _miRanda: http://www.microrna.org/
]]>
    </help>
    <citations>
        <citation type="doi">10.1186/gb-2003-5-1-r1</citation>
    </citations>
</tool>
# miranda_parser.py
"""Extract the per-hit score records from raw miRanda output.

Usage: python miranda_parser.py <raw_miranda_output> <tabular_output>

Every per-hit record found in the raw output is copied, unchanged and in
order, to the tabular output file.
"""
import sys


def is_hit_line(line):
    """Return True when ``line`` is a per-hit score record.

    In the raw output shipped as test data, each hit is reported on a line
    that starts with a single ">" while the per-scan summary starts with
    ">>". Matching on that prefix (instead of on the presence of "%") keeps
    unrelated lines out of the table, e.g. a "Read Sequence:" line echoing a
    FASTA description that happens to contain a percent sign.
    """
    return line.startswith(">") and not line.startswith(">>")


def extract_hits(lines):
    """Yield, in input order, the lines of ``lines`` that are hit records."""
    return (line for line in lines if is_hit_line(line))


def main(argv=None):
    """Copy hit records from the raw miRanda file to the tabular file.

    ``argv`` is ``[raw_output_path, tabular_output_path]``; it defaults to
    the command-line arguments. Exits with a usage message when the number
    of arguments is wrong.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) != 2:
        sys.exit("Usage: python miranda_parser.py <raw_miranda_output> <tabular_output>")

    raw_path, tabular_path = args
    with open(raw_path) as infile, open(tabular_path, "w") as outfile:
        outfile.writelines(extract_hits(infile))


if __name__ == "__main__":
    main()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/microrna.fasta Fri Nov 11 07:03:25 2016 -0500 @@ -0,0 +1,2 @@ +>gi|29565487|emb|AJ550546.1| Drosophila melanogaster microRNA miR-bantam +GTGAGATCATTTTGAAAGCTG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/query.fasta Fri Nov 11 07:03:25 2016 -0500 @@ -0,0 +1,57 @@ +>gi|945100|gb|U31226.1|DMU31226 Drosophila melanogaster head involution defective protein (hid) mRNA, complete cds (3'UTR only) +TGACAAAAAATAAAAAACGAAATCCATCGTGAACAGTTTTGTGTTTTTAAATCAGTTCTAAACACGAAAA +GGGTTGATGAAAAACGCAGAAGAATCCGAAAAACTAACTAACCGAGCAAAAACTTGACTTGAGTGTTGTT +TGACAAATCAGGAAAGATAAAAAACAAATCATAAGAAAAAACTGCACGAAAAATGAAAAAGTTTCTAATA +TTCAAAATCTTGCACAAGAAATACAAAATCAATTAAAGTGAACTCTAACCAAAAGTTGTACACAAAATAA +AAAGCAAAACAAAGCAGCGAAGAACAATCACAAGAAGAGCAAAGTGCCAACAAAGTGCAGGAAGGAAGGA +AGCGGATAAGGACAAAAAGGAAGCCAGCACACACACACACACCCACACAATGGCCGTGCCCTTTTATTTG +CCCGAGGGCGGCGCCGATGACGTAGCGTCGAGTTCATCGGGAGCCTCGGGCAACTCCTCCCCCCACAACC +ACCCACTTCCCTCGAGCGCATCCTCGTCCGTCTCCTCCTCGGGCGTGTCCTCGGCCTCCGCCTCCTCGGC +CTCATCTTCGTCATCCGCATCGTCGGACGGCGCCAGCAGCGCCGCCTCGCAATCGCCGAACACCACCACC +TCGTCGGCCACGCAGACGCCGATGCAGTCTCCACTGCCCACCGACCAAGTGCTATACGCCCTCTACGAGT +GGGTCAGGATGTACCAGAGCCAGCAGAGTGCCCCGCAAATCTTCCAGTATCCGCCGCCAAGCCCCTCTTG +CAATTTCACTGGCGGCGATGTGTTCTTTCCGCACGGCCATCCGAATCCGAACTCGAATCCCCATCCGCGC +ACCCCCCGAACCAGCGTGAGCTTCTCCTCCGGCGAGGAGTACAACTTCTTCCGGCAGCAGCAGCCGCAAC +CACATCCGTCATATCCGGCGCCATCAACACCGCAGCCAATGCCACCGCAGTCAGCGCCGCCGATGCACTG +CAGCCACAGCTACCCGCAGCAGTCGGCGCACATGATGCCACACCATTCCGCTCCCTTCGGAATGGGCGGT +ACCTACTACGCCGGCTACACGCCACCACCCACTCCGAACACGGCCAGTGCGGGCACCTCCAGCTCATCGG +CGGCCTTCGGCTGGCACGGCCACCCCCACAGCCCCTTCACGTCGACCTCCACGCCGTTATCGGCGCCAGT +GGCGCCCAAGATGCGCCTGCAGCGCAGCCAGTCGGATGCGGCCAGACGCAAGCGATTGACCTCGACGGGC +GAGGATGAGCGCGAGTACCAGAGCGATCATGAGGCCACTTGGGACGAGTTTGGCGATCGCTACGACAACT +TTACGGCCGGCCGGGAGCGTCTGCAGGAGTTCAATGGACGCATCCCGCCCCGGAAGAAGAAGAGCTCCAA +TAGCCACTCGAGCAGCAGCAATAATCCAGTCTGCCATACCGACAGCCAGTCCGGTGGTACATCCCAAGCG +GAGAGCGGTGCCATCCATGGCCACATCAGTCAGCAGCGACAGGTGGAGCGAGAACGACAAAAGGCGAAGG +CCGAGAAGAAGAAACCACAGAGCTTCACTTGGCCAACTGTTGTGACCGTTTTCGTTTTGGCCATGGGCTG +TGGCTTCTTTGCGGCGCGATGAAAGCGCAGGAGACGTGTAATCGAATGATCTATAGTGAAATCAGCTAGC 
+CCTTAAGATATATGCCGATCTAAACATAGTTGTAGTTAAACCGTACATAAGTGCAACGAATTTATTGAAC +TGCAGGAGCGAAAGCAGAAAGTCATTAATTCGTAAACGGATTGTTAGATACACAAACAGCCAACATACAC +GAAGAGTGTGCCTAAGATTAAGAAGGTTGACGGGACACAAGAACAATATATTCTATCTGTCTATGGTAAC +TGCATTTGTATTTCTAAAACGAAACGAAAGATAACAATCTTAACTGCTCAAAGTAATGAAAACTCTTAGA +CTGGCAAGAGACTCAAATCACACTTATTTTTTTGCTGATCCATATTTTTGTACAACCTTTTGAGCGATAT +TTACAAATTATACTAGTACAAAAAAAAGAGAGAGAGAGATAAGCAAAAGAAAACTGCCACTTTTGAGATA +CTTTTGATAATCTTTGATTTGCATTTAATCATTTCCACACTTGCATTTTTTATAAACAACAAACAAAATT +ACTTCCATTGTAGAACAAAGTAAACTGCAATTTCAATGTCTTCGCATTTGTAATTCCGAATTGCAAGAAA +AACAAAAATATTTTAAATATGTTTAACTAGTAGAATTTTTTAAACGTAAGTCCACAAAAACAAGCACATC +TAGCTTTAATTGTTGAAACAAAAGCAGAAAAAACGCAACAAAAAAATGAATGAAAATCATTAAATTAATT +TTGTATATAGTTTTTATGCCATTTTTGTGATGTTTTGTGTCTACGGTTTATGTCATGTTATTTTAGTTAA +ATTTCTTATGATTTATGTTTATTTGTAATATTTTTTGTCATTGTTTGTTCATCATCATATTCAAATTGGT +CTCACAATATAATAGTTTTAAGCTCCACGCCCGGGAGATTGATGGCAAAACGATTGAAATTTGGCCAGAA +GAGAGATAGTTTTCCCCATTCGTACACAGTCTTTTTTGGAATGCACATTAATGATCTCTCACAATGGAAA +TTAATGAAAATTGATCTCCGCAGCTAGCCAAAGTTAAAAAAGAAATGAAGAGGAAAACATATTCTATAGG +CAATTTTCACTATATGCTAGAATTTCCCGGGCGTTTCAATGCTAATCGAATACAGTGACATGAAAGCAAA +CATAGCGAAAATATTAAGAAAATCAATCAAAAAGAAAGAAAAACCAATTCCCAAAAATCGCATTGATCTC +ATGGATTTATACAATACAATTACATCAACCGTTTTTTTACAATGAGAAATGTTATAAAAAGCAGAAAGTG +AAACACAGAAACATAAACAAAAATTAACGAAAAGCTTAGATATAAGTTCGCCAAGCGTTTTAGTTCTATT +TTCTAGAATGTCTAAGTCGGTTTAGTGAGTTTATTAAGCTGTCTTCGGACACAAGTTTATTTGTATATAA +GCAATATTATTTGTGTAGCCTAAGTGACAGTCCCAATCAAATCCAATCCAATATCACCCAGTCCCGGACA +TTTCCCAGCAAAACAATAGACTATTCTCGCGTTCACATGTATCAATCTTAATTTGAATTACCACAAAATG +AAATGAAATACTAAAACCATACACAAATGAAAAATTATTTTTGTAAATTGTTTGCATCAAGTGAGCAAGG +GGATTAGATTAAGGAATCATCCTTGCTTTATCCCCTGCTTATTGCTAATTAGTTTTCACAATGATCTCGG +TAAAGTTTTGTGGCCTTGCGCCCAAAAGTCGTACAGATTTTTGGTTTGCCATAAATACTCGAACAAAAAG +TTAATGAAAAACGAAGCAAATGGAAAAAAAATCAGAATGAAACACAAGAAATTTATATTTTTGACCCAAT +GCTACTTAATCCGTTTTTGTAATTTAAGTATCTTTACTCGACCTTGTATATAGCGCAGTTCGAATCACAG 
+AATCAAATGCCATTTTTGTATAGAATTTTATTTGGTGCCAAAACAGTGACAGATAATTAAATGTCTATGA +ACCCGTGTATTTCGCATATTATACATTTATACATATATCGTAACTTCAATGATAAGTTTGATTCTGAAAT +TTTGTCAACTCAATTTAAGAAACATTTCTGTTGTAGTTTAGTGATTGCTAGCAGAAAGCACTTTGTTTAA +TTGTACATTTTATATTATGCTGTAATATTTTAATATACATAAATATCATTATTGATCTCATGAATATGTT +CATAAGACAACAAAAATTATATATATGAATACATCTATGTGTATGTGTAAAG
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/raw_output.txt Fri Nov 11 07:03:25 2016 -0500 @@ -0,0 +1,92 @@ + + +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +miranda v3.3a microRNA Target Scanning Algorithm +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +(c) 2003 Memorial Sloan-Kettering Cancer Center, New York + +Authors: Anton Enright, Bino John, Chris Sander and Debora Marks +(mirnatargets (at) cbio.mskcc.org - reaches all authors) + +Software written by: Anton Enright +Distributed for anyone to use under the GNU Public License (GPL), +See the files 'COPYING' and 'LICENSE' for details + +If you use this software please cite: +Enright AJ, John B, Gaul U, Tuschl T, Sander C and Marks DS; +(2003) Genome Biology; 5(1):R1. + + miranda comes with ABSOLUTELY NO WARRANTY; + This is free software, and you are welcome to redistribute it + under certain conditions; type `miranda --license' for details. + +Current Settings: +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +Query Filename: /tmp/tmpCIUFbf/files/000/dataset_109.dat +Reference Filename: /tmp/tmpCIUFbf/files/000/dataset_110.dat +Gap Open Penalty: -9.000000 +Gap Extend Penalty: -4.000000 +Score Threshold: 140.000000 +Energy Threshold: 1.000000 kcal/mol +Scaling Parameter: 4.000000 +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +Read Sequence:gi|29565487|emb|AJ550546.1| Drosophila melanogaster microRNA miR-bantam(21 nt) +Read Sequence:gi|945100|gb|U31226.1|DMU31226 Drosophila melanogaster head involution defective protein (hid) mRNA, complete cds (3'UTR only)(3902 nt) +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= +Performing Scan: gi|29565487|emb|AJ550546.1| vs gi|945100|gb|U31226.1|DMU31226 +=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= + + Forward: Score: 167.000000 Q:2 to 20 R:3340 to 3360 Align Len (18) (83.33%) (94.44%) + + Query: 3' gtCGAAAGTTTTACTAGAGTg 5' + |:||||| |||||||||: + Ref: 
5' taGTTTTCACAATGATCTCGg 3' + + Energy: -24.540001 kCal/Mol + +Scores for this hit: +>gi|29565487|emb|AJ550546.1| gi|945100|gb|U31226.1|DMU31226 167.00 -24.54 2 20 3340 3360 18 83.33% 94.44% + + + Forward: Score: 156.000000 Q:2 to 17 R:2505 to 2525 Align Len (15) (86.67%) (93.33%) + + Query: 3' gtcgaAAGTTTTACTAGAGTg 5' + |||||| ||:||||| + Ref: 5' tcataTTCAAATTGGTCTCAc 3' + + Energy: -20.030001 kCal/Mol + +Scores for this hit: +>gi|29565487|emb|AJ550546.1| gi|945100|gb|U31226.1|DMU31226 156.00 -20.03 2 17 2505 2525 15 86.67% 93.33% + + + Forward: Score: 155.000000 Q:2 to 16 R:2852 to 2872 Align Len (14) (78.57%) (85.71%) + + Query: 3' gtcgaaAGTTTTACTAGAGTg 5' + ||: | |||||||| + Ref: 5' caaaaaTCGCATTGATCTCAt 3' + + Energy: -14.570000 kCal/Mol + +Scores for this hit: +>gi|29565487|emb|AJ550546.1| gi|945100|gb|U31226.1|DMU31226 155.00 -14.57 2 16 2852 2872 14 78.57% 85.71% + + + Forward: Score: 152.000000 Q:2 to 18 R:3820 to 3841 Align Len (17) (76.47%) (76.47%) + + Query: 3' gtcgAAAGT-TTTACTAGAGTg 5' + | ||| | |||||||| + Ref: 5' taaaTATCATTATTGATCTCAt 3' + + Energy: -14.180000 kCal/Mol + +Scores for this hit: +>gi|29565487|emb|AJ550546.1| gi|945100|gb|U31226.1|DMU31226 152.00 -14.18 2 18 3820 3841 17 76.47% 76.47% + +Score for this Scan: +Seq1,Seq2,Tot Score,Tot Energy,Max Score,Max Energy,Strand,Len1,Len2,Positions +>>gi|29565487|emb|AJ550546.1| gi|945100|gb|U31226.1|DMU31226 630.00 -73.32 167.00 -24.54 1 21 3902 3340 2505 2852 3820 +Complete + +Scan Complete +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/tabular_output.txt Fri Nov 11 07:03:25 2016 -0500 @@ -0,0 +1,4 @@ +>gi|29565487|emb|AJ550546.1| gi|945100|gb|U31226.1|DMU31226 167.00 -24.54 2 20 3340 3360 18 83.33% 94.44% +>gi|29565487|emb|AJ550546.1| gi|945100|gb|U31226.1|DMU31226 156.00 -20.03 2 17 2505 2525 15 86.67% 93.33% +>gi|29565487|emb|AJ550546.1| gi|945100|gb|U31226.1|DMU31226 155.00 -14.57 2 16 2852 2872 14 78.57% 85.71% +>gi|29565487|emb|AJ550546.1| gi|945100|gb|U31226.1|DMU31226 152.00 -14.18 2 18 3820 3841 17 76.47% 76.47%
<?xml version="1.0"?>
<!--
    tool_dependencies.xml

    Tool Shed installation recipe for the "miranda" package, version 3.3a.
-->
<tool_dependency>
    <package name="miranda" version="3.3a">
        <install version="1.0">
            <actions>
                <!-- Fetch the source archive; the download is checked against the given SHA-256 sum. -->
                <action type="download_by_url"
                        target_filename="miRanda-3.3a.tar.gz"
                        sha256sum="a671da562cf4636ef5085b27349df2df2f335774663fd423deb08f31212ec778">http://cbio.mskcc.org/microrna_data/miRanda-aug2010.tar.gz</action>
                <!-- Build and install the downloaded sources. -->
                <action type="autoconf" />
                <!-- Put the installed binaries on PATH for tools that require this package. -->
                <action type="set_environment">
                    <environment_variable name="PATH" action="prepend_to">$INSTALL_DIR/bin</environment_variable>
                </action>
            </actions>
        </install>
        <readme>
miRanda finds potential target sites for miRNAs in genomic sequences.
        </readme>
    </package>
</tool_dependency>