changeset 0:78772eedd780 draft

planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CMFinder commit 21aaee40723b5341b4236edeb0e72995c2054053
author rnateam
date Fri, 16 Dec 2016 07:34:34 -0500
parents
children a4b009876214
files cmFinder.py cmFinder.xml test-data/cmfinder.fa test-data/in.model.tree.stk test-data/model.cmfinder.stk
diffstat 5 files changed, 267 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cmFinder.py	Fri Dec 16 07:34:34 2016 -0500
@@ -0,0 +1,43 @@
+import os
+import sys
+from shutil import copyfile
+from os import system
+
+
+def sh(script):
+    """Run ``script`` as ``bash -c '<script>'`` and return os.system()'s status."""
+    return system("bash -c '%s'" % script)
+
+
+# Usage: cmFinder.py <model_tree_stk> <cmfinder_fa> <path> [<gapCmd> <gapVal>]
+# gapCmd and gapVal are only used as a pair; a lone gapCmd used to crash with
+# an IndexError on sys.argv[5], so report bad argument counts explicitly.
+if len(sys.argv) < 4 or len(sys.argv) == 5:
+    sys.exit("usage: cmFinder.py model_tree_stk cmfinder_fa path [gapCmd gapVal]")
+
+model_tree_stk = sys.argv[1]
+cmfinder_fa = sys.argv[2]
+path = sys.argv[3]  # prepended verbatim to the helper script names below
+gapCmd = sys.argv[4] if len(sys.argv) > 5 else ""
+gapVal = sys.argv[5] if len(sys.argv) > 5 else ""
+
+# Copy the input to the fixed name that mloc2stockholm.pl is run on below.
+copyfile(model_tree_stk, "model.cmfinder.stk")
+
+sh("%salifold.pl -file  %s" % (path, model_tree_stk))
+
+# NOTE(review): assumes alifold.pl leaves its result in <model_tree_stk>.alifold.
+sh("%smloc2stockholm.pl -file model.cmfinder.stk  -split_input yes --con_struct %s.alifold" % (path, model_tree_stk))
+
+# The generated .sth file becomes the alignment handed to cmfinder via -a.
+os.rename("model.cmfinder.stk.sth", "model.tree.stk")
+
+# cmfinder's stdout is captured as the new model.cmfinder.stk.
+sh("cmfinder %s %s -a model.tree.stk %s output > model.cmfinder.stk" % (gapCmd, gapVal, cmfinder_fa))
+
+# If no 'output' file exists afterwards, fall back to the alignment that was
+# given to cmfinder; otherwise keep the captured stdout and delete 'output'.
+if os.path.isfile('output'):
+    os.remove("output")
+else:
+    copyfile("model.tree.stk", "model.cmfinder.stk")
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cmFinder.xml	Fri Dec 16 07:34:34 2016 -0500
@@ -0,0 +1,69 @@
+<tool id="cmFinder" name="CMFinder_v0" version="0.1.0" >
+	<requirements>
+		<requirement type="package" version="0.1">graphclust-wrappers</requirement>
+		<requirement type="package" version='0.2'>cmfinder</requirement>
+		<requirement type="package" version='2.2.10'>viennarna</requirement>
+	</requirements>
+	<stdio>
+		<exit_code range="1:" />
+	</stdio>
+	<command>
+		<![CDATA[
+        ## The empty third argument ('') is the path prefix cmFinder.py prepends to alifold.pl and mloc2stockholm.pl.
+        python '$__tool_directory__/cmFinder.py' '$model_tree_stk' '$cmfinder_fa' ''
+        $gap_threshold_opts.gap_threshold_opts_selector
+        #if str($gap_threshold_opts.gap_threshold_opts_selector) == '--g':
+          $gap_threshold_opts.gap
+        #end if
+        ## The gap value is appended only when the '--g' selector is chosen; otherwise the selector expands to nothing.
+]]>
+	</command>
+	<inputs> <!-- the two datasets and the optional gap threshold that the command hands to cmFinder.py -->
+		<param name="model_tree_stk" type="data"  format="stockholm" label="model_tree_stk" help="" />
+		<param name="cmfinder_fa" type="data"  format="txt" label="cmfinder_fa" help="" /> <!-- "text" is not an extension in Galaxy's stock datatype registry; the plain-text datatype is "txt" -->
+		<conditional name="gap_threshold_opts"> <!-- the selector value is passed to the script as-is; the gap value follows only for the first option -->
+			<param name="gap_threshold_opts_selector" type="select" label="Use gap threshold" help="">
+				<option value="--g" selected="true">Yes (--g)</option>
+				<option value="">No</option>
+			</param>
+			<when value="--g">
+				<param name="gap" type="float" value="1.0" size="5"
+                        label="Define the gap threshold to determine the conserved column (--g)" help=""/>
+			</when>
+			<when value=""/>
+		</conditional>
+	</inputs>
+	<outputs>
+		<data name="model_cmfinder_stk" format="stockholm" label="model_cmfinder_stk" from_work_dir="model.cmfinder.stk"/>
+	</outputs>
+	<tests>
+		<test>
+			<param name="model_tree_stk" value="in.model.tree.stk"/>
+			<param name="cmfinder_fa" value="cmfinder.fa"/>
+			<param name="gap_threshold_opts.gap_threshold_opts_selector" value="--g"/>
+			<param name="gap_threshold_opts.gap" value="1.0"/>
+			<output name="model_cmfinder_stk" file="model.cmfinder.stk"/>
+		</test>
+	</tests>
+	<help>
+		<![CDATA[
+**What it does**
+
+First, the input alignment is converted from CLUSTAL to STOCKHOLM format. *CMFinder* is then used to determine
+consensus motifs for the sequences.
+]]>
+	</help>
+	<citations>
+    <citation type="bibtex">@inproceedings{costa2010fast,
+        title={Fast neighborhood subgraph pairwise distance kernel},
+        author={Costa, Fabrizio and De Grave, Kurt},
+        booktitle={Proceedings of the 26th International Conference on Machine Learning},
+        pages={255--262},
+        year={2010},
+        organization={Omnipress}
+      }
+      </citation>
+      <citation type="doi">10.1093/bioinformatics/btk008</citation>
+      <citation type="doi">10.1186/1748-7188-6-26</citation>
+  </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cmfinder.fa	Fri Dec 16 07:34:34 2016 -0500
@@ -0,0 +1,40 @@
+>7 SEQ7#1#83#+ ORIGID RF00005_rep.12_AC108081.2/59868-59786_7 ORIGHEAD RF00005_rep.12
+GUCAGGAUGGCCGAGCGGUCUAAGGCGCUGCGUUCAGGUCGCAGUCUCCCCUGGAGGCGUGGGUUCGAAUCCCACUUCUGACA
+>9 SEQ9#1#73#+ ORIGID RF00005_rep.14_AL021808.2/65570-65498_9 ORIGHEAD RF00005_rep.14
+GCUUCUGUAGUGUAGUGGUUAUCACGUUCGCCUCACACGCGAAAGGUCCCCGGUUCGAAACCGGGCAGAAGCA
+>10 SEQ10#1#73#+ ORIGID RF00005_rep.15_AC008443.10/42590-42518_10 ORIGHEAD RF00005_rep.15
+GCCCGGCUAGCUCAGUCGGUAGAGCAUGAGACUCUUAAUCUCAGGGUCGUGGGUUCGAGCCCCACGUUGGGCG
+>15 SEQ15#1#73#+ ORIGID RF00005_rep.1_AC005329.1/7043-6971_15 ORIGHEAD RF00005_rep.1
+GCCGAAAUAGCUCAGUUGGGAGAGCGUUAGACUGAAGAUCUAAAGGUCCCUGGUUCGAUCCCGGGUUUCGGCA
+>16 SEQ16#1#72#+ ORIGID RF00005_rep.20_AL671879.2/100356-100285_16 ORIGHEAD RF00005_rep.20
+GGGGAUGUAGCUCAGUGGUAGAGCGCAUGCUUCGCAUGUAUGAGGCCCCGGGUUCGAUCCCCGGCAUCUCCA
+>17 SEQ17#1#71#+ ORIGID RF00005_rep.21_AL355149.13/15278-15208_17 ORIGHEAD RF00005_rep.21
+GCAUUGGUGGUUCAGUGGUAGAAUUCUCGCCUCCCACGCGGGAGACCCGGGUUCAAUUCCCGGCCAAUGCA
+>18 SEQ18#1#72#+ ORIGID RF00005_rep.22_AL590385.23/26487-26416_18 ORIGHEAD RF00005_rep.22
+GCGUUGGUGGUAUAGUGGUGAGCAUAGCUGCCUUCCAAGCAGUUGACCCGGGUUCGAUUCCCGGCCAACGCA
+>23 SEQ23#1#74#+ ORIGID RF00005_rep.27_AL352978.6/119697-119770_23 ORIGHEAD RF00005_rep.27
+GGCCGGUUAGCUCAGUUGGUUAGAGCGUGGUGCUAAUAACGCCAAGGUCGCGGGUUCGAUCCCCGUACGGGCCA
+>28 SEQ28#1#71#+ ORIGID RF00005_rep.31_AC092686.3/29631-29561_28 ORIGHEAD RF00005_rep.31
+GCAUUGGUGGUUCAGUGGUAGAAUUCUCGCCUGCCACGCGGGAGGCCCGGGUUCGAUUCCCGGCCAAUGCA
+>30 SEQ30#1#72#+ ORIGID RF00005_rep.33_AC018638.5/4694-4623_30 ORIGHEAD RF00005_rep.33
+GGCUCGUUGGUCUAGGGGUAUGAUUCUCGCUUAGGGUGCGAGAGGUCCCGGGUUCAAAUCCCGGACGAGCCC
+>31 SEQ31#1#73#+ ORIGID RF00005_rep.34_AC008443.10/43006-42934_31 ORIGHEAD RF00005_rep.34
+GUUUCCGUAGUGUAGUGGUUAUCACGUUCGCCUCACACGCGAAAGGUCCCCGGUUCGAAACCGGGCGGAAACA
+>32 SEQ32#1#73#+ ORIGID RF00005_rep.35_AC005783.1/27398-27326_32 ORIGHEAD RF00005_rep.35
+GUUUCCGUAGUGUAGCGGUUAUCACAUUCGCCUCACACGCGAAAGGUCCCCGGUUCGAUCCCGGGCGGAAACA
+>33 SEQ33#1#72#+ ORIGID RF00005_rep.36_AC007298.17/145366-145295_33 ORIGHEAD RF00005_rep.36
+UCCUCGUUAGUAUAGUGGUGAGUAUCCCCGCCUGUCACGCGGGAGACCGGGGUUCGAUUCCCCGACGGGGAG
+>35 SEQ35#1#72#+ ORIGID RF00005_rep.38_J00309.1/356-427_35 ORIGHEAD RF00005_rep.38
+UCCCUGGUGGUCUAGUGGCUAGGAUUCGGCGCUUUCACCGCCGCGCCCCGGGUUCGAUUCCCGGCCAGGAAU
+>37 SEQ37#1#82#+ ORIGID RF00005_rep.3_Z54587.1/126-45_37 ORIGHEAD RF00005_rep.3
+GGUAGCGUGGCCGAGCGGUCUAAGGCGCUGGAUUUAGGCUCCAGUCUCUUCGGAGGCGUGGGUUCGAAUCCCACCGCUGCCA
+>46 SEQ46#1#72#+ ORIGID RF00005_rep.5_AL590385.23/26129-26058_46 ORIGHEAD RF00005_rep.5
+UCCCUGGUGGUCUAGUGGUUAGGAUUCGGCGCUCUCACCGCCGCGGCCCGGGUUCGAUUCCCGGUCAGGGAA
+>51 SEQ51#1#88#+ ORIGID RF00006_rep.0_AF045145.1/1-88_51 ORIGHEAD RF00006_rep.0
+GGCUGGCUUUAGCUCAGCGGUUACUUCGCGUGUCAUCAAACCACCUCUCUGGGUUGUUCGAGACCCGCGGGCGCUCUCCAGCCCUCUU
+>52 SEQ52#1#101#+ ORIGID RF00006_rep.1_AC005219.1/49914-50014_52 ORIGHEAD RF00006_rep.1
+GGGUCGGAGUUAGCUCAAGCGGUUACCUCCUCAUGCCGGACUUUCUAUCUGUCCAUCUCUGUGCUGGGGUUCGAGACCCGCGGGUGCUUACUGACCCUUUU
+>53 SEQ53#1#98#+ ORIGID RF00006_rep.2_AF045143.1/1-98_53 ORIGHEAD RF00006_rep.2
+GGCUGGCUUUAGCUCAGCGGUUACUUCGACAGUUCUUUAAUUGAAACAAGCAACCUGUCUGGGUUGUUCGAGACCCGCGGGCGCUCUCCAGUCCUUUU
+>54 SEQ54#1#88#+ ORIGID RF00006_rep.3_AF045144.1/1-88_54 ORIGHEAD RF00006_rep.3
+GGCUGGCUUUAGCUCAGCGGUUACUUCGAGUACAUUGUAACCACCUCUCUGGGUGGUUCGAGACCCGCGGGUGCUUUCCAGCUCUUUU
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/in.model.tree.stk	Fri Dec 16 07:34:34 2016 -0500
@@ -0,0 +1,8 @@
+CLUSTAL W --- LocARNA 1.8.10
+
+18                 GCGUUGGUGGUAUAGUGGUGAGCAUAGCUGCCUUCCAAGCAGUUGA-CCCGGGUUCGAUUCCCGGCCAACGCA
+17                 GCAUUGGUGGUUCAGUGGU-AGAAUUCUCGCCUCCCACGCGGGAGA-CCCGGGUUCAAUUCCCGGCCAAUGCA
+28                 GCAUUGGUGGUUCAGUGGU-AGAAUUCUCGCCUGCCACGCGGGAGG-CCCGGGUUCGAUUCCCGGCCAAUGCA
+46                 UCCCUGGUGGUCUAGUGGUUAGGAUUCGGCGCUCUCACCGCCGCGG-CCCGGGUUCGAUUCCCGGUCAGGGAA
+35                 UCCCUGGUGGUCUAGUGGCUAGGAUUCGGCGCUUUCACCGCCGCGC-CCCGGGUUCGAUUCCCGGCCAGGAAU
+30                 GGCUCGUUGGUCUAGGGGU-AUGAUUCUCGCUUAGGGUGCGAGAGGUCCCGGGUUCAAAUCCCGGACGAGCCC
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/model.cmfinder.stk	Fri Dec 16 07:34:34 2016 -0500
@@ -0,0 +1,107 @@
+# STOCKHOLM 1.0
+#=GF AU    CMfinder 0.2
+
+#=GS 7  WT    1.00
+#=GS 9  WT    1.00
+#=GS 10 WT    1.00
+#=GS 15 WT    1.00
+#=GS 16 WT    1.00
+#=GS 17 WT    1.00
+#=GS 18 WT    1.00
+#=GS 23 WT    1.00
+#=GS 28 WT    1.00
+#=GS 30 WT    1.00
+#=GS 31 WT    1.00
+#=GS 32 WT    1.00
+#=GS 33 WT    1.00
+#=GS 35 WT    1.00
+#=GS 37 WT    1.00
+#=GS 46 WT    1.00
+
+#=GS  7 DE      1.. 83	79.124153
+#=GS  9 DE      1.. 73	96.426384
+#=GS 10 DE      1.. 73	66.977600
+#=GS 15 DE      1.. 73	79.247490
+#=GS 16 DE      1.. 72	74.848335
+#=GS 17 DE      1.. 71	90.305908
+#=GS 18 DE      1.. 72	81.752869
+#=GS 23 DE      1.. 74	80.096992
+#=GS 28 DE      1.. 71	92.405518
+#=GS 30 DE      1.. 72	75.032211
+#=GS 31 DE      1.. 73	95.823235
+#=GS 32 DE      1.. 73	94.321754
+#=GS 33 DE      1.. 72	74.710342
+#=GS 35 DE      1.. 72	71.951927
+#=GS 37 DE      1.. 82	77.369476
+#=GS 46 DE      1.. 72	83.877258
+
+7              GUCAGGAUGGCCGAG-CGGUCUAAGGCGCUGCGUUCAGGUCGCAGUCUCC
+#=GR 7  SS     <<<<<<<.....................<<<<<.......>>>>>.....
+9              GCUUCUGUAGUGUAG-UGGU-UAUCACGUUCGCCUCACACGCGAA-----
+#=GR 9  SS     <<<<<<<.....................<<<<<.......>>>>>.....
+10             GCCCGGCUAGCUCAGUCGGU--AGAGCAUGAGACUCUUAAUCUCA-----
+#=GR 10 SS     <<<<<<<.....................<<<<<.......>>>>>.....
+15             GCCGAAAUAGCUCAGUUGGG--AGAGCGUUAGACUGAAGAUCUAA-----
+#=GR 15 SS     <<<<<<<.....................<<<<<.......>>>>>.....
+16             GGGGAUGUAGCUCAG-UGGU--AGAGCGCAUGCUUCGCAUGUAUG-----
+#=GR 16 SS     <<<<<<<.....................<<<<<.......>>>>>.....
+17             GCAUUGGUGGUUCAG-UGGU--AGAAUUCUCGCCUCCCACGCGGG-----
+#=GR 17 SS     <<<<<<<.....................<<<<<.......>>>>>.....
+18             GCGUUGGUGGUAUAG-UGGU-GAGCAUAGCUGCCUUCCAAGCAGU-----
+#=GR 18 SS     <<<<<<<.....................<<<<<.......>>>>>.....
+23             GGCCGGUUAGCUCAGUUGGU-UAGAGCGUGGUGCUAAUAACGCCA-----
+#=GR 23 SS     <<<<-<<.....................<<<<<.......>>>>>.....
+28             GCAUUGGUGGUUCAG-UGGU--AGAAUUCUCGCCUGCCACGCGGG-----
+#=GR 28 SS     <<<<<<<.....................<<<<<.......>>>>>.....
+30             GGCUCGUUGGUCUAG-GGGU--AUGAUUCUCGCUUAGGGUGCGAG-----
+#=GR 30 SS     <<<<<<<.....................<<<<<.......>>>>>.....
+31             GUUUCCGUAGUGUAG-UGGU-UAUCACGUUCGCCUCACACGCGAA-----
+#=GR 31 SS     <<<<<<<.....................<<<<<.......>>>>>.....
+32             GUUUCCGUAGUGUAG-CGGU-UAUCACAUUCGCCUCACACGCGAA-----
+#=GR 32 SS     <<<<<<<.....................<<<<<.......>>>>>.....
+33             UCCUCGUUAGUAUAG-UGGU-GAGUAUCCCCGCCUGUCACGCGGG-----
+#=GR 33 SS     <<<<<<<.....................<<<<<.......>>>>>.....
+35             UCCCUGGUGGUCUAG-UGGC-UAGGAUUCGGCGCUUUCACCGCCG-----
+#=GR 35 SS     <-<<<<<.....................<<<<<.......>>>>>.....
+37             GGUAGCGUGGCCGAG-CGGUCUAAGGCGCUGGAUUUAGGCUCCAGUCUCU
+#=GR 37 SS     <<<<<<<.....................<<<<<.......>>>>>.....
+46             UCCCUGGUGGUCUAG-UGGU-UAGGAUUCGGCGCUCUCACCGCCG-----
+#=GR 46 SS     <<<<<<<.....................<<<<<.......>>>>>.....
+#=GC SS_cons   (((((((,,,,,,,,,,,,,,,,,,,,,<<<<<_______>>>>>,,,,,
+#=GC RF        GuuUuggUAGUuuAGUUGGUCUAGAAcaUUcgcCUcAcAcgcgAAUCUCu
+
+7              CCUGGAGG-CGUGGGUUCGAAUCCCACUUCUGACA
+#=GR 7  SS     ..........<<<<<.......>>>>>>>>>>>>.
+9              -----AGGUCCCCGGUUCGAAACCGGGCAGAAGCA
+#=GR 9  SS     ..........<<<<<.......>>>>>>>>>>>>.
+10             -----GGGUCGUGGGUUCGAGCCCCACGUUGGGCG
+#=GR 10 SS     ..........<<<<<.......>>>>>>>>>>>>.
+15             -----AGGUCCCUGGUUCGAUCCCGGGUUUCGGCA
+#=GR 15 SS     ..........<<<<<.......>>>>>>>>>>>>.
+16             -----AGGCCCCGGGUUCGAUCCCCGGCAUCUCCA
+#=GR 16 SS     ..........<<<<<.......>>>>>>>>>>>>.
+17             -----AGA-CCCGGGUUCAAUUCCCGGCCAAUGCA
+#=GR 17 SS     ..........<<<<<.......>>>>>>>>>>>>.
+18             -----UGA-CCCGGGUUCGAUUCCCGGCCAACGCA
+#=GR 18 SS     ..........<<<<<.......>>>>>>>>>>>>.
+23             -----AGGUCGCGGGUUCGAUCCCCGUACGGGCCA
+#=GR 23 SS     ..........<<<<<.......>>>>>>>->>>>.
+28             -----AGG-CCCGGGUUCGAUUCCCGGCCAAUGCA
+#=GR 28 SS     ..........<<<<<.......>>>>>>>>>>>>.
+30             -----AGGUCCCGGGUUCAAAUCCCGGACGAGCCC
+#=GR 30 SS     ..........<<<<<.......>>>>>>>>>>>>.
+31             -----AGGUCCCCGGUUCGAAACCGGGCGGAAACA
+#=GR 31 SS     ..........<<<<<.......>>>>>>>>>>>>.
+32             -----AGGUCCCCGGUUCGAUCCCGGGCGGAAACA
+#=GR 32 SS     ..........<<<<<.......>>>>>>>>>>>>.
+33             -----AGA-CCGGGGUUCGAUUCCCCGACGGGGAG
+#=GR 33 SS     ..........<<<<<.......>>>>>>>>>>>>.
+35             -----CGC-CCCGGGUUCGAUUCCCGGCCAGGAAU
+#=GR 35 SS     ..........<<<<<.......>>>>>>>>>>->.
+37             UCGG-AGG-CGUGGGUUCGAAUCCCACCGCUGCCA
+#=GR 37 SS     ..........<<<<<.......>>>>>>>>>>>>.
+46             -----CGG-CCCGGGUUCGAUUCCCGGUCAGGGAA
+#=GR 46 SS     ..........<<<<<.......>>>>>>>>>>>>.
+#=GC SS_cons   ,,,,,,,,,,<<<<<_______>>>>>))))))):
+#=GC RF        uCuGgAGGUCCCgGGUUCGAUUCCcGGccaAaaCA
+//