# HG changeset patch
# User rnateam
# Date 1483571707 18000
# Node ID 79b9117aef0158b410cbe13f1ef2808b7ea120b0
# Parent b8e32e577597c5fb41aab6d587a617e10844b1d2
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit c03cf64554289eb098267c0923cf0cf7b245cc0c
diff -r b8e32e577597 -r 79b9117aef01 addCdhitseqs.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/addCdhitseqs.py Wed Jan 04 18:15:07 2017 -0500
@@ -0,0 +1,95 @@
+"""Append sequences removed by CD-HIT to the GraphClust cluster files.
+
+Usage: python addCdhitseqs.py <cdhit .clstr file>
+
+CD-HIT keeps one representative per cluster and drops the redundant
+sequences. For every RESULTS/*.cluster.all file that mentions a
+representative, one extra line per dropped sequence is appended so the
+dropped sequences are reported next to their representative.
+"""
+import re
+import glob
+import sys
+
+CLUSTER_FILES_GLOB = "RESULTS/*.cluster.all"
+
+
+def _member_id(line):
+    """Return the sequence id of a .clstr member line, or None if malformed."""
+    fields = line.split()
+    if len(fields) < 3:
+        return None
+    # The third field looks like ">SEQ_ID..."; strip the decoration.
+    return fields[2].replace(">", "").replace("...", "")
+
+
+def parse_cdhit_clusters(path):
+    """Map each representative id to the ids of its redundant sequences.
+
+    Clusters with a single member are omitted. The representative is the
+    member flagged with a trailing "*"; if no member is flagged, the first
+    member is used.
+    """
+    clusters = []
+    # Opened read-only: the input file is never modified.
+    with open(path) as handle:
+        for raw in handle:
+            line = raw.strip()
+            if line.startswith(">Cluster"):
+                clusters.append([])
+            elif line and clusters:
+                clusters[-1].append(line)
+
+    rep_to_redundant = {}
+    for members in clusters:
+        ids = [(_member_id(m), m.endswith("*")) for m in members]
+        ids = [(seq_id, is_rep) for seq_id, is_rep in ids if seq_id]
+        if len(ids) < 2:
+            continue
+        rep_pos = 0
+        for pos, (_, is_rep) in enumerate(ids):
+            if is_rep:
+                rep_pos = pos
+                break
+        rep_to_redundant[ids[rep_pos][0]] = [
+            seq_id for pos, (seq_id, _) in enumerate(ids) if pos != rep_pos
+        ]
+    return rep_to_redundant
+
+
+def append_redundant_seqs(cluster_file, rep_to_redundant):
+    """Append a line per redundant sequence whose representative is present."""
+    # Read first with a plain read handle: reading through an "a+" handle
+    # starts at end-of-file on Python 3 and yields an empty string.
+    with open(cluster_file) as handle:
+        content = handle.read()
+    header = content.split("\n")[0].split()
+    if len(header) < 6:
+        # Empty or unexpected file: there is no template line to copy from.
+        return
+    # Built per file so lines never leak from one cluster file into the next.
+    new_lines = []
+    for rep_id, redundant in rep_to_redundant.items():
+        # Substring test kept on purpose: ids in the .clstr file end in
+        # "..." and may be truncated, so exact matching could miss them.
+        if rep_id in content:
+            for seq_id in redundant:
+                new_lines.append(
+                    "{0} {1} {2}  -  CD-Hit {3} ORIGID {4}\n".format(
+                        header[0], header[1], header[2], header[5], seq_id))
+    if not new_lines:
+        return
+    with open(cluster_file, "a") as handle:
+        if not content.endswith("\n"):
+            handle.write("\n")
+        handle.writelines(new_lines)
+
+
+def main(argv):
+    """Parse the CD-HIT cluster file and update every GraphClust cluster file."""
+    if len(argv) < 2:
+        sys.exit("usage: addCdhitseqs.py <cdhit .clstr file>")
+    rep_to_redundant = parse_cdhit_clusters(argv[1])
+    for cluster_file in sorted(glob.glob(CLUSTER_FILES_GLOB)):
+        append_redundant_seqs(cluster_file, rep_to_redundant)
+
+
+if __name__ == "__main__":
+    main(sys.argv)
diff -r b8e32e577597 -r 79b9117aef01 evaluation.py
--- a/evaluation.py Sat Dec 24 18:08:36 2016 -0500
+++ b/evaluation.py Wed Jan 04 18:15:07 2017 -0500
@@ -49,16 +49,10 @@
full.write(toWrite)
-listOfClasses = []
-listOfClusters = []
pattern = re.compile("^RF.*$")
if len(listOfClasses) > 0 and pattern.match(str(listOfClasses[0])):
- with open("RESULTS/fullTab.tabular", "r") as tabF:
- for line in tabF.readlines():
- listOfClasses.append(line.split()[0])
- listOfClusters.append(line.split()[1])
completeness_score = metrics.completeness_score(listOfClasses, listOfClusters)
homogeneity_score = metrics.homogeneity_score(listOfClasses, listOfClusters)
diff -r b8e32e577597 -r 79b9117aef01 glob_report.xml
--- a/glob_report.xml Sat Dec 24 18:08:36 2016 -0500
+++ b/glob_report.xml Wed Jan 04 18:15:07 2017 -0500
@@ -9,7 +9,6 @@
/dev/null &&
#set $inputFiles = ""
@@ -25,17 +24,29 @@
#set $inputFilesTrees += str($mods)+','
#end for
#set $inputFilesTrees = $inputFilesTrees[:-1]
-
- 'glob_res.pl' '$inputFiles' $merge_cluster_ol $merge_overlap $min_cluster_size $cm_min_bitscore $cm_max_eval $cm_bitscore_sig $partition_type '' $cut_type '$inputFilesTrees'
+ glob_res.pl
+ '$inputFiles'
+ $merge_cluster_ol
+ $merge_overlap
+ $min_cluster_size
+ $cm_min_bitscore
+ $cm_max_eval
+ $cm_bitscore_sig
+ $partition_type ''
+ $cut_type
+ '$inputFilesTrees'
#if $iteration_num.iteration_num_selector:
$iteration_num.CI
-
$final_partition_soft
$final_partition_used_cmsearch
#end if
&&
python '$__tool_directory__/evaluation.py'
+ #if $cdhit:
+ &&
+ python '$__tool_directory__/addCdhitseqs.py' '$cdhit'
+ #end if
]]>
@@ -44,6 +55,7 @@
+