Mercurial > repos > rnateam > graphclust_postprocessing
comparison addCdhitseqs.py @ 3:79b9117aef01 draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/branchForIterations/tools/GraphClust/CollectResults commit c03cf64554289eb098267c0923cf0cf7b245cc0c
author | rnateam |
---|---|
date | Wed, 04 Jan 2017 18:15:07 -0500 |
parents | |
children | 869a6e807d76 |
comparison
equal
deleted
inserted
replaced
2:b8e32e577597 | 3:79b9117aef01 |
---|---|
"""Append sequences that CD-HIT collapsed as redundant to the cluster files.

Usage: addCdhitseqs.py <cdhit_cluster_file>

The CD-HIT cluster listing given on the command line is parsed into a mapping
"representative id -> ids of the other sequences of its cluster".  Every file
matching ``RESULTS/*.cluster.all`` (relative to the working directory) that
mentions a representative id then gets one extra record appended per
redundant sequence of that representative.
"""
import re
import glob
import sys

# Files that receive the additional records, relative to the working directory.
CLUSTER_STATS_GLOB = "RESULTS/*.cluster.all"


def _sequence_id(cluster_line):
    """Return the id stored in the third whitespace field of a member line.

    Every ">" and every "..." found in that field is removed.

    Raises:
        IndexError: if the line has fewer than three whitespace fields.
    """
    return cluster_line.split()[2].replace(">", "").replace("...", "")


def parse_cdhit_clusters(cdhit_cluster_path):
    """Map each cluster representative to the other members of its cluster.

    A line starting with ">Cluster" opens a new cluster.  The first line that
    follows is taken as the representative and all further lines up to the
    next header as redundant members.  Clusters without further members do
    not show up in the result.  Blank lines and any text in front of the
    first header are ignored.

    NOTE(review): the representative is taken by position, not by a "*"
    marker on the line -- confirm the input always lists it first.

    Args:
        cdhit_cluster_path: path of the CD-HIT cluster listing.

    Returns:
        dict mapping representative id -> list of member ids in file order.

    Raises:
        IndexError: if a needed line has fewer than three whitespace fields.
    """
    rep_to_members = {}
    in_cluster = False
    rep_line = None
    rep_id = None
    members = []

    # Read-only access is enough; the listing is never modified.
    with open(cdhit_cluster_path) as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line:
                continue
            # startswith() rather than a substring test: a member whose id
            # begins with "Cluster" also contains ">Cluster".
            if line.startswith(">Cluster"):
                in_cluster = True
                rep_line = None
                rep_id = None
                continue
            if not in_cluster:
                continue
            if rep_line is None:
                rep_line = line
                members = []
                continue
            # The representative line is only parsed once a cluster turns
            # out to have additional members.
            if rep_id is None:
                rep_id = _sequence_id(rep_line)
                rep_to_members[rep_id] = members
            members.append(_sequence_id(line))

    return rep_to_members


def append_cdhit_members(cluster_file, rep_to_members):
    """Append one record per redundant sequence to a single cluster file.

    A representative counts as present when its id occurs anywhere in the
    file content.  Each new record reuses fields 0, 1, 2 and 5 of the file's
    first line and has the layout::

        <f0> <f1> <f2>  -  CD-Hit <f5> ORIGID <member id>

    NOTE(review): presence is a plain substring test, so an id that is a
    prefix of another id also matches -- verify against the file format.

    Args:
        cluster_file: path of the file to extend.
        rep_to_members: mapping as returned by ``parse_cdhit_clusters``.

    Returns:
        Number of records appended (0 leaves the file untouched).

    Raises:
        IndexError: if records must be written but the first line of the
            file has fewer than six whitespace fields.
    """
    # Read and append through separate handles: a stream opened with "a+"
    # starts at end-of-file on Python 3, so reading it directly yields "".
    with open(cluster_file) as handle:
        content = handle.read()

    matched_members = []
    for rep_id, members in rep_to_members.items():
        if rep_id in content:
            matched_members.extend(members)
    if not matched_members:
        return 0

    fields = content.split("\n")[0].split()
    record_prefix = " ".join(
        [fields[0], fields[1], fields[2], " - ", "CD-Hit", fields[5], "ORIGID"]
    )
    records = [
        record_prefix + " " + str(member) + "\n" for member in matched_members
    ]

    with open(cluster_file, "a") as handle:
        # Keep the first new record from being glued onto an unterminated
        # last line.
        if content and not content.endswith("\n"):
            handle.write("\n")
        handle.writelines(records)
    return len(records)


def main(argv=None):
    """Run the script.

    Args:
        argv: command-line arguments without the program name; defaults to
            ``sys.argv[1:]``.  The first entry is the CD-HIT cluster listing.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        sys.exit("usage: addCdhitseqs.py <cdhit_cluster_file>")

    rep_to_members = parse_cdhit_clusters(args[0])
    # Records are built per file, so no file receives lines meant for
    # another one.
    for cluster_file in sorted(glob.glob(CLUSTER_STATS_GLOB)):
        append_cdhit_members(cluster_file, rep_to_members)


if __name__ == "__main__":
    main()