# Source: Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
# File (comparison view): orthologs/hmmsearch.py @ 0:5b9a38ec4a39 (draft, default, tip)
# Commit message: First commit of old repositories
# Author: osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
# Date: Tue, 11 Mar 2014 12:19:13 -0700
# Parents: (none)
# Children: (none)
# Comparison legend: equal, deleted, inserted, replaced
# Compared revisions: -1:000000000000 -> 0:5b9a38ec4a39
import os
import optparse
import subprocess
from multiprocessing import Pool

# Scratch directory (relative to the current working directory) that holds
# the unpacked model files and the per-model hmmsearch outputs.
results_dir = "./data"
# Name of the combined output file written inside results_dir.
results = "results.data"
# Suffix appended to a model file name to form its hmmsearch output file.
result_extension = ".out"
# Suffix of the per-model files written by unpackData().
model_extension = ".hmm"
# Path of the sequence database; assigned by main() from the -d option.
database = ""


def unescape(string):
    """Reverse the ``__xx__`` escaping of special characters.

    Every escape token listed in ``mapped_chars`` (for example ``__gt__``)
    is replaced by the literal character it stands for.

    Args:
        string: Text that may contain escape tokens.

    Returns:
        ``string`` with every known escape token replaced.
    """
    mapped_chars = {
        '>': '__gt__',
        '<': '__lt__',
        "'": '__sq__',
        '"': '__dq__',
        '[': '__ob__',
        ']': '__cb__',
        '{': '__oc__',
        '}': '__cc__',
        '@': '__at__',
        '\n': '__cn__',
        '\r': '__cr__',
        '\t': '__tc__',
        '#': '__pd__',
    }

    # items() is available on Python 2 and 3; iteritems() is Python 2 only.
    for key, value in mapped_chars.items():
        string = string.replace(value, key)

    return string


def unpackData(models):
    """Split a packed model file into one file per model under results_dir.

    Each non-blank line of ``models`` is parsed with ``HMM``; the parsed
    model text is appended to ``<results_dir>/<name><model_extension>``.

    Args:
        models: Path of the packed model file, one record per line.

    Raises:
        IndexError: Propagated from ``HMM`` when a non-blank line has no
            tab separator.
    """
    with open(models) as f:
        for line in f:
            # A blank line carries no record and would otherwise make
            # HMM() fail on the missing tab separator.
            if not line.strip():
                continue
            hmm = HMM(line)
            # Append mode: records sharing a name accumulate in one file.
            with open(results_dir + os.sep + hmm.name + model_extension, "a") as p:
                p.write(hmm.model)


class HMM:
    r"""One packed model record: a name and the model text.

    A record is a single line of the form ``<name>\t<model>`` in which the
    model's line breaks are stored as a literal backslash followed by ``n``.

    Attributes are set by ``__init__``: ``name`` (text before the first
    tab) and ``model`` (text after it, with line breaks restored).
    """

    def __init__(self, string):
        """Parse one packed record.

        Args:
            string: The record line.

        Raises:
            IndexError: If ``string`` contains no tab.
        """
        # Split on the first tab only, so a tab inside the model text does
        # not truncate the model.
        lis = string.split('\t', 1)
        self.model = self.restoreNewLines(lis[1])
        self.name = lis[0]

    def restoreNewLines(self, string):
        """Return ``string`` with each literal backslash-n turned into a newline."""
        return string.replace('\\n', '\n')


def toData(text):
    """Return the text to store for one hmmsearch result.

    This is currently a pass-through: ``text`` is returned unchanged.
    An earlier, disabled variant prefixed a name and escaped newlines as
    literal backslash-n; none of that is applied here.

    Args:
        text: Contents of one hmmsearch output file.

    Returns:
        ``text`` unchanged.
    """
    return text


def hmmsearch(input):
    """Run the external ``hmmsearch`` program for one model file.

    The model ``<results_dir>/<input>`` is searched against the module-level
    ``database``; the output file is ``<results_dir>/<input><result_extension>``.

    Args:
        input: File name (not a path) of a model inside ``results_dir``.

    Returns:
        The exit status of the ``hmmsearch`` process (previously discarded),
        so callers can detect failed searches.

    Raises:
        OSError: If the ``hmmsearch`` executable cannot be started.
    """
    file_name = results_dir + os.sep + input
    # "-o" names the output file for this model (presumably hmmsearch's
    # report destination -- confirm against the HMMER documentation).
    # call() starts the process and waits for it, like Popen(...).wait().
    return subprocess.call(
        ['hmmsearch', "-o", file_name + result_extension, file_name, database]
    )


def _set_database(path):
    """Pool initializer: publish the database path inside a worker process."""
    global database
    database = path


def main():
    """Command-line entry point: search every packed model against a database.

    Steps:
      1. Parse ``-i/--hmm`` (packed models) and ``-d/--database``.
      2. Create ``results_dir`` and unpack the models into it.
      3. Run ``hmmsearch`` on every model file using a process pool.
      4. Append every per-model output to ``<results_dir>/<results>``.

    Raises:
        SystemExit: Via ``parser.error`` when a required option is missing.
        OSError: If ``results_dir`` already exists or cannot be created.
    """
    global database

    usage = (
        "%prog [options]\n"
        "options (listed below) default to 'None' if omitted\n"
    )
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-i', '--hmm',
        dest='hmm',
        action='store',
        type='string',
        metavar="FILE",
        help='Name of input hmm models.')

    parser.add_option(
        '-d', '--database',
        dest='database',
        action='store',
        type='string',
        metavar="FILE",
        help='Name of sequence database.')

    options, _args = parser.parse_args()

    # Both options are needed; without this check a missing one surfaces as
    # an AttributeError inside unescape(None).
    if options.hmm is None or options.database is None:
        parser.error("both --hmm and --database are required")

    models = unescape(options.hmm)
    database = unescape(options.database)

    os.mkdir(results_dir)

    unpackData(models)

    # Sorted so that the processing and output order is reproducible;
    # os.listdir() returns entries in arbitrary order.
    model_files = sorted(
        name for name in os.listdir(results_dir)
        if name.lower().endswith(model_extension)
    )

    # The initializer hands the database path to each worker explicitly, so
    # workers that do not inherit this module's state (e.g. when processes
    # are spawned rather than forked) still see it.
    pool = Pool(initializer=_set_database, initargs=(database,))
    try:
        pool.map(hmmsearch, model_files)
    finally:
        # Release the worker processes even if a search raised.
        pool.close()
        pool.join()

    result_files = sorted(
        name for name in os.listdir(results_dir)
        if name.lower().endswith(result_extension)
    )
    with open(results_dir + os.sep + results, "a") as f:
        for name in result_files:
            with open(results_dir + os.sep + name, "r") as r:
                f.write(toData(r.read()) + "\n")

# Run the command-line tool only when executed as a script, not on import.
if __name__ == '__main__':
    main()