Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
comparison orthologs/hmmbuild.py @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5b9a38ec4a39 |
---|---|
1 import os | |
2 import optparse | |
3 import subprocess | |
4 from multiprocessing import Pool | |
5 | |
# Working directory that main() creates and fills with per-family alignments.
directory = "./data"
# Name of the combined output file written inside `directory`.
results = "results.data"
# Suffix of the per-family aligned-FASTA files that are fed to hmmbuild.
extension = ".afa"
# Suffix used by main() to pick up the profile HMM files hmmbuild produced.
model_extension = ".hmm"
# Path of the input table; placeholder that main() overwrites from --in.
inputFile = ""
# Index of the model name among the whitespace-separated tokens of an
# .hmm file's text, as consumed by toData().
index_of_name_in_hmm = 6
12 | |
13 | |
def unescape(string):
    """Return *string* with every ``__xx__`` token replaced by its character.

    The tokens handled are ``__gt__``, ``__lt__``, ``__sq__``, ``__dq__``,
    ``__ob__``, ``__cb__``, ``__oc__``, ``__cc__``, ``__at__``, ``__cn__``,
    ``__cr__``, ``__tc__`` and ``__pd__``; text that contains none of them
    is returned unchanged.

    NOTE(review): this looks like the encoding a tool runner applies to
    special characters in command-line parameters -- confirm with the
    caller that produces the escaped path.
    """
    # An ordered tuple of pairs (instead of iterating a dict with the
    # Python-2-only ``iteritems``) works on Python 2 and 3 alike and makes
    # the replacement order the same on every interpreter.
    mapped_chars = (
        ('>', '__gt__'),
        ('<', '__lt__'),
        ("'", '__sq__'),
        ('"', '__dq__'),
        ('[', '__ob__'),
        (']', '__cb__'),
        ('{', '__oc__'),
        ('}', '__cc__'),
        ('@', '__at__'),
        ('\n', '__cn__'),
        ('\r', '__cr__'),
        ('\t', '__tc__'),
        ('#', '__pd__'),
    )

    for char, token in mapped_chars:
        string = string.replace(token, char)

    return string
35 | |
36 | |
def toData(text):
    """Turn the text of one .hmm file into a single tab-separated record.

    The record is ``<name>\\t<text>``, where ``<name>`` is the
    whitespace-separated token at position ``index_of_name_in_hmm`` and
    ``<text>`` is the whole input with every newline replaced by the two
    characters ``\\n`` so that it fits on one line.

    Raises IndexError when *text* has too few tokens.
    """
    model_name = text.split()[index_of_name_in_hmm]
    flattened = text.replace("\n", "\\n")
    return "\t".join((model_name, flattened))
42 | |
43 | |
def hmmbuild(input):
    """Run the external ``hmmbuild`` program on one alignment file.

    *input* is a file name inside ``directory``.  The command executed is
    ``hmmbuild --informat afa <path>.hmm <path>``; the call blocks until
    the child process exits.  The exit status is not inspected and nothing
    is returned.
    """
    alignment_path = os.sep.join((directory, input))
    command = [
        'hmmbuild',
        "--informat", "afa",
        alignment_path + ".hmm",
        alignment_path,
    ]
    subprocess.Popen(command).wait()
50 | |
51 | |
class Sequence(object):
    """One record parsed from a tab-separated line.

    Fields, in order: species, family, name, ..., sequence.  Everything
    except the last field is joined with single spaces to form the FASTA
    header; the last field is the sequence itself.
    """

    def __init__(self, string):
        """Parse *string*; raises IndexError if it has fewer than three fields."""
        # Lines read from a file still carry their terminator.  Strip it
        # first so the last field does not end in a newline, which would
        # otherwise produce a blank line after every FASTA record.
        fields = string.rstrip('\r\n').split('\t')
        self.species = fields[0]
        self.family = fields[1]
        self.name = fields[2]
        self.header = ' '.join(fields[:-1])
        self.sequence = fields[-1]
        # The untouched input is kept as-is.
        self.string = string

    def printFASTA(self):
        """Return the record as FASTA text (it is returned, not printed)."""
        return '> ' + self.header + '\n' + self.sequence + '\n'
65 | |
66 | |
def main():
    """Build one profile HMM per family and collect them in one file.

    Steps:
      1. Read the ``-i/--in`` option and unescape it into ``inputFile``.
      2. Create ``directory`` and split the tab-separated input into one
         FASTA file per family (``<family><extension>``).
      3. Run ``hmmbuild`` on every such file using a process pool.
      4. Append one ``toData`` record per produced model file to
         ``directory/results``.

    Exits with a usage error when no input file is given.
    """
    global inputFile, directory

    usage = """%prog [options]
options (listed below) default to 'None' if omitted
    """
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-i', '--in',
        dest='input',
        action='store',
        type='string',
        metavar="FILE",
        help='Name of input data.')

    options, args = parser.parse_args()

    # Without this check a missing option reaches unescape(None) and dies
    # with an AttributeError instead of a usage message.
    if not options.input:
        parser.error("an input file is required (-i/--in FILE)")

    inputFile = unescape(options.input)

    # os.mkdir raises if the directory already exists; since the files
    # below are opened in append mode, that refusal also prevents mixing
    # new records into leftovers from a previous run.
    os.mkdir(directory)

    with open(inputFile) as f:
        for line in f:
            # Blank lines (e.g. a trailing one) carry no record.
            if not line.strip():
                continue
            seq = Sequence(line)
            with open(directory + os.sep + seq.family + extension, "a") as p:
                p.write(seq.printFASTA())

    list_of_files = [name for name in os.listdir(directory)
                     if name.lower().endswith(extension)]

    pool = Pool()
    try:
        pool.map(hmmbuild, list_of_files)
    finally:
        # Always release the worker processes, even if a task failed.
        pool.close()
        pool.join()

    result = [name for name in os.listdir(directory)
              if name.lower().endswith(model_extension)]
    with open(directory + os.sep + results, "a") as f:
        for name in result:
            with open(directory + os.sep + name, "r") as r:
                f.write(toData(r.read()) + "\n")


if __name__ == '__main__':
    main()