comparison orthologs/hmmbuild.py @ 0:5b9a38ec4a39 draft default tip

First commit of old repositories
author osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
date Tue, 11 Mar 2014 12:19:13 -0700
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:5b9a38ec4a39
1 import os
2 import optparse
3 import subprocess
4 from multiprocessing import Pool
5
# Scratch directory created by main(); the per-family alignment files, the
# models produced by hmmbuild and the combined results file all live here.
directory = "./data"

# Name of the combined output file written inside `directory`.
results = "results.data"

# Suffix of the per-family FASTA alignment files written by main().
extension = ".afa"

# Suffix of the model files that hmmbuild() asks the external tool to write.
model_extension = ".hmm"

# Path of the input table; main() overwrites it from the -i/--in option.
inputFile = ""

# Position (0-based, in whitespace-separated tokens) of the token that
# toData() uses as the model name.
index_of_name_in_hmm = 6
12
13
def unescape(string):
    """Return *string* with every ``__xx__`` placeholder token decoded.

    Each placeholder listed in the table below is replaced by the literal
    character it stands for (for example ``__gt__`` becomes ``>``).  Text
    that contains no placeholder is returned unchanged.

    :param string: text possibly containing placeholder tokens.
    :returns: the decoded text.
    """
    mapped_chars = {
        '>': '__gt__',
        '<': '__lt__',
        "'": '__sq__',
        '"': '__dq__',
        '[': '__ob__',
        ']': '__cb__',
        '{': '__oc__',
        '}': '__cc__',
        '@': '__at__',
        '\n': '__cn__',
        '\r': '__cr__',
        '\t': '__tc__',
        '#': '__pd__',
    }

    # items() is available on both Python 2 and Python 3, whereas
    # iteritems() only exists on Python 2.
    for key, value in mapped_chars.items():
        string = string.replace(value, key)

    return string
35
36
def toData(text):
    """Turn the text of one model file into a single record line.

    The text is split on whitespace and the token at position
    ``index_of_name_in_hmm`` is taken as the record's name.  The record is
    that name, a tab, and the full text with every newline replaced by the
    two characters ``\\n`` so that it fits on one line.

    :param text: full contents of a model file.
    :returns: ``name<TAB>flattened-text`` (no trailing newline is added).
    :raises IndexError: if *text* has too few whitespace-separated tokens.
    """
    tokens = text.split()
    model_name = tokens[index_of_name_in_hmm]
    flattened = text.replace("\n", "\\n")
    return "\t".join([model_name, flattened])
42
43
def hmmbuild(input):
    """Run the external ``hmmbuild`` program on one alignment file.

    The alignment is read from ``directory/<input>`` (passed to the tool
    with ``--informat afa``) and the model is written next to it, to the
    same path with ``model_extension`` appended.

    :param input: file name of the alignment, relative to ``directory``.
        (The name shadows the builtin but is kept for compatibility.)
    :returns: the exit status of the ``hmmbuild`` process, so that callers
        can detect a failed build; previously the status was discarded.
    :raises OSError: if the ``hmmbuild`` executable cannot be started.
    """
    file_name = directory + os.sep + input
    command = ['hmmbuild', "--informat", "afa", file_name + model_extension, file_name]
    # subprocess.call() starts the process, waits for it and hands back its
    # return code in one step.
    return subprocess.call(command)
50
51
class Sequence(object):
    """One record of the tab-separated input table.

    The first three fields are the species, the family and the name; the
    last field is the sequence itself.  All fields except the last are
    joined with single spaces to form the FASTA header.
    """

    def __init__(self, string):
        """Parse one line of the input table.

        :param string: a tab-separated record, with or without its trailing
            line terminator.
        :raises IndexError: if the record has fewer than three fields.
        """
        # Strip the line terminator before splitting; otherwise it stays
        # attached to the last field and printFASTA() emits an extra blank
        # line after every record read from a file.
        lis = string.rstrip('\r\n').split('\t')
        self.species = lis[0]
        self.family = lis[1]
        self.name = lis[2]
        self.header = ' '.join(lis[:-1])
        self.sequence = lis[-1]
        # The raw, unmodified input line.
        self.string = string

    def printFASTA(self):
        """Return the record as FASTA text: a header line and a sequence line."""
        return '> ' + self.header + '\n' + self.sequence + '\n'
65
66
def main():
    """Build one HMM per family from a tab-separated sequence table.

    Steps:
      1. Read the input path from ``-i/--in`` and decode its placeholders.
      2. Create ``directory`` and append every record, in FASTA form, to
         ``<family><extension>`` inside it.
      3. Run ``hmmbuild`` on every alignment file using a process pool.
      4. Append one ``toData`` line per model file to ``results`` inside
         ``directory``.

    Exits through ``parser.error`` when no input file is given.
    Raises ``OSError`` if ``directory`` already exists.
    """
    global inputFile

    usage = """%prog [options]
options (listed below) default to 'None' if omitted
"""
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-i', '--in',
        dest='input',
        action='store',
        type='string',
        metavar="FILE",
        help='Name of input data.')

    options, args = parser.parse_args()

    # Without this check a missing -i ends in an AttributeError raised from
    # unescape(None) instead of a usage message.
    if not options.input:
        parser.error("an input file is required (-i/--in FILE)")

    inputFile = unescape(options.input)

    os.mkdir(directory)

    with open(inputFile) as table:
        for line in table:
            # A blank line (e.g. at the end of the file) is not a record.
            if not line.strip():
                continue
            seq = Sequence(line)
            family_path = directory + os.sep + seq.family + extension
            # Append mode: records of one family may be scattered through
            # the input, and all of them end up in the same file.
            with open(family_path, "a") as alignment:
                alignment.write(seq.printFASTA())

    list_of_files = [name for name in os.listdir(directory)
                     if name.lower().endswith(extension)]

    pool = Pool()
    try:
        pool.map(hmmbuild, list_of_files)
    finally:
        # Release the worker processes even if a build raised.
        pool.close()
        pool.join()

    model_files = [name for name in os.listdir(directory)
                   if name.lower().endswith(model_extension)]
    with open(directory + os.sep + results, "a") as out:
        for name in model_files:
            with open(directory + os.sep + name, "r") as model:
                out.write(toData(model.read()) + "\n")


if __name__ == '__main__':
    main()