Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
comparison alignment/phytab_muscle.py @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5b9a38ec4a39 |
---|---|
1 import os | |
2 import optparse | |
3 import subprocess | |
4 from multiprocessing import Pool | |
5 | |
# Working directory for intermediate files. main() rebinds this global to
# "<path>/data" before any of the helpers below read it.
directory = ''
# Name of the combined output file that main() writes inside `directory`.
results = 'results.data'
# Suffix of the FASTA files that saveMulti()/saveSingle() create.
extension = '.fs'
# Suffix muscle() appends to an input file name to form its output file name.
aligned_extension = '.afa'
10 | |
11 | |
def unescape(string):
    """Replace escape tokens such as ``__gt__`` with their literal characters.

    Every occurrence of each token listed below is replaced by the single
    character it stands for; text without tokens is returned unchanged.

    The pairs are applied in the fixed order written here.  Iterating a
    dict (as an earlier revision did) gave an arbitrary order, which matters
    when tokens overlap, e.g. ``__gt__lt__``.  A plain tuple also avoids
    ``dict.iteritems()``, which does not exist on Python 3.
    """
    # (escaped token, literal character)
    replacements = (
        ('__gt__', '>'),
        ('__lt__', '<'),
        ('__sq__', "'"),
        ('__dq__', '"'),
        ('__ob__', '['),
        ('__cb__', ']'),
        ('__oc__', '{'),
        ('__cc__', '}'),
        ('__at__', '@'),
        ('__cn__', '\n'),
        ('__cr__', '\r'),
        ('__tc__', '\t'),
        ('__pd__', '#'),
    )

    for token, char in replacements:
        string = string.replace(token, char)

    return string
33 | |
34 | |
def isTabular(file):
    """Return True if no line of the file at path *file* begins with '>'.

    A line starting with '>' is taken as the mark of FASTA input, so its
    presence makes the function return False.  Scanning stops at the first
    such line.
    """
    with open(file) as handle:
        # Lines yielded by a file object are never empty, so startswith()
        # is equivalent to comparing the first character.
        return not any(row.startswith('>') for row in handle)
41 | |
42 | |
def toData(text):
    """Convert FASTA-style *text* into one tab-separated row per record.

    Every line containing '>' starts a new row: the '> ' marker is removed
    and a tab is appended so the sequence lines that follow are concatenated
    into the last column.  Spaces on every line become tabs.

    Returns the rows joined by newlines, without a leading newline.
    """
    pieces = []
    for line in text.split('\n'):
        if '>' in line:
            # Header: begin a new output row; sequence chunks are glued on
            # after the trailing tab.
            line = '\n' + line.replace('> ', "") + '\t'
        pieces.append(line.replace(" ", "\t"))

    joined = ''.join(pieces)
    # Drop only the newline contributed by the first header.  Slicing off
    # the first character unconditionally would eat real data whenever the
    # text does not open with a header line.
    if joined.startswith('\n'):
        return joined[1:]
    return joined
52 | |
def toDataSingle(text):
    """Return *text* with every line (split on '\\n') newline-terminated.

    Unlike toData(), nothing is prepended to the output here, so the first
    character must be kept: slicing it off removed the leading '>' of the
    first FASTA header.  Empty input still yields an empty string.
    """
    if not text:
        return ''
    return ''.join(line + '\n' for line in text.split('\n'))
60 | |
def muscle(input):
    """Run the external ``muscle`` program on one file inside `directory`.

    *input* is a file name relative to the module-level `directory`.  The
    output is written next to it, under the same name with
    `aligned_extension` appended.

    Returns the exit status of the muscle process so callers can detect a
    failed run (earlier revisions returned None and discarded the status).
    The stray ``pwd`` subprocess that used to follow every run was debug
    residue and has been removed.
    """
    file_name = directory + os.sep + input
    command = ['muscle', "-in", file_name, "-out", file_name + aligned_extension]
    # call() starts the process and waits for it, like Popen(...).wait().
    return subprocess.call(command)
68 | |
69 | |
class Sequence:
    """One whitespace-separated record: species, family, name, ..., sequence."""

    def __init__(self, string):
        """Split *string* on whitespace and store its fields.

        The first three fields become species, family and name; the last
        field is the sequence; every field except the last, joined by single
        spaces, forms the header.  Raises IndexError when fewer than three
        fields are present.
        """
        fields = string.split()
        self.species, self.family, self.name = fields[0], fields[1], fields[2]
        self.sequence = fields[-1]
        self.header = ' '.join(fields[:-1])
        self.string = string

    def printFASTA(self):
        """Return the record as a two-line FASTA entry (header, sequence)."""
        return '> {0}\n{1}\n'.format(self.header, self.sequence)
82 | |
83 | |
def saveMulti(tabFile):
    """Split the tabular file *tabFile* into one FASTA file per family.

    Each non-blank line is parsed as a Sequence and appended, in FASTA form,
    to ``<directory>/<family><extension>``.

    Blank lines (typically a trailing one) are skipped; previously they made
    Sequence raise IndexError and aborted the whole run.  Lines with fewer
    than three fields still raise IndexError.
    """
    with open(tabFile) as f:
        for line in f:
            if not line.strip():
                continue
            seq = Sequence(line)
            # Append mode: records of the same family accumulate in one file.
            with open(directory + os.sep + seq.family + extension, "a") as p:
                p.write(seq.printFASTA())
90 | |
91 | |
def saveSingle(fastaFile):
    """Append the contents of *fastaFile* to ``<directory>/fasta<extension>``.

    The destination is opened once instead of once per input line.  It is
    opened lazily, on the first line, so an empty input still creates no
    output file, exactly as before.
    """
    target = directory + os.sep + "fasta" + extension
    with open(fastaFile) as f:
        out = None
        try:
            for line in f:
                if out is None:
                    out = open(target, "a")
                out.write(line)
        finally:
            if out is not None:
                out.close()
97 | |
98 | |
def main():
    """Command-line entry point: split the input, align with muscle, merge.

    Steps:
      1. Parse ``-d/--directory`` (default '.') and ``-i/--in``.
      2. Create ``<directory option>/data`` and make it the module-level
         working `directory` (os.mkdir raises if it already exists).
      3. Write the input into it as FASTA files: one per family for tabular
         input (saveMulti), a single file otherwise (saveSingle).
      4. Run muscle() on every ``extension`` file through a process pool.
      5. Convert every ``aligned_extension`` file (toData for tabular input,
         toDataSingle otherwise) and append it to ``<directory>/<results>``.
    """
    global directory

    usage = """%prog [options]
options (listed below) default to 'None' if omitted
    """
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-d', '--directory',
        metavar="PATH",
        dest='path',
        default='.',
        help='Path to working directory.')

    parser.add_option(
        '-i', '--in',
        dest='input',
        action='store',
        type='string',
        metavar="FILE",
        help='Name of input data.')

    options, args = parser.parse_args()

    if options.input is None:
        # Without this, unescape(None) fails with an opaque AttributeError.
        parser.error("an input file is required (-i/--in)")

    inputFile = unescape(options.input)
    directory = unescape(options.path) + os.sep + "data"

    os.mkdir(directory)

    # Decide the input format once; the answer drives both the split step
    # and the conversion step, and scanning the file twice is wasted work.
    tabular = isTabular(inputFile)
    if tabular:
        saveMulti(inputFile)
    else:
        saveSingle(inputFile)

    list_of_files = [name for name in os.listdir(directory)
                     if name.lower().endswith(extension)]
    # `directory` is assigned before the pool is created, as before.
    pool = Pool()
    try:
        pool.map(muscle, list_of_files)
    finally:
        # Release the worker processes even if an alignment raised.
        pool.close()
        pool.join()

    convert = toData if tabular else toDataSingle
    # Sorted so the order of records in the results file does not depend on
    # the arbitrary order of os.listdir().
    aligned = sorted(name for name in os.listdir(directory)
                     if name.lower().endswith(aligned_extension))
    with open(directory + os.sep + results, "a") as out:
        for name in aligned:
            with open(directory + os.sep + name, "r") as handle:
                out.write(convert(handle.read()) + "\n")
148 | |
# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()