Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
comparison alignment/phytab_mafft.py @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5b9a38ec4a39 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import os | |
4 import optparse | |
5 import subprocess | |
6 from multiprocessing import Pool | |
7 | |
# Working directory for intermediate files; main() rebinds this to "<path>/data".
directory = ""
# Name of the combined output file that main() writes inside `directory`.
results = "results.data"
# Suffix of the FASTA files written by saveMulti()/saveSingle() and fed to mafft.
extension = ".fs"
# Suffix appended to an input file name to form the alignment output name.
aligned_extension = ".afa"
12 | |
13 | |
def unescape(string):
    """Replace ``__xx__`` escape tokens in *string* with the characters they stand for.

    The tokens handled are ``__gt__``, ``__lt__``, ``__sq__``, ``__dq__``,
    ``__ob__``, ``__cb__``, ``__oc__``, ``__cc__``, ``__at__``, ``__cn__``,
    ``__cr__``, ``__tc__`` and ``__pd__``.

    Args:
        string: Text possibly containing escape tokens.

    Returns:
        The text with every known token replaced by its literal character.
    """
    # A tuple of pairs (rather than a dict walked with the Python-2-only
    # ``iteritems``) runs on both Python 2 and 3 and applies the
    # replacements in one fixed, predictable order.
    mapped_chars = (
        ('>', '__gt__'),
        ('<', '__lt__'),
        ("'", '__sq__'),
        ('"', '__dq__'),
        ('[', '__ob__'),
        (']', '__cb__'),
        ('{', '__oc__'),
        ('}', '__cc__'),
        ('@', '__at__'),
        ('\n', '__cn__'),
        ('\r', '__cr__'),
        ('\t', '__tc__'),
        ('#', '__pd__'),
    )

    for char, token in mapped_chars:
        string = string.replace(token, char)

    return string
35 | |
36 | |
def isTabular(file):
    """Report whether *file* lacks FASTA-style header lines.

    Args:
        file: Path of the text file to inspect.

    Returns:
        False as soon as a line starting with '>' is found, True when no
        line in the file starts with '>' (including an empty file).
    """
    with open(file) as handle:
        has_fasta_header = any(row.startswith('>') for row in handle)
    return not has_fasta_header
43 | |
44 | |
def toData(text):
    """Convert FASTA text into one tab-separated row per record.

    Every line containing '>' starts a new row: the '> ' marker is removed,
    the remaining spaces become tabs and a trailing tab is added. All other
    lines are appended to the current row unchanged, so a sequence split
    over several lines ends up as one field.

    NOTE(review): indentation was lost in the available copy of this file;
    the space-to-tab replacement is assumed to apply to header lines only.

    Args:
        text: FASTA-formatted text.

    Returns:
        The rows joined by newlines, with the first character of the
        assembled text dropped (the newline emitted before the first header).
    """
    pieces = []
    for line in text.split('\n'):
        if '>' in line:
            line = '\n' + line.replace('> ', "") + '\t'
            line = line.replace(" ", "\t")
        pieces.append(line)
    # Join once instead of repeated ``+=`` to avoid quadratic string building.
    return ''.join(pieces)[1:]  # Index past the first newline char
54 | |
def toDataSingle(text):
    """Return *text* with every line newline-terminated.

    Splitting on newlines and re-terminating each piece is equivalent to
    appending a single newline to the text.

    The original code then dropped the first character unconditionally,
    commented as skipping a leading newline. No leading newline is ever
    added here, so that removed the first real character of the data (the
    '>' of the first FASTA header). A leading newline is now stripped only
    when one is actually present.

    Args:
        text: Text to normalise (typically aligned FASTA output).

    Returns:
        The text followed by a newline, minus one leading newline if the
        text started with one.
    """
    result = text + '\n'
    if result.startswith('\n'):
        return result[1:]  # Index past the first newline char
    return result
62 | |
def mafftauto(input):
    """Run ``mafft --auto`` on one file of the working directory.

    Args:
        input: File name relative to the module-level ``directory``.

    The alignment is written to the input path plus ``aligned_extension``.
    mafft's exit status is not inspected.
    """
    source = os.sep.join((directory, input))
    target = source + aligned_extension
    command = ['mafft', '--auto', '--out', target, source]
    subprocess.call(command)
67 | |
def mafft1(input):
    """Run ``mafft --retree 1`` on one file of the working directory.

    Args:
        input: File name relative to the module-level ``directory``.

    The alignment is written to the input path plus ``aligned_extension``.
    mafft's exit status is not inspected.
    """
    source = os.sep.join((directory, input))
    target = source + aligned_extension
    command = ['mafft', '--retree', '1', '--out', target, source]
    subprocess.call(command)
72 | |
def mafft2(input):
    """Run ``mafft --retree 2`` on one file of the working directory.

    Args:
        input: File name relative to the module-level ``directory``.

    The alignment is written to the input path plus ``aligned_extension``.
    mafft's exit status is not inspected.
    """
    source = os.sep.join((directory, input))
    target = source + aligned_extension
    command = ['mafft', '--retree', '2', '--out', target, source]
    subprocess.call(command)
77 | |
def maffti(input):
    """Run the ``mafft-fftnsi`` executable on one file of the working directory.

    Args:
        input: File name relative to the module-level ``directory``.

    The alignment is written to the input path plus ``aligned_extension``.
    The exit status is not inspected.
    """
    source = os.sep.join((directory, input))
    target = source + aligned_extension
    command = ['mafft-fftnsi', '--out', target, source]
    subprocess.call(command)
82 | |
def maffteinsi(input):
    """Run the ``mafft-einsi`` executable on one file of the working directory.

    Args:
        input: File name relative to the module-level ``directory``.

    The alignment is written to the input path plus ``aligned_extension``.
    The exit status is not inspected.
    """
    source = os.sep.join((directory, input))
    target = source + aligned_extension
    command = ['mafft-einsi', '--out', target, source]
    subprocess.call(command)
87 | |
def mafftlinsi(input):
    """Run the ``mafft-linsi`` executable on one file of the working directory.

    Args:
        input: File name relative to the module-level ``directory``.

    The alignment is written to the input path plus ``aligned_extension``.
    The exit status is not inspected.
    """
    source = os.sep.join((directory, input))
    target = source + aligned_extension
    command = ['mafft-linsi', '--out', target, source]
    subprocess.call(command)
92 | |
def mafftginsi(input):
    """Run the ``mafft-ginsi`` executable on one file of the working directory.

    Args:
        input: File name relative to the module-level ``directory``.

    The alignment is written to the input path plus ``aligned_extension``.
    The exit status is not inspected.
    """
    source = os.sep.join((directory, input))
    target = source + aligned_extension
    command = ['mafft-ginsi', '--out', target, source]
    subprocess.call(command)
97 | |
def mafftqinsi(input):
    """Run the ``mafft-qinsi`` executable on one file of the working directory.

    Args:
        input: File name relative to the module-level ``directory``.

    The alignment is written to the input path plus ``aligned_extension``.
    The exit status is not inspected.
    """
    source = os.sep.join((directory, input))
    target = source + aligned_extension
    command = ['mafft-qinsi', '--out', target, source]
    subprocess.call(command)
102 | |
103 | |
class Sequence:
    """One record parsed from a whitespace-separated line.

    The first three fields are stored as ``species``, ``family`` and
    ``name``; the last field is the ``sequence``; every field except the
    last, joined by single spaces, forms the ``header``. The unparsed line
    is kept in ``string``.
    """

    def __init__(self, string):
        """Split *string* on whitespace and populate the record attributes.

        Raises:
            IndexError: If the line has fewer than three fields.
        """
        fields = string.split()
        self.species = fields[0]
        self.family = fields[1]
        self.name = fields[2]
        # Everything but the final field belongs to the header.
        leading_fields = fields[:-1]
        self.header = ' '.join(leading_fields)
        self.sequence = fields[-1]
        self.string = string

    def printFASTA(self):
        """Return the record as a two-line FASTA entry ('> header', sequence)."""
        return ''.join(['> ', self.header, '\n', self.sequence, '\n'])
116 | |
117 | |
def saveMulti(tabFile):
    """Split a tabular sequence file into one FASTA file per family.

    Each non-blank line is parsed with ``Sequence`` and appended, as a
    FASTA record, to ``<directory>/<family><extension>``.

    Args:
        tabFile: Path of the whitespace-separated input file.

    Raises:
        IndexError: From ``Sequence`` if a non-blank line has fewer than
            three fields.
    """
    with open(tabFile) as f:
        for line in f:
            # A blank line (e.g. a trailing one) holds no record; parsing it
            # would crash Sequence with an IndexError.
            if not line.strip():
                continue
            seq = Sequence(line)
            with open(directory + os.sep + seq.family + extension, "a") as p:
                p.write(seq.printFASTA())
124 | |
125 | |
def saveSingle(fastaFile):
    """Append the contents of *fastaFile* to ``<directory>/fasta<extension>``.

    The output file is opened once rather than once per input line. As
    before, an empty input file creates no output file.

    Args:
        fastaFile: Path of the FASTA file to copy.
    """
    target = directory + os.sep + "fasta" + extension
    with open(fastaFile) as src:
        first = src.readline()
        if not first:
            # Empty input: leave the working directory untouched.
            return
        with open(target, "a") as dst:
            dst.write(first)
            for line in src:
                dst.write(line)
131 | |
132 | |
def main():
    """Command-line entry point: align the input with mafft and collect the results.

    Steps:
      1. Parse ``-d/--directory``, ``-i/--in`` and ``-s/--strat``.
      2. Create ``<path>/data`` and rebind the module-level ``directory`` to it.
      3. Write the input into ``.fs`` files there (one per family for tabular
         input, a single ``fasta.fs`` otherwise).
      4. Run the aligner chosen by the strategy on every ``.fs`` file in a
         process pool.
      5. Convert every ``.afa`` output and append it to
         ``<directory>/results.data``.

    Raises:
        SystemExit: Via ``parser.error`` when the input or strategy option is
            missing or the strategy name is not recognised.
        OSError: From ``os.mkdir`` if the data directory already exists.
    """
    global directory

    usage = ("%prog [options]\n"
             "options (listed below) default to 'None' if omitted\n")
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-d', '--directory',
        metavar="PATH",
        dest='path',
        default='.',
        help='Path to working directory.')

    parser.add_option(
        '-i', '--in',
        dest='input',
        action='store',
        type='string',
        metavar="FILE",
        help='Name of input data.')

    parser.add_option(
        '-s', '--strat',
        dest='strategy',
        action='store',
        type='string',
        help='Alignement algorithm to use.')

    options, args = parser.parse_args()

    # Without these checks a missing option reaches unescape() as None and
    # fails with an unhelpful AttributeError.
    if options.input is None:
        parser.error("an input file is required (-i/--in)")
    if options.strategy is None:
        parser.error("an alignment strategy is required (-s/--strat)")

    # Strategy name -> worker function run on each file.
    aligners = {
        'Auto': mafftauto,
        'FFT-NS-1': mafft1,
        'FFT-NS-2': mafft2,
        'FFT-NS-i': maffti,
        'E-INS-i': maffteinsi,
        'L-INS-i': mafftlinsi,
        'G-INS-i': mafftginsi,
        'Q-INS-i': mafftqinsi,
    }

    inputFile = unescape(options.input)
    directory = unescape(options.path) + os.sep + "data"
    strategy = unescape(options.strategy)

    aligner = aligners.get(strategy)
    if aligner is None:
        # Previously an unknown name silently skipped the alignment and
        # produced an empty result file.
        parser.error("unknown alignment strategy %r; expected one of: %s"
                     % (strategy, ', '.join(sorted(aligners))))

    os.mkdir(directory)

    # The input is not modified below, so the format check is done once.
    tabular = isTabular(inputFile)
    if tabular:
        saveMulti(inputFile)
    else:
        saveSingle(inputFile)

    list_of_files = sorted(
        name for name in os.listdir(directory)
        if name.lower().endswith(extension))

    # The pool is created after ``directory`` is set, as in the original code.
    pool = Pool()
    try:
        pool.map(aligner, list_of_files)
    finally:
        # Release the worker processes even if an alignment job raised.
        pool.close()
        pool.join()

    # Sorted so the order of records in the result file is deterministic
    # rather than following the arbitrary order of os.listdir().
    aligned_files = sorted(
        name for name in os.listdir(directory)
        if name.lower().endswith(aligned_extension))
    convert = toData if tabular else toDataSingle
    with open(directory + os.sep + results, "a") as out:
        for name in aligned_files:
            with open(directory + os.sep + name, "r") as handle:
                out.write(convert(handle.read()) + "\n")
213 | |
# Run the command-line workflow only when executed as a script, not on import.
if __name__ == '__main__':
    main()
216 |