comparison alignment/phytab_mafft.py @ 0:5b9a38ec4a39 draft default tip

First commit of old repositories
author osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu>
date Tue, 11 Mar 2014 12:19:13 -0700
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:5b9a38ec4a39
1 #!/usr/bin/env python
2
3 import os
4 import optparse
5 import subprocess
6 from multiprocessing import Pool
7
# Directory holding the per-group FASTA files; main() overwrites this global.
directory = ""
# Name of the combined output file that main() writes inside `directory`.
results = "results.data"
# Suffix of the unaligned FASTA files created by saveMulti()/saveSingle().
extension = ".fs"
# Suffix appended to an input file name to form the alignment output name.
aligned_extension = ".afa"
12
13
def unescape(string):
    """Replace every ``__xx__`` placeholder in *string* with its character.

    The placeholders and the characters they stand for are listed in
    ``replacements`` below. Text that contains no placeholder is returned
    unchanged.

    :param string: text possibly containing placeholder tokens.
    :returns: the text with all known placeholders substituted.
    """
    # A tuple of pairs (instead of dict.iteritems()) works on both Python 2
    # and Python 3 and applies the substitutions in a fixed order.
    replacements = (
        ('__gt__', '>'),
        ('__lt__', '<'),
        ('__sq__', "'"),
        ('__dq__', '"'),
        ('__ob__', '['),
        ('__cb__', ']'),
        ('__oc__', '{'),
        ('__cc__', '}'),
        ('__at__', '@'),
        ('__cn__', '\n'),
        ('__cr__', '\r'),
        ('__tc__', '\t'),
        ('__pd__', '#'),
    )

    for token, char in replacements:
        string = string.replace(token, char)

    return string
35
36
def isTabular(file):
    """Tell whether the file at path *file* has no line starting with '>'.

    :param file: path of the file to inspect.
    :returns: False as soon as a line beginning with '>' is found,
        True when no such line exists (including for an empty file).
    """
    with open(file) as handle:
        return not any(row.startswith('>') for row in handle)
43
44
def toData(text):
    """Flatten FASTA-style *text* into one tab-separated row per record.

    Every line containing '>' starts a new row: its '> ' marker is removed,
    a tab is appended and its spaces become tabs. All other lines are
    appended to the current row as they are, so a sequence wrapped over
    several lines ends up on a single row.

    :param text: the text to convert.
    :returns: the converted text without its first character (the newline
        emitted in front of the first header line).
    """
    pieces = []
    for row in text.split('\n'):
        if '>' in row:
            row = ('\n' + row.replace('> ', "") + '\t').replace(" ", "\t")
        pieces.append(row)
    return ''.join(pieces)[1:]  # Skip the newline put before the first header
54
def toDataSingle(text):
    """Return *text* with every newline-separated piece newline-terminated.

    The text is split on newlines and each piece, including the empty piece
    that follows a trailing newline, gets a newline appended.

    Only a leading newline is removed from the result. The previous
    unconditional ``result[1:]`` was carried over from toData(), where a
    newline is always emitted first; here it removed the first real
    character instead (the '>' of the first FASTA header).

    :param text: the text to convert.
    :returns: the re-joined text, without a leading newline.
    """
    result = ''.join(line + '\n' for line in text.split('\n'))
    if result.startswith('\n'):
        return result[1:]
    return result
62
def _run_mafft(command, input):
    """Run one MAFFT command line on a file inside ``directory``.

    :param command: list holding the executable name and its strategy flags.
    :param input: file name, relative to the module-level ``directory``.
    :returns: the exit status reported by ``subprocess.call``.
    """
    file_name = directory + os.sep + input
    # The alignment is written next to its input, with the aligned suffix.
    aln = file_name + aligned_extension
    return subprocess.call(command + ['--out', aln, file_name])


# The wrappers below stay separate module-level functions because main()
# hands them to Pool.map by name, one per strategy.

def mafftauto(input):
    """Align *input* with ``mafft --auto``; return the exit status."""
    return _run_mafft(['mafft', '--auto'], input)


def mafft1(input):
    """Align *input* with ``mafft --retree 1``; return the exit status."""
    return _run_mafft(['mafft', '--retree', '1'], input)


def mafft2(input):
    """Align *input* with ``mafft --retree 2``; return the exit status."""
    return _run_mafft(['mafft', '--retree', '2'], input)


def maffti(input):
    """Align *input* with ``mafft-fftnsi``; return the exit status."""
    return _run_mafft(['mafft-fftnsi'], input)


def maffteinsi(input):
    """Align *input* with ``mafft-einsi``; return the exit status."""
    return _run_mafft(['mafft-einsi'], input)


def mafftlinsi(input):
    """Align *input* with ``mafft-linsi``; return the exit status."""
    return _run_mafft(['mafft-linsi'], input)


def mafftginsi(input):
    """Align *input* with ``mafft-ginsi``; return the exit status."""
    return _run_mafft(['mafft-ginsi'], input)


def mafftqinsi(input):
    """Align *input* with ``mafft-qinsi``; return the exit status."""
    return _run_mafft(['mafft-qinsi'], input)
102
103
class Sequence:
    """One whitespace-separated record ending in a sequence.

    The first three fields are stored as species, family and name, the last
    field as the sequence, and every field except the last, joined by single
    spaces, as the header. The unmodified input is kept in ``string``.
    """

    def __init__(self, string):
        fields = string.split()
        self.string = string
        self.species = fields[0]
        self.family = fields[1]
        self.name = fields[2]
        self.sequence = fields[-1]
        self.header = ' '.join(fields[:-1])

    def printFASTA(self):
        """Return the record as a two-line FASTA entry ('> header', sequence)."""
        return '> %s\n%s\n' % (self.header, self.sequence)
116
117
def saveMulti(tabFile):
    """Split a tabular file into one FASTA file per family.

    Each non-blank line is parsed as a Sequence and appended, in FASTA form,
    to ``<directory>/<family><extension>``.

    :param tabFile: path of the tabular input file.
    """
    with open(tabFile) as f:
        for line in f:
            # A blank (e.g. trailing) line has no fields and would make
            # Sequence() fail with IndexError, so skip it.
            if not line.strip():
                continue
            seq = Sequence(line)
            with open(directory + os.sep + seq.family + extension, "a") as p:
                p.write(seq.printFASTA())
124
125
def saveSingle(fastaFile):
    """Append the lines of *fastaFile* to ``<directory>/fasta<extension>``.

    :param fastaFile: path of the FASTA file to copy.
    """
    target = directory + os.sep + "fasta" + extension
    with open(fastaFile) as source:
        for row in source:
            # The target is reopened in append mode for every line.
            with open(target, "a") as sink:
                sink.write(row)
131
132
def main():
    """Command-line entry point: split the input, align it, merge the results.

    Steps:
      1. Parse ``-d`` (working directory), ``-i`` (input file) and ``-s``
         (strategy name), undoing placeholder escaping with unescape().
      2. Create ``<path>/data`` and write the input into it, one file per
         family for tabular input, a single file otherwise.
      3. Run the aligner selected by the strategy on every ``.fs`` file in
         a process pool.
      4. Concatenate the converted alignments into ``results.data``.

    Exits through ``parser.error`` when ``-i`` or ``-s`` is missing or the
    strategy name is unknown. ``os.mkdir`` raises if the data directory
    already exists.
    """
    global directory

    usage = """%prog [options]
options (listed below) default to 'None' if omitted
"""
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-d', '--directory',
        metavar="PATH",
        dest='path',
        default='.',
        help='Path to working directory.')

    parser.add_option(
        '-i', '--in',
        dest='input',
        action='store',
        type='string',
        metavar="FILE",
        help='Name of input data.')

    parser.add_option(
        '-s', '--strat',
        dest='strategy',
        action='store',
        type='string',
        help='Alignement algorithm to use.')

    options, args = parser.parse_args()

    # Without this check a missing option surfaced as an AttributeError on
    # None inside unescape().
    if options.input is None or options.strategy is None:
        parser.error('both --in and --strat are required')

    # Strategy name -> worker function handed to the pool.
    aligners = {
        'Auto': mafftauto,
        'FFT-NS-1': mafft1,
        'FFT-NS-2': mafft2,
        'FFT-NS-i': maffti,
        'E-INS-i': maffteinsi,
        'L-INS-i': mafftlinsi,
        'G-INS-i': mafftginsi,
        'Q-INS-i': mafftqinsi,
    }

    inputFile = unescape(options.input)
    strategy = unescape(options.strategy)
    if strategy not in aligners:
        # Previously an unknown name ran no aligner and silently produced
        # an empty results file.
        parser.error('unknown strategy %r (expected one of: %s)'
                     % (strategy, ', '.join(sorted(aligners))))

    directory = unescape(options.path) + os.sep + "data"
    os.mkdir(directory)

    # The input does not change during the run, so classify it once.
    tabular = isTabular(inputFile)
    if tabular:
        saveMulti(inputFile)
    else:
        saveSingle(inputFile)

    list_of_files = sorted(
        name for name in os.listdir(directory)
        if name.lower().endswith(extension))

    # The worker functions read the module-level `directory`, so the pool
    # is created only after it has been set.
    pool = Pool()
    try:
        pool.map(aligners[strategy], list_of_files)
    finally:
        pool.close()
        pool.join()

    # Sorted so the order of the combined output does not depend on the
    # arbitrary order returned by os.listdir().
    aligned = sorted(
        name for name in os.listdir(directory)
        if name.lower().endswith(aligned_extension))
    convert = toData if tabular else toDataSingle
    with open(directory + os.sep + results, "a") as f:
        for name in aligned:
            with open(directory + os.sep + name, "r") as r:
                f.write(convert(r.read()) + "\n")
213
# Run the command-line tool only when executed as a script, not on import.
if __name__ == '__main__':
    main()
216