Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
comparison alignment/phytab_aliscorecut.py @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5b9a38ec4a39 |
---|---|
1 import os | |
2 import optparse | |
3 import subprocess | |
4 from multiprocessing import Pool | |
5 import shutil | |
6 | |
# Working directory (relative to the current directory) that receives the
# per-family FASTA files and all later output.
results_dir = "./data"
# Name of the combined output file that main() writes inside results_dir.
results = "results.data"
# Extension given to the per-family files and used to select them again.
fasta_extension = ".afa"
# Files in results_dir whose names start with this prefix are collected
# after the ALICUT script has run.
alicut_prefix = "ALICUT_"
# Not referenced by any code visible in this file.
familyList = []
# Directory holding the Aliscore / ALICUT perl scripts.
galaxy_tool_dir = "/home/galaxy/bin/"
# Substrings replaced by placeholder tokens when a header is written to a
# FASTA file, and restored when the result is converted back.
forbidden_chars = {
    "(": "__rb__",
    ")": "__lb__",
    ":": "__co__",
    ";": "__sc__",
    ",": "__cm__",
    "--": "__dd__",
    "*": "__st__",
    "|": "__pi__",
    " ": "__sp__",
}
24 | |
25 | |
def unescape(string):
    """Return *string* with ``__xx__`` placeholder tokens turned back into characters.

    Every token listed in the table below (for example ``__gt__`` or
    ``__sq__``) is replaced by the literal character it stands for. Text
    that contains none of the tokens is returned unchanged.

    NOTE(review): the token set looks like the one Galaxy applies when it
    sanitizes tool parameters -- confirm against the tool wrapper.
    """
    mapped_chars = {
        '>': '__gt__',
        '<': '__lt__',
        "'": '__sq__',
        '"': '__dq__',
        '[': '__ob__',
        ']': '__cb__',
        '{': '__oc__',
        '}': '__cc__',
        '@': '__at__',
        '\n': '__cn__',
        '\r': '__cr__',
        '\t': '__tc__',
        '#': '__pd__'
    }

    # items() exists on both Python 2 and Python 3; the former iteritems()
    # call raised AttributeError on Python 3.
    for char, token in mapped_chars.items():
        string = string.replace(token, char)

    return string
47 | |
48 | |
def unpackData(families):
    """Split the tab-separated input file into one FASTA file per family.

    Each non-blank line of *families* is parsed as a Sequence and appended,
    as a FASTA record, to ``<results_dir>/<family><fasta_extension>``.
    Files are opened in append mode, so records of the same family
    accumulate in the same file.
    """
    with open(families) as f:
        for line in f:
            # A blank line (commonly the last line of the file) has no
            # family field and would make Sequence() raise IndexError.
            if not line.strip():
                continue
            seq = Sequence(line)
            with open(results_dir + os.sep + seq.family + fasta_extension, "a") as p:
                p.write(seq.printFASTA())
55 | |
56 | |
class Sequence:
    """One row of the tab-separated input, split into its fields.

    Attributes (taken from the raw row; nothing is stripped):
        species:  first tab-separated field.
        family:   second field.
        name:     third field.
        header:   every field except the last, joined by single spaces.
        sequence: last field; it still carries the row's trailing newline
                  when the row had one.
        string:   the raw row as passed in.
    """

    def __init__(self, string):
        """Parse *string*; raises IndexError when it has fewer than three fields."""
        lis = string.split('\t')
        self.species = lis[0]
        self.family = lis[1]
        self.name = lis[2]
        self.header = ' '.join(lis[:-1])
        self.sequence = lis[-1]
        self.string = string

    def escapedHeader(self):
        """Return the header with every forbidden_chars key replaced by its token."""
        string = self.header
        # items() works on Python 2 and 3; iteritems() is Python 2 only.
        for key, value in forbidden_chars.items():
            string = string.replace(key, value)
        return string

    def printFASTA(self):
        """Return the record as FASTA text: '>' + escaped header, newline, sequence, newline."""
        return '>' + self.escapedHeader() + '\n' + self.sequence + '\n'
75 | |
76 | |
def unescapeHeader(header):
    """Return *header* with every forbidden_chars token replaced by its original text.

    This applies the forbidden_chars table in the opposite direction to
    Sequence.escapedHeader. A header that already contained one of the
    token strings before escaping will not round-trip exactly.
    """
    string = header
    # items() works on Python 2 and 3; iteritems() is Python 2 only.
    for key, value in forbidden_chars.items():
        string = string.replace(value, key)
    return string
82 | |
83 | |
def toData(text):
    """Convert FASTA text into tab-separated rows, one row per record.

    A line containing '>' is treated as a header: the '>' is removed, the
    header is unescaped, a tab is appended and every space in it becomes a
    tab; the row is started with a newline. All other lines are appended
    as-is, so a sequence wrapped over several lines is joined back into a
    single field. The newline in front of the first record is removed from
    the returned string.
    """
    pieces = []
    for line in text.split('\n'):
        if '>' in line:
            line = '\n' + unescapeHeader(line.replace('>', "")) + '\t'
            line = line.replace(" ", "\t")
        pieces.append(line)
    result = ''.join(pieces)
    # Only drop the leading newline that a first header line produced; an
    # unconditional result[1:] would eat the first character of text that
    # does not begin with a header.
    if result.startswith('\n'):
        return result[1:]
    return result
93 | |
94 | |
def aliscore(input):
    """Run the Aliscore perl script on one file of the results directory.

    *input* is a file name relative to results_dir. The script is started
    with galaxy_tool_dir on perl's include path and this function blocks
    until it exits. The exit status is not inspected; nothing is returned.
    """
    target = os.sep.join([results_dir, input])
    command = [
        "perl",
        "-I", galaxy_tool_dir,
        galaxy_tool_dir + "Aliscore.02.pl",
        "-i", target,
    ]
    process = subprocess.Popen(command)
    process.wait()
100 | |
101 | |
def main():
    """Command-line entry point.

    Steps:
      1. Read the input file name from -i/--input and unescape it.
      2. Create results_dir and split the input into per-family FASTA files.
      3. Run Aliscore on every file in results_dir ending in
         fasta_extension, in parallel worker processes.
      4. Copy the ALICUT script into results_dir and run it there.
      5. Convert every file whose name starts with alicut_prefix and ends
         with fasta_extension back to tab-separated rows and append them
         to results_dir/results.

    Exits through parser.error() when no input file is given. Raises
    OSError when results_dir already exists.
    """
    usage = """%prog [options]
options (listed below) default to 'None' if omitted
    """
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-i', '--input',
        dest='families',
        action='store',
        type='string',
        metavar="FILE",
        help='Name of input sequences.')

    options, args = parser.parse_args()

    # Without -i, options.families is None and unescape() would fail with
    # an AttributeError; report a usage error instead.
    if options.families is None:
        parser.error("an input file is required (-i/--input)")

    families = unescape(options.families)

    # Kept as os.mkdir on purpose: unpackData() appends to its output
    # files, so reusing a directory left over from an earlier run would
    # duplicate records. Failing here is the safer outcome.
    os.mkdir(results_dir)

    unpackData(families)

    alignments = [name for name in os.listdir(results_dir)
                  if name.lower().endswith(fasta_extension)]

    pool = Pool()
    try:
        pool.map(aliscore, alignments)
    finally:
        # Release the worker processes even when a task raised.
        pool.close()
        pool.join()

    alicut = "ALICUT_V2.0_modified.pl"
    shutil.copy(galaxy_tool_dir + alicut, results_dir + os.sep + alicut)
    # Run the script with results_dir as its working directory instead of
    # changing (and later guessing how to restore) this process's own cwd.
    pop = subprocess.Popen(["perl", "./" + alicut], cwd=results_dir)
    pop.wait()

    cut_files = [name for name in os.listdir(results_dir)
                 if name.startswith(alicut_prefix)]
    with open(results_dir + os.sep + results, "a") as f:
        for name in cut_files:
            if name.endswith(fasta_extension):
                with open(results_dir + os.sep + name, "r") as r:
                    f.write(toData(r.read()) + "\n")


if __name__ == '__main__':
    main()