Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
comparison phylogenies/phytab_clearcut.py @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5b9a38ec4a39 |
---|---|
1 import os | |
2 import optparse | |
3 import subprocess | |
4 from multiprocessing import Pool | |
5 | |
# Working directory for intermediate files; main() rebinds this global.
directory = ''
# Name of the combined output file written inside the working directory.
results = 'results.data'
# Suffix of the FASTA files written by saveMulti() / saveSingle().
extension = '.fs'
# Suffix appended to an input path to form the clearcut output path.
aligned_extension = '.tre'
# Not read by any code visible in this file (main() sets `indata` instead).
datatype = ''
11 | |
def unescape(string):
    """Replace ``__xx__`` placeholder tokens with the characters they stand for.

    Every occurrence of a known placeholder (for example ``__gt__`` or
    ``__sq__``) in *string* is replaced by its literal character.

    Args:
        string: Text that may contain placeholder tokens.

    Returns:
        The text with all known placeholders substituted.
    """
    mapped_chars = {
        '>': '__gt__',
        '<': '__lt__',
        "'": '__sq__',
        '"': '__dq__',
        '[': '__ob__',
        ']': '__cb__',
        '{': '__oc__',
        '}': '__cc__',
        '@': '__at__',
        '\n': '__cn__',
        '\r': '__cr__',
        '\t': '__tc__',
        '#': '__pd__'
    }

    # items() exists on both Python 2 and Python 3 dicts, whereas
    # iteritems() is Python 2 only and raises AttributeError on Python 3.
    for key, value in mapped_chars.items():
        string = string.replace(value, key)

    return string
33 | |
34 | |
def isTabular(file):
    """Tell whether *file* contains no line that begins with ``>``.

    Args:
        file: Path of the file to inspect.

    Returns:
        False as soon as a line starting with ``>`` is encountered,
        True when the whole file has been read without finding one.
    """
    with open(file) as handle:
        # any() stops reading at the first line that starts with '>'.
        found_marker = any(row.startswith('>') for row in handle)
    return not found_marker
41 | |
def toData(text, name):
    """Build one tab-separated record from a file name and its contents.

    Args:
        text: File contents; every space character is removed.
        name: File name; every ``fasta`` and ``.fs.tre`` substring is removed.

    Returns:
        The cleaned name, a tab character, then the space-free text.
    """
    # The file name contains "fasta" when a FASTA file was supplied.
    label = name.replace("fasta", "").replace(".fs.tre", "")
    compact_text = text.replace(" ", "")
    return label + "\t" + compact_text
46 | |
47 # | |
48 #def toData(text): | |
49 # text = text.split('\n') | |
50 # result = '' | |
51 # for line in text: | |
52 # if '>' in line: | |
53 # line = '\n' + line.replace('>', "") + '\t' | |
54 # line = line.replace(" ", "\t") | |
55 # result += line | |
56 # return result[1:] # Index past the first newline char | |
57 | |
def clearcut(input):
    """Run the external ``clearcut`` program on one file and wait for it.

    The input path is ``directory + os.sep + input``; the output path is
    that same path with ``aligned_extension`` appended. The module-level
    ``indata`` value is passed as the argument following ``-k``. The exit
    status of the process is not inspected.

    Args:
        input: Name of a file inside the working directory.
    """
    source_path = directory + os.sep + input
    target_path = source_path + aligned_extension
    command = [
        'clearcut',
        "--in=" + source_path,
        "--out=" + target_path,
        "--alignment",
        "-k",
        indata,
    ]
    # Block until the child process has finished.
    subprocess.Popen(command).wait()
62 | |
class Sequence:
    """A whitespace-delimited record split into header fields and a sequence.

    Attributes set by the constructor:
        species:  first field of the record.
        family:   second field of the record.
        name:     third field of the record.
        header:   every field except the last, joined by single spaces.
        sequence: last field of the record.
        string:   the record exactly as it was passed in.
    """

    def __init__(self, string):
        """Parse *string*; raises IndexError if it has fewer than three fields."""
        fields = string.split()
        self.species, self.family, self.name = fields[0], fields[1], fields[2]
        self.sequence = fields[-1]
        self.header = ' '.join(fields[:-1])
        self.string = string

    def printFASTA(self):
        """Return the record as a ``>header`` line followed by a sequence line."""
        return ''.join(['>', self.header, '\n', self.sequence, '\n'])
75 | |
def saveMulti(tabFile):
    """Split a tabular sequence file into one FASTA file per family.

    Each non-blank line of *tabFile* is parsed as a ``Sequence`` and its
    FASTA form is appended to ``directory + os.sep + family + extension``.

    Args:
        tabFile: Path of the tabular input file.
    """
    with open(tabFile) as f:
        for line in f:
            # A blank or whitespace-only line (e.g. a trailing newline at the
            # end of the file) has no fields, so Sequence() would raise
            # IndexError on it; skip such lines instead of aborting the run.
            if not line.strip():
                continue
            seq = Sequence(line)
            with open(directory + os.sep + seq.family + extension, "a") as p:
                p.write(seq.printFASTA())
82 | |
def saveSingle(fastaFile):
    """Append the contents of *fastaFile* to the single FASTA work file.

    The destination is ``directory + os.sep + "fasta" + extension`` and is
    opened in append mode.

    Args:
        fastaFile: Path of the FASTA file to copy.
    """
    with open(fastaFile) as f:
        lines = iter(f)
        first = next(lines, None)
        if first is None:
            # Empty input: nothing to copy, and the destination file is
            # deliberately not created (same as a loop that never runs).
            return
        # Open the destination once rather than re-opening it for every
        # single input line.
        with open(directory + os.sep + "fasta" + extension, "a") as p:
            p.write(first)
            p.writelines(lines)
88 | |
def _init_worker(worker_directory, worker_indata):
    """Publish the run settings as module globals inside a pool worker."""
    global directory
    global indata
    directory = worker_directory
    indata = worker_indata


def main():
    """Command-line entry point.

    Parses the options, creates the ``data`` working directory, writes the
    input out as FASTA file(s), runs ``clearcut`` on each of them in a
    process pool, and appends one ``toData`` record per produced tree file
    to the results file inside the working directory.

    Exits through ``parser.error`` when the input file or the data type
    option is missing. ``os.mkdir`` raises ``OSError`` if the working
    directory already exists.
    """
    global directory
    global indata

    usage = """%prog [options]
options (listed below) default to 'None' if omitted
    """
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-d', '--directory',
        metavar="PATH",
        dest='path',
        default='.',
        help='Path to working directory.')

    parser.add_option(
        '-i', '--in',
        dest='input',
        action='store',
        type='string',
        metavar="FILE",
        help='Name of input data.')

    parser.add_option(
        '-t', '--type',
        dest='datatype',
        action='store',
        type='string',
        help='-P for protein. -D for DNA.')

    options, args = parser.parse_args()

    # Both values are passed to unescape(), which calls str.replace on them;
    # report a missing option cleanly instead of failing on None.
    if options.input is None:
        parser.error("an input file is required (-i/--in)")
    if options.datatype is None:
        parser.error("a data type is required (-t/--type)")

    inputFile = unescape(options.input)
    directory = unescape(options.path) + os.sep + "data"
    indata = "-" + unescape(options.datatype)

    os.mkdir(directory)

    if isTabular(inputFile):
        saveMulti(inputFile)
    else:
        saveSingle(inputFile)

    # clearcut() reads the `directory` and `indata` globals. Workers only
    # inherit values assigned at runtime when they are forked, so hand the
    # values to every worker explicitly; this also covers the spawn and
    # forkserver start methods.
    pool = Pool(initializer=_init_worker, initargs=(directory, indata))
    try:
        list_of_files = [entry for entry in os.listdir(directory)
                         if entry.lower().endswith(extension)]
        pool.map(clearcut, list_of_files)
    finally:
        # Release the worker processes whether or not the map succeeded.
        pool.close()
        pool.join()

    result = [entry for entry in os.listdir(directory)
              if entry.lower().endswith(aligned_extension)]
    with open(directory + os.sep + results, "a") as f:
        for tree_file in result:
            with open(directory + os.sep + tree_file, "r") as r:
                f.write(toData(r.read(), tree_file))
141 | |
# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    main()