Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
comparison phylostatistics/phytab_LB_pruner.py @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5b9a38ec4a39 |
---|---|
1 import os | |
2 import optparse | |
3 import subprocess | |
4 from multiprocessing import Pool | |
5 | |
# Working directory for the per-record files. main() overwrites this with
# "<path>/data", where <path> comes from the -d option.
directory = ""
# Name of the combined output file that main() writes inside `directory`.
results = "results.data"
# Suffix appended to per-record file names, and the suffix main() filters on
# when choosing files for LB_pruner. Empty, so every directory entry matches.
extension = ""
# Suffix LB_pruner appends to its input path to form the last perl argument;
# main() later gathers the files whose names end with it.
aligned_extension = ".tab"
# NOTE(review): not read anywhere in the visible code; main() stores the -m
# value in the global `indata` instead. Confirm whether this is dead.
datatype = ""

# Directory prefix for LB_prunerG.pl; hard-coded to one installation layout.
perlpath = "/home/galaxy-dist/tools/osiris/tree-manipulation/"
13 | |
def unescape(string):
    """Restore characters that were replaced by ``__xx__`` escape tokens.

    Every token listed in the table below (for example ``__gt__``) that
    occurs in *string* is replaced by the literal character it stands for.
    Text without tokens is returned unchanged.

    :param string: text that may contain escape tokens.
    :returns: *string* with every known token replaced.
    """
    mapped_chars = {
        '>': '__gt__',
        '<': '__lt__',
        "'": '__sq__',
        '"': '__dq__',
        '[': '__ob__',
        ']': '__cb__',
        '{': '__oc__',
        '}': '__cc__',
        '@': '__at__',
        '\n': '__cn__',
        '\r': '__cr__',
        '\t': '__tc__',
        '#': '__pd__'
    }

    # items() exists on both Python 2 and Python 3 dicts, whereas the
    # previously used iteritems() is Python 2 only.
    for char, token in mapped_chars.items():
        string = string.replace(token, char)

    return string
35 | |
36 | |
def isTabular(file):
    """Report whether *file* contains no line starting with ``>``.

    :param file: path of the file to inspect.
    :returns: ``False`` if any line begins with ``>``, ``True`` otherwise
        (an empty file therefore counts as tabular).
    """
    with open(file) as handle:
        # any() stops at the first matching line, like an early return.
        has_marker = any(row.startswith('>') for row in handle)
    return not has_marker
43 | |
44 #def toData(text, name): | |
45 # name = name.replace("fasta", "") #file name has fasta when fasta file called | |
46 # text = name.replace(".fs.tre", "") + "\t" + text.replace(" " , "") | |
47 # return text | |
48 | |
49 | |
def toData(text, name):
    """Collect the tab-containing lines of *text* into a single string.

    *text* is split on ``'\\n'``. Lines without a tab character are dropped.
    Each remaining line has every occurrence of ``"./data/"`` removed and
    is terminated with a newline.

    :param text: raw text to filter.
    :param name: unused; kept so existing callers keep working.
    :returns: the kept lines joined together, or ``''`` when none qualify.
    """
    kept = [
        line.replace("./data/", "") + "\n"
        for line in text.split('\n')
        if '\t' in line
    ]
    # A single join avoids rebuilding the result string for every line.
    return "".join(kept)
58 | |
def LB_pruner(input):
    """Run ``LB_prunerG.pl`` on one file of the working directory.

    The perl script receives three arguments: the path of *input* inside
    the module-level ``directory``, the module-level ``indata`` value, and
    that same path with ``aligned_extension`` appended (presumably the
    output file; confirm against the perl script). The call blocks until
    the process ends; its exit status is not inspected.

    :param input: file name relative to ``directory``.
    """
    target = directory + os.sep + input
    script = perlpath + 'LB_prunerG.pl'
    command = ['perl', script, target, indata, target + aligned_extension]
    process = subprocess.Popen(command)
    process.wait()
63 | |
class Sequence:
    """One whitespace-separated record: a name followed by a tree field."""

    def __init__(self, string):
        """Parse *string* into its first two whitespace-separated fields.

        :param string: raw record line; also kept verbatim in ``string``.
        :raises IndexError: if *string* has fewer than two fields.
        """
        fields = string.split()
        self.name, self.tree = fields[0], fields[1]
        self.string = string

    def printFASTA(self):
        """Return the tree field followed by a newline."""
        return "%s\n" % self.tree
73 | |
def saveMulti(tabFile):
    """Split a tabular file into one file per record name.

    Each non-blank line of *tabFile* is parsed as a ``Sequence``. Its tree
    field plus a newline is appended to the file
    ``directory/<name><extension>``, so records that share a name end up
    in the same file.

    :param tabFile: path of the tabular input file.
    :raises IndexError: if a non-blank line has fewer than two fields.
    """
    with open(tabFile) as f:
        for line in f:
            # A blank line (e.g. a trailing newline at the end of the file)
            # has no fields, and Sequence would raise IndexError on it.
            if not line.strip():
                continue
            seq = Sequence(line)
            with open(directory + os.sep + seq.name + extension, "a") as p:
                p.write(seq.printFASTA())
80 | |
def saveSingle(fastaFile):
    """Append the content of *fastaFile* to ``directory/fasta<extension>``.

    The destination is opened once in append mode and the source is
    streamed into it line by line, instead of reopening the destination
    for every line.

    :param fastaFile: path of the file to copy.
    """
    destination = directory + os.sep + "fasta" + extension
    with open(fastaFile) as source:
        with open(destination, "a") as target:
            # writelines() consumes the file iterator lazily, so the input
            # is never held in memory as a whole.
            target.writelines(source)
86 | |
def _init_worker(work_dir, multiplier):
    """Set the module globals that LB_pruner reads inside a pool worker."""
    global directory
    global indata
    directory = work_dir
    indata = multiplier


def main():
    """Command-line entry point.

    Steps:
      1. Parse ``-d`` (working path), ``-i`` (input file) and ``-m``.
      2. Create ``<path>/data`` and fill it from the input file, using
         saveMulti() when isTabular() is true and saveSingle() otherwise.
      3. Run LB_pruner() over the matching directory entries in a
         process pool.
      4. Append the toData() output of every ``aligned_extension`` file to
         ``<path>/data/results.data``.

    Exits through ``parser.error`` when ``-i`` or ``-m`` is missing.

    :raises OSError: if the ``data`` directory cannot be created, for
        instance because it already exists.
    """
    global directory
    global indata

    usage = """%prog [options]
options (listed below) default to 'None' if omitted
    """
    parser = optparse.OptionParser(usage=usage)

    parser.add_option(
        '-d', '--directory',
        metavar="PATH",
        dest='path',
        default='.',
        help='Path to working directory.')

    parser.add_option(
        '-i', '--in',
        dest='input',
        action='store',
        type='string',
        metavar="FILE",
        help='Name of input data.')

    parser.add_option(
        '-m', '--mult',
        dest='datatype',
        action='store',
        type='string',
        help='Multiplier')

    options, args = parser.parse_args()

    # Both values are passed to unescape(), which cannot handle None, so
    # report a missing option as a usage error instead of a traceback.
    if options.input is None:
        parser.error("an input file is required (-i/--in)")
    if options.datatype is None:
        parser.error("a multiplier is required (-m/--mult)")

    inputFile = unescape(options.input)
    directory = unescape(options.path) + os.sep + "data"
    indata = unescape(options.datatype)

    os.mkdir(directory)

    if isTabular(inputFile):
        saveMulti(inputFile)
    else:
        saveSingle(inputFile)

    list_of_files = [
        entry for entry in os.listdir(directory)
        if entry.lower().endswith(extension)
    ]

    # The initializer passes the settings to each worker explicitly, so
    # LB_pruner does not depend on the workers inheriting this process's
    # globals (which only happens when they are forked).
    pool = Pool(initializer=_init_worker, initargs=(directory, indata))
    try:
        pool.map(LB_pruner, list_of_files)
    finally:
        # Release the worker processes even when a task failed.
        pool.close()
        pool.join()

    result = [
        entry for entry in os.listdir(directory)
        if entry.lower().endswith(aligned_extension)
    ]
    with open(directory + os.sep + results, "a") as f:
        for entry in result:
            with open(directory + os.sep + entry, "r") as r:
                f.write(toData(r.read(), entry))
139 | |
# Run the tool only when executed as a script. The guard also keeps
# multiprocessing workers that re-import this module from calling main().
if __name__ == '__main__':
    main()
142 |