comparison regex.py @ 0:9ea374bb0350 draft default tip

Uploaded
author jjohnson
date Sat, 29 Mar 2014 13:41:51 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9ea374bb0350
1 import sys
2 import os
3 import re
4 import string
5 import commands
6 from optparse import OptionParser
7 from tempfile import NamedTemporaryFile
8
def main():
    """Apply regex substitutions to each line of a text file.

    Options are read from the command line (``sys.argv``):

    --input        path of the file to read.
    --output       path of the file to write.
    --pattern      regular expression to search for; may be repeated.
    --replacement  replacement template for the pattern given at the same
                   position; may be repeated.
    --column       optional 1-based index of the tab-separated column to
                   restrict the substitution to.  When omitted, the whole
                   line is searched.

    Patterns and replacements are paired by position and applied in order,
    each one operating on the result of the previous one.  If one list is
    longer than the other, the surplus entries are ignored.  When no
    pattern/replacement pair is supplied the input is copied unchanged.

    In column mode, a row that has fewer columns than ``--column`` is
    written unchanged.

    Raises:
        re.error: if a pattern is not a valid regular expression (raised
            before the output file is opened) or a replacement template is
            invalid (raised when it is first applied).
        ValueError: if ``--column`` is not an integer.
    """
    parser = OptionParser()
    parser.add_option("--input", dest="input")
    parser.add_option("--output", dest="output")
    parser.add_option("--pattern", dest="patterns", action="append",
                      help="regex pattern for replacement")
    parser.add_option("--replacement", dest="replacements", action="append",
                      help="replacement for regex match")
    parser.add_option("--column", dest="column", default=None)
    (options, args) = parser.parse_args()

    # Placeholder tokens that stand in for characters in the pattern and
    # replacement arguments.  NOTE(review): presumably the calling framework
    # (Galaxy) substitutes these for characters that are unsafe on a command
    # line -- confirm against the tool wrapper.
    mapped_chars = {'\'': '__sq__', '\\': '__backslash__'}

    column = None
    if options.column is not None:
        # galaxy tabular is 1-based, python lists are zero-based
        column = int(options.column) - 1

    # Restore the placeholder tokens and compile every pattern once, up
    # front, instead of repeating that work for every input line.
    # optparse leaves an "append" option as None when it is never given, so
    # fall back to empty lists rather than handing None to zip().
    substitutions = []
    for pattern, replacement in zip(options.patterns or [],
                                    options.replacements or []):
        for char, token in mapped_chars.items():
            pattern = pattern.replace(token, char)
            replacement = replacement.replace(token, char)
        substitutions.append((re.compile(pattern), replacement))

    with open(options.input, 'r') as source:
        with open(options.output, 'w') as sink:
            # Each line keeps its terminator, so the newline is part of the
            # text the patterns see (and of the last cell in column mode).
            for line in source:
                for regex, replacement in substitutions:
                    if column is None:
                        line = regex.sub(replacement, line)
                        continue
                    # Re-split for every pattern: an earlier replacement may
                    # have inserted a tab and thereby shifted the columns.
                    cells = line.split("\t")
                    if len(cells) > column:
                        cells[column] = regex.sub(replacement, cells[column])
                        line = "\t".join(cells)
                sink.write(line)
46
# Script entry point: run the tool only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()