0
|
1 import sys
|
|
2 import os
|
|
3 import re
|
|
4 import string
|
|
5 import commands
|
|
6 from optparse import OptionParser
|
|
7 from tempfile import NamedTemporaryFile
|
|
8
|
|
# Placeholder tokens that stand in for characters which are not passed
# literally on the command line (presumably produced by Galaxy's parameter
# sanitizing -- confirm against the tool wrapper).
_MAPPED_CHARS = {'\'': '__sq__', '\\': '__backslash__'}


def _unescape(text):
    """Return *text* with each placeholder token replaced by its character."""
    for char, token in _MAPPED_CHARS.items():
        text = text.replace(token, char)
    return text


def _apply_rule(regex, replacement, line, column):
    """Apply one compiled substitution to *line*.

    When *column* is None the whole line is rewritten; otherwise only the
    tab-separated cell at the zero-based index *column* is rewritten, and a
    line with too few cells is returned unchanged.
    """
    if column is None:
        return regex.sub(replacement, line)
    cells = line.split("\t")
    if len(cells) <= column:
        return line
    cells[column] = regex.sub(replacement, cells[column])
    return "\t".join(cells)


def main():
    """Copy ``--input`` to ``--output`` applying regex substitutions.

    Command-line options:
      --input        path of the file to read.
      --output       path of the file to write (truncated if it exists).
      --pattern      regex to search for; may be given several times.
      --replacement  replacement for the pattern at the same position; may
                     be given several times.  Patterns and replacements are
                     paired in order, surplus entries of either are ignored.
      --column       optional 1-based tab-separated column to restrict the
                     substitution to; without it the whole line is used.

    Without any pattern the input is copied unchanged.  An invalid
    ``--column`` value ends the program with a usage error (exit status 2).
    """
    parser = OptionParser()
    parser.add_option("--input", dest="input")
    parser.add_option("--output", dest="output")
    parser.add_option("--pattern", dest="patterns", action="append",
                      help="regex pattern for replacement")
    parser.add_option("--replacement", dest="replacements", action="append",
                      help="replacement for regex match")
    parser.add_option("--column", dest="column", default=None)
    (options, args) = parser.parse_args()

    column = None
    if options.column is not None:
        try:
            # galaxy tabular is 1-based, python arrays are zero-based
            column = int(options.column) - 1
        except ValueError:
            parser.error("--column must be an integer, got %r"
                         % (options.column,))
        if column < 0:
            # A negative index would silently count from the end of the row.
            parser.error("--column is 1-based and must be >= 1")

    # "append" options stay None when never supplied; treat that as "no rules".
    patterns = options.patterns or []
    replacements = options.replacements or []

    # Decode placeholders and compile once, up front, instead of per line.
    rules = [
        (re.compile(_unescape(pattern)), _unescape(replacement))
        for pattern, replacement in zip(patterns, replacements)
    ]

    with open(options.input, 'r') as source:
        with open(options.output, 'w') as sink:
            for line in source:
                # Rules run in order; each one sees the result of the
                # previous one (the row is re-split for every rule).
                for regex, replacement in rules:
                    line = _apply_rule(regex, replacement, line, column)
                sink.write(line)
|
|
46
|
|
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == '__main__':
    main()
|