annotate regex.py @ 0:9ea374bb0350 draft default tip

Uploaded
author jjohnson
date Sat, 29 Mar 2014 13:41:51 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
1 import sys
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
2 import os
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
3 import re
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
4 import string
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
5 import commands
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
6 from optparse import OptionParser
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
7 from tempfile import NamedTemporaryFile
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
8
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
def main():
    """Apply regex find/replace pairs to every line of a text file.

    Options are read from the command line with optparse:

      --input        path of the file to read
      --output       path of the file to write
      --pattern      regular expression to search for (repeatable)
      --replacement  replacement text for the --pattern given at the same
                     position (repeatable)
      --column       optional 1-based, tab-separated column to restrict the
                     substitution to; without it the whole line is searched

    Patterns and replacements are paired positionally; if one list is
    longer than the other the unpaired extras are ignored.  The pairs are
    applied in order, each one seeing the result of the previous one.
    When --column is given, rows with fewer columns are copied unchanged.

    Exits through parser.error() (SystemExit) if --column is less than 1.
    Raises ValueError if --column is not an integer and re.error if a
    pattern is not a valid regular expression.
    """
    parser = OptionParser()
    parser.add_option("--input", dest="input")
    parser.add_option("--output", dest="output")
    parser.add_option("--pattern", dest="patterns", action="append",
                      help="regex pattern for replacement")
    parser.add_option("--replacement", dest="replacements", action="append",
                      help="replacement for regex match")
    parser.add_option("--column", dest="column", default=None)
    (options, args) = parser.parse_args()

    # literal character -> placeholder token that stands in for it inside
    # option values; the placeholders are turned back into the characters
    # before the values are used.
    # NOTE(review): presumably this undoes Galaxy's parameter sanitizing --
    # confirm against the tool wrapper.
    mapped_chars = {'\'': '__sq__', '\\': '__backslash__'}

    column = None
    if options.column is not None:
        # galaxy tabular is 1-based, python arrays are zero-based
        column = int(options.column) - 1
        if column < 0:
            # A negative index would silently address a column counted
            # from the end of the row instead of the requested one.
            parser.error("--column must be 1 or greater")

    # "append" options are None (not an empty list) when never supplied, so
    # fall back to empty lists; the file is then copied through unchanged.
    patterns = options.patterns or []
    replacements = options.replacements or []

    # Restore the placeholder characters and compile each pattern once,
    # rather than repeating that work for every input line.
    rules = []
    for pattern, replacement in zip(patterns, replacements):
        for char, token in mapped_chars.items():
            pattern = pattern.replace(token, char)
            replacement = replacement.replace(token, char)
        rules.append((re.compile(pattern), replacement))

    with open(options.input, 'r') as infile:
        with open(options.output, 'w') as outfile:
            for line in infile:
                for regex, replacement in rules:
                    if column is None:
                        line = regex.sub(replacement, line)
                        continue
                    # NOTE: the line terminator is not stripped, so the
                    # last cell of a row still ends with it.
                    cells = line.split("\t")
                    if len(cells) > column:
                        cells[column] = regex.sub(replacement, cells[column])
                        line = "\t".join(cells)
                outfile.write(line)
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
46
9ea374bb0350 Uploaded
jjohnson
parents:
diff changeset
# Script entry point: run the tool only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()