Mercurial > repos > galaxyp > regex_find_replace
view regex.py @ 1:209b7c5ee9d7 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/regex_find_replace commit 7283e44acd11cd11400a191d3b766885919956b4
author | galaxyp |
---|---|
date | Thu, 19 Jan 2017 11:26:15 -0500 |
parents | 60d04307b027 |
children | 538933d9fccc |
line wrap: on
line source
import sys
import os
import re
import string
from optparse import OptionParser
from tempfile import NamedTemporaryFile

try:
    # Python 2 only module. Nothing in this script uses it; the import is
    # kept (guarded) so the module namespace is unchanged on Python 2 while
    # the script no longer dies with ImportError on Python 3.
    import commands
except ImportError:
    commands = None

# Maps a literal character to the placeholder token that stands in for it in
# the --pattern / --replacement arguments. Tokens are turned back into the
# characters before the regexes are used.
# NOTE(review): presumably these tokens come from Galaxy's parameter
# sanitizing -- confirm against the tool wrapper.
_MAPPED_CHARS = {
    '\'': '__sq__',
    '\\': '__backslash__',
}


def _restore_mapped_chars(text):
    """Return *text* with every placeholder token replaced by its character."""
    for char, token in _MAPPED_CHARS.items():
        text = text.replace(token, char)
    return text


def _build_substitutions(patterns, replacements, input_display_name):
    """Prepare the (compiled regex, replacement template) pairs to apply.

    ``patterns`` and ``replacements`` are paired positionally with ``zip``;
    either may be ``None`` (option never given), which yields no pairs.
    Placeholder tokens are restored in both strings, then every
    ``#{input_name}`` in the replacement is substituted with
    ``input_display_name`` (skipped when that is ``None``).
    """
    substitutions = []
    for pattern, replacement in zip(patterns or [], replacements or []):
        pattern = _restore_mapped_chars(pattern)
        replacement = _restore_mapped_chars(replacement)
        if input_display_name is not None:
            # The replacement is a regex template, so a backslash in the
            # dataset name would be read as an escape / group reference.
            # Double it so the name is always inserted literally.
            literal_name = input_display_name.replace("\\", "\\\\")
            replacement = replacement.replace("#{input_name}", literal_name)
        substitutions.append((re.compile(pattern), replacement))
    return substitutions


def main():
    """Apply regex find/replace pairs to every line of a text file.

    Command-line options (parsed from ``sys.argv``):
      --input               path of the file to read
      --output              path of the file to write
      --input_display_name  text substituted for ``#{input_name}`` in
                            replacements
      --pattern             regex to search for (repeatable)
      --replacement         replacement template for the pattern at the same
                            position (repeatable)
      --column              1-based tab-separated column to restrict the
                            substitution to; whole line when omitted

    Raises ``re.error`` if a pattern does not compile.
    """
    parser = OptionParser()
    parser.add_option("--input", dest="input")
    parser.add_option("--output", dest="output")
    parser.add_option("--input_display_name", dest="input_display_name")
    parser.add_option("--pattern", dest="patterns", action="append",
                      help="regex pattern for replacement")
    parser.add_option("--replacement", dest="replacements", action="append",
                      help="replacement for regex match")
    parser.add_option("--column", dest="column", default=None)
    (options, args) = parser.parse_args()

    column = None
    if options.column is not None:
        # galaxy tabular is 1-based, python arrays are zero-based
        column = int(options.column) - 1

    # Loop-invariant work: decode and compile every pair once, not per line.
    substitutions = _build_substitutions(
        options.patterns, options.replacements, options.input_display_name)

    with open(options.input, 'r') as infile, \
            open(options.output, 'w') as outfile:
        for line in infile:
            for regex, replacement in substitutions:
                if column is None:
                    line = regex.sub(replacement, line)
                else:
                    cells = line.split("\t")
                    # Lines with too few columns pass through untouched.
                    if len(cells) > column:
                        cells[column] = regex.sub(replacement, cells[column])
                        line = "\t".join(cells)
            outfile.write(line)


if __name__ == "__main__":
    main()