comparison column_regex_substitution.py @ 0:12b740c4cbc1 draft default tip

planemo upload for repository https://github.com/blankenberg/tools-blankenberg/tree/master/tools/column_regex_substitution commit 78936dc6be1747303d4cbfd80d09e4cfd1cbf292
author blankenberg
date Fri, 07 Sep 2018 10:29:30 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:12b740c4cbc1
1 #!/usr/bin/env python
2 #Dan Blankenberg
3
4 """
5 A script for using regex substitutions on columns.
6 """
7
8 import optparse
9 import re
10 import sys
11 import string
12
# Version string reported by the --version command line flag.
VERSION = "0.0.1"

# Every printable character except the digits and ','. These characters are
# stripped from both ends of each requested column value, so that entries
# such as "c1" or " 2 " still parse as integers.
COLUMN_STRIP_VALUES = "".join(set(string.printable) - set(string.digits) - set(','))


def get_provided_columns(provided_value, column_offset):
    """Parse a comma-separated column specification into sorted indexes.

    Each comma-separated entry has its leading and trailing non-digit
    printable characters removed, is converted to an integer and then has
    ``column_offset`` added to it.

    Args:
        provided_value: Column specification string such as ``"1,2,3"`` or
            ``"c1,c3"``. ``None`` means no columns were specified.
        column_offset: Integer added to every parsed column number.

    Returns:
        A sorted list of integer column indexes, or ``None`` when no value
        was provided or any entry could not be parsed.
    """
    if provided_value is None:
        return None
    try:
        rval = sorted(
            int(value.strip(COLUMN_STRIP_VALUES)) + column_offset
            for value in provided_value.split(',')
        )
    except (AttributeError, TypeError, ValueError):
        # Not a string, an entry without digits, or a non-numeric offset:
        # report "no usable columns" instead of raising. Only these errors
        # are caught so that KeyboardInterrupt/SystemExit still propagate.
        return None
    return rval or None
25
26
def __main__():
    """Run a regex substitution over selected columns of a delimited file.

    Command line options:
        --pattern        regular expression to search for (required)
        --replacement    replacement string handed to ``re.sub`` (required)
        --input          file to read (required)
        --output         file to write (required)
        --delimiter      column delimiter; when omitted each whole line is
                         treated as a single field
        --columns        comma-separated columns to operate on; when omitted
                         or unparsable every field is processed
        --column_offset  integer added to each provided column number to
                         turn it into a zero-based index
        --skip           number of leading input lines to skip; skipped
                         lines are not written to the output
        --version        print the version string and exit

    Exits with status 1 after printing the help text when a required option
    is missing, and with status 0 after printing the version.
    """
    parser = optparse.OptionParser()
    parser.add_option('--pattern', action='store', default=None,
                      help='pattern string')
    parser.add_option('--replacement', action='store', default=None,
                      help='replacement string')
    parser.add_option('--input', action='store', default=None,
                      help='Filename of input file')
    parser.add_option('--output', action='store', default=None,
                      help='Filename of output file')
    parser.add_option('--delimiter', action='store', default=None,
                      help='column delimiter')
    parser.add_option('--columns', action='store', default=None,
                      help='columns to operate on')
    parser.add_option('--column_offset', action='store', default=0,
                      help='offset to apply to columns index to force to zero-based')
    parser.add_option('--skip', action='store', default=0,
                      help='Number of lines to skip')
    parser.add_option('--version', action='store_true', default=False,
                      help='Show version')

    options, _ = parser.parse_args()

    if options.version:
        print("blankenberg_python_regex_substitution %s" % (VERSION))
        sys.exit(0)

    # --input is required too: without it open() below would fail with a
    # traceback instead of showing the usage text.
    if None in [options.pattern, options.replacement, options.input, options.output]:
        parser.print_help()
        sys.exit(1)

    replacement = options.replacement
    delimiter = options.delimiter
    # optparse returns command line values as strings; convert both numeric
    # options so that later integer arithmetic and comparisons are valid.
    column_offset = int(options.column_offset)
    skip = int(options.skip)

    print("Pattern: %s\nReplacement: %s" % (repr(options.pattern), repr(replacement)))
    pattern = re.compile(options.pattern)

    provided_columns = get_provided_columns(options.columns, column_offset)
    if provided_columns:
        # Report the columns the way the user numbered them.
        column_str = ", ".join(str(column - column_offset) for column in provided_columns)
    else:
        column_str = 'all'
    print("With delimiter %s, on columns: %s" % (repr(delimiter), column_str))

    join_char = "" if delimiter is None else delimiter

    # Text mode keeps lines as str on Python 3 as well, matching the str
    # pattern, replacement and delimiter.
    with open(options.input, 'r') as fin:
        with open(options.output, 'w') as fout:
            for i, line in enumerate(fin):
                if i < skip:
                    continue
                text = line.rstrip('\n\r')
                # Without a delimiter the whole line is one single field.
                fields = [text] if delimiter is None else text.split(delimiter)
                field_count = len(fields)
                columns = provided_columns or range(field_count)
                for j in columns:
                    if j >= field_count:
                        # Provided columns are sorted, so every remaining
                        # index is out of range for this line too.
                        break
                    fields[j] = pattern.sub(replacement, fields[j])
                fout.write("%s\n" % (join_char.join(fields)))
91
# Run the command line entry point only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    __main__()