Mercurial > repos > xuebing > sharplabtool
comparison tools/filters/grep.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 # Filename: grep.py | |
2 # Author: Ian N. Schenck | |
3 # Version: 8/23/2005 | |
4 # | |
5 # This script accepts a regular expression, as well as an "invert" | |
6 # option, and applies the regular expression using grep. This wrapper | |
7 # validates its arguments before running grep so it can be used in a pipeline. | |
8 # | |
9 # Grep is launched based on these inputs: | |
10 # -i Input file | |
11 # -o Output file | |
12 # -pattern RegEx pattern | |
13 # -v true or false (output NON-matching lines) | |
14 | |
15 import sys | |
16 import os | |
17 import re | |
18 import string | |
19 import commands | |
20 from tempfile import NamedTemporaryFile | |
21 | |
22 # This function is exceedingly useful, perhaps package for reuse? | |
def getopts(argv):
    """Collect ``-flag value`` pairs from an argument list into a dict.

    Every argument that starts with ``-`` is treated as an option name and
    the argument immediately after it is taken as its value, even if that
    value itself starts with ``-``.  Arguments that are not options
    (including empty strings) are skipped.  If an option is repeated, the
    last value wins.

    Args:
        argv: sequence of argument strings (typically ``sys.argv[1:]``).

    Returns:
        dict mapping each option name (with its leading ``-``) to its value.

    Raises:
        IndexError: if the final argument is an option with no value after
            it.  main() in this file catches this to print the usage text.
    """
    opts = {}
    pos = 0
    total = len(argv)
    # Walk by index instead of re-slicing the list on every step.
    while pos < total:
        token = argv[pos]
        # startswith() is safe on an empty string, unlike token[0].
        if token.startswith('-'):
            # Deliberately unguarded: a trailing option without a value
            # must raise IndexError so the caller can report bad usage.
            opts[token] = argv[pos + 1]
            pos += 2
        else:
            pos += 1
    return opts
32 | |
def main():
    """Validate the command-line options and run ``grep -E`` on the input.

    Reads four required options from ``sys.argv``:
        -i        input file
        -o        output file (created or overwritten)
        -pattern  extended regular expression; escaped tokens such as
                  ``__sq__`` or ``__gt__`` are mapped back to the characters
                  they stand for
        -v        "true" to output non-matching lines, "false" to output
                  matching lines

    Returns:
        0 after printing the usage text when the options cannot be parsed;
        -1 .. -7 when an option is missing or fails validation; otherwise
        grep's own exit status (0 = lines selected, 1 = no lines selected,
        anything else = grep failed, in which case grep's error output is
        forwarded to stderr).
    """
    # Imported locally so this function does not depend on the Python 2-only
    # ``commands`` module for launching grep.
    import subprocess

    try:
        opts = getopts(sys.argv[1:])
    except IndexError:
        print("Usage:")
        print(" -i Input file")
        print(" -o Output file")
        print(" -pattern RegEx pattern")
        print(" -v true or false (Invert match)")
        return 0

    outputfile = opts.get("-o")
    if outputfile is None:
        print("No output file specified.")
        return -1

    inputfile = opts.get("-i")
    if inputfile is None:
        print("No input file specified.")
        return -2

    invert = opts.get("-v")
    if invert is None:
        print("Match style (Invert or normal) not specified.")
        return -3

    pattern = opts.get("-pattern")
    if pattern is None:
        print("RegEx pattern not specified.")
        return -4

    # All inputs have been specified at this point, now validate.

    # Characters that may arrive escaped in the pattern, and their escapes.
    mapped_chars = {
        '>': '__gt__',
        '<': '__lt__',
        '\'': '__sq__',
        '"': '__dq__',
        '[': '__ob__',
        ']': '__cb__',
        '{': '__oc__',
        '}': '__cc__',
    }

    # With new sanitizing only the single quote needs replacing, but the
    # full table remains for backwards compatibility.
    for char, escaped in mapped_chars.items():
        pattern = pattern.replace(escaped, char)

    # File names are restricted to a conservative character set.
    file_regex = re.compile(r"^[A-Za-z0-9./\-_]+$")

    if not file_regex.match(outputfile):
        print("Illegal output filename.")
        return -5
    if not file_regex.match(inputfile):
        print("Illegal input filename.")
        return -6
    # Exact comparison: a prefix match would let values such as "trueish"
    # through and then silently treat them as "false".
    if invert not in ("true", "false"):
        print("Illegal invert option.")
        return -7

    # Build the command as an argument list so no shell is involved.
    grep_args = ["grep", "-E"]
    if invert == "true":
        grep_args.append("-v")
        print("Not matching pattern: %s" % pattern)
    else:
        print("Matching pattern: %s" % pattern)

    # The pattern is handed to grep through a file, so it never has to be
    # quoted on a command line and may contain single quotes.
    # delete=False keeps the securely created file on disk until we unlink
    # it ourselves, instead of re-creating a file from a released name.
    pattern_file = NamedTemporaryFile(mode="w", delete=False)
    try:
        try:
            pattern_file.write(pattern)
        finally:
            # Close (and flush) before grep reads the file.
            pattern_file.close()

        # "--" stops option parsing so an input name that begins with "-"
        # is not mistaken for a grep option.
        grep_args.extend(["-f", pattern_file.name, "--", inputfile])

        with open(outputfile, "wb") as out_handle:
            grep_proc = subprocess.Popen(grep_args,
                                         stdout=out_handle,
                                         stderr=subprocess.PIPE)
            grep_stderr = grep_proc.communicate()[1]
    finally:
        # Remove the temp pattern file even if grep could not be started.
        os.unlink(pattern_file.name)

    errorcode = grep_proc.returncode
    # 0 and 1 are normal outcomes (lines selected / none selected); any
    # other status is a real failure, so surface what grep reported.
    if errorcode not in (0, 1) and grep_stderr:
        if not isinstance(grep_stderr, str):
            grep_stderr = grep_stderr.decode("utf-8", "replace")
        sys.stderr.write(grep_stderr)

    return errorcode
121 | |
# Run the wrapper only when this file is executed as a script.
# The value returned by main() is not passed on as the process exit status.
if __name__ == "__main__":
    main()