comparison tools/filters/grep.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9071e359b9a3
1 # Filename: grep.py
2 # Author: Ian N. Schenck
3 # Version: 8/23/2005
4 #
5 # This script accepts regular expressions, as well as an "invert"
6 # option, and applies the regular expression using grep. This wrapper
7 # provides security and pipeline.
8 #
9 # Grep is launched based on these inputs:
10 # -i Input file
11 # -o Output file
12 # -pattern RegEx pattern
13 # -v true or false (output NON-matching lines)
14
15 import sys
16 import os
17 import re
18 import string
19 import commands
20 from tempfile import NamedTemporaryFile
21
22 # This function is exceedingly useful, perhaps package for reuse?
def getopts(argv):
    """Collect ``-flag value`` pairs from an argument list into a dict.

    Any token starting with '-' is treated as a flag and the token that
    follows it is taken as its value, even if that value itself starts
    with '-'. Tokens that are not flags (including empty strings) are
    skipped. When a flag is repeated, the last value wins.

    Args:
        argv: Sequence of argument strings (typically ``sys.argv[1:]``).
            The sequence is not modified.

    Returns:
        Dict mapping each flag (leading '-' included) to its value.

    Raises:
        IndexError: if the last token is a flag with no value after it.
    """
    opts = {}
    index = 0
    while index < len(argv):
        token = argv[index]
        # startswith() is safe on an empty string, unlike token[0].
        if token.startswith('-'):
            # Deliberately unguarded: a flag with no value raises IndexError.
            opts[token] = argv[index + 1]
            index += 2
        else:
            index += 1
    return opts
32
def main():
    """Run ``grep -E`` on an input file, driven by command-line options.

    Options are read from ``sys.argv``:
        -i        Input file
        -o        Output file
        -pattern  RegEx pattern (``__gt__``-style escapes are decoded)
        -v        "true" or "false"; "true" outputs NON-matching lines

    Returns:
        0 after printing usage (a flag was given without a value),
        -1 .. -4 when -o, -i, -v or -pattern is missing (in that order),
        -5 / -6 / -7 for an illegal output filename, input filename or
        invert option, otherwise the exit code of the grep process.
    """
    # Local import: the module header does not import subprocess.
    import subprocess

    try:
        opts = getopts(sys.argv[1:])
    except IndexError:
        print("Usage:")
        print(" -i Input file")
        print(" -o Output file")
        print(" -pattern RegEx pattern")
        print(" -v true or false (Invert match)")
        return 0

    outputfile = opts.get("-o")
    if outputfile is None:
        print("No output file specified.")
        return -1

    inputfile = opts.get("-i")
    if inputfile is None:
        print("No input file specified.")
        return -2

    invert = opts.get("-v")
    if invert is None:
        print("Match style (Invert or normal) not specified.")
        return -3

    pattern = opts.get("-pattern")
    if pattern is None:
        print("RegEx pattern not specified.")
        return -4

    # All inputs have been specified at this point, now validate.

    # Characters that arrive escaped in the pattern and must be restored.
    # With new sanitizing only the single quote needs replacing, but the
    # full table remains for backwards compatibility. A tuple (not a dict)
    # keeps the replacement order fixed.
    mapped_chars = (
        ('>', '__gt__'),
        ('<', '__lt__'),
        ('\'', '__sq__'),
        ('"', '__dq__'),
        ('[', '__ob__'),
        (']', '__cb__'),
        ('{', '__oc__'),
        ('}', '__cc__'),
    )
    for char, escaped in mapped_chars:
        pattern = pattern.replace(escaped, char)

    # Whitelist for file names. \Z (rather than $) also rejects a name
    # that ends in a newline.
    file_regex = re.compile(r"^[A-Za-z0-9./\-_]+\Z")

    if not file_regex.match(outputfile):
        print("Illegal output filename.")
        return -5
    if not file_regex.match(inputfile):
        print("Illegal input filename.")
        return -6
    # Exact comparison: a prefix match would let values such as "truthy"
    # or "falsetto" through and silently treat them as "false".
    if invert not in ("true", "false"):
        print("Illegal invert option.")
        return -7

    # invert grep search?
    grep_args = ["grep", "-E"]
    if invert == "true":
        grep_args.append("-v")
        print("Not matching pattern: %s" % pattern)
    else:
        print("Matching pattern: %s" % pattern)

    # The pattern is handed to grep through a file (-f), so it never has to
    # be quoted on a command line and may contain single quotes. The
    # temporary file object is kept open until grep has finished; closing
    # it removes the file.
    pattern_file = NamedTemporaryFile(mode="w")
    try:
        pattern_file.write(pattern)
        pattern_file.flush()

        # Argument list, no shell. "--" stops grep from reading an input
        # name that begins with '-' as an option.
        grep_args.extend(["-f", pattern_file.name, "--", inputfile])

        output = open(outputfile, "w")
        try:
            # grep's stderr is captured and dropped, as it was when the
            # command ran through commands.getstatusoutput().
            process = subprocess.Popen(grep_args, stdout=output,
                                       stderr=subprocess.PIPE)
            process.communicate()
        finally:
            output.close()
    finally:
        pattern_file.close()

    # return error code
    return process.returncode
121
# Script entry point. The value returned by main() is not used here.
if __name__ == "__main__":
    main()