annotate tools/filters/grep.py @ 2:c2a356708570

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:42 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 # Filename: grep.py
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 # Author: Ian N. Schenck
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 # Version: 8/23/2005
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 #
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 # This script accepts regular expressions, as well as an "invert"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 # option, and applies the regular expression using grep. This wrapper
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 # provides security and pipeline.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 #
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 # Grep is launched based on these inputs:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 # -i Input file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 # -o Output file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 # -pattern RegEx pattern
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 # -v true or false (output NON-matching lines)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 import sys
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 import os
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 import re
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 import string
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 import commands
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 from tempfile import NamedTemporaryFile
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 # This function is exceedingly useful, perhaps package for reuse?
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def getopts(argv):
    """Collect ``-flag value`` pairs from an argument list into a dict.

    Any argument that starts with '-' is treated as a flag and the argument
    immediately after it is taken as its value, even if that value itself
    starts with '-'. Arguments that are not flags (including empty strings)
    are skipped. When a flag is repeated, the last value wins.

    argv -- sequence of argument strings; it is not modified.

    Returns a dict mapping each flag (leading '-' included) to its value.

    Raises IndexError when the final argument is a flag with no value after
    it; callers use this to detect a malformed command line.
    """
    opts = {}
    index = 0
    while index < len(argv):
        arg = argv[index]
        # startswith() rather than arg[0], so an empty-string argument is
        # skipped instead of raising a misleading IndexError.
        if arg.startswith('-'):
            # Deliberately unguarded: a missing value raises IndexError.
            opts[arg] = argv[index + 1]
            index += 2
        else:
            index += 1
    return opts
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def main():
    """Validate the command-line options and run ``grep -E`` on the input.

    Options are read from ``sys.argv``; all four are required:
        -i        input file
        -o        output file (created or truncated)
        -pattern  extended regular expression; ``__gt__``-style escape
                  tokens are translated back to the characters they stand for
        -v        "true" to output the NON-matching lines, "false" otherwise

    Returns:
        0          usage was printed because a flag was given without a value
        -1 .. -4   the -o, -i, -v or -pattern option is absent (in that order)
        -5 .. -7   the output name, input name or invert value is illegal
        otherwise  grep's exit status (0 = lines selected, 1 = none selected,
                   greater than 1 = grep reported an error)
    """
    # Function-scope import so this block does not depend on the file's
    # import header being edited.
    import subprocess

    try:
        opts = getopts(sys.argv[1:])
    except IndexError:
        print("Usage:")
        print(" -i Input file")
        print(" -o Output file")
        print(" -pattern RegEx pattern")
        print(" -v true or false (Invert match)")
        return 0

    outputfile = opts.get("-o")
    if outputfile is None:
        print("No output file specified.")
        return -1

    inputfile = opts.get("-i")
    if inputfile is None:
        print("No input file specified.")
        return -2

    invert = opts.get("-v")
    if invert is None:
        print("Match style (Invert or normal) not specified.")
        return -3

    pattern = opts.get("-pattern")
    if pattern is None:
        print("RegEx pattern not specified.")
        return -4

    # All inputs have been specified at this point, now validate.

    # Characters that may arrive escaped as tokens; translate them back.
    mapped_chars = {
        '>': '__gt__',
        '<': '__lt__',
        '\'': '__sq__',
        '"': '__dq__',
        '[': '__ob__',
        ']': '__cb__',
        '{': '__oc__',
        '}': '__cc__',
    }

    # With new sanitizing only the single quote needs replacing, but the
    # full table remains for backwards compatibility.
    for key, value in mapped_chars.items():
        pattern = pattern.replace(value, key)

    # Whitelist of characters allowed in file names. \Z (not $) is used so a
    # trailing newline is rejected as well.
    file_regex = re.compile(r"^[A-Za-z0-9./\-_]+\Z")

    if not file_regex.match(outputfile):
        print("Illegal output filename.")
        return -5
    if not file_regex.match(inputfile):
        print("Illegal input filename.")
        return -6
    # Exact comparison: a prefix match would let values such as "truex"
    # through and silently treat them as "false".
    if invert not in ("true", "false"):
        print("Illegal invert option.")
        return -7

    # Build the command as an argument list so no shell is involved.
    command = ["grep", "-E"]
    if invert == "true":
        command.append("-v")
        print("Not matching pattern: %s" % pattern)
    else:
        print("Matching pattern: %s" % pattern)

    # The pattern is handed to grep through a file, so it never has to be
    # quoted on a command line and may contain single quotes. The temporary
    # file stays open while grep runs and is removed when it is closed.
    pattern_file = NamedTemporaryFile(mode="w")
    try:
        pattern_file.write(pattern)
        pattern_file.flush()  # make the pattern visible to the grep process

        # "--" ends option parsing, so an input name starting with '-' is
        # read as a file name rather than as a grep option.
        command.extend(["-f", pattern_file.name, "--", inputfile])

        output = open(outputfile, "wb")
        try:
            grep = subprocess.Popen(command, stdout=output,
                                    stderr=subprocess.PIPE,
                                    universal_newlines=True)
            errors = grep.communicate()[1]
        finally:
            output.close()
    finally:
        pattern_file.close()

    # Exit status 1 only means "no lines selected"; anything above that is a
    # real failure, so pass grep's diagnostics on instead of dropping them.
    if grep.returncode > 1 and errors:
        sys.stderr.write(errors)

    return grep.returncode
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
121
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
# Script entry point: run the filter only when this file is executed
# directly, not when it is imported. main()'s return value is not forwarded
# to sys.exit, so the process exit status does not reflect it.
if __name__ == "__main__":
    main()