diff tools/filters/grep.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/filters/grep.py	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,123 @@
+# Filename: grep.py
+# Author: Ian N. Schenck
+# Version: 8/23/2005
+#
+# This script accepts regular expressions, as well as an "invert"
+# option, and applies the regular expression using grep.  This wrapper
+# provides security and pipeline.
+#
+# Grep is launched based on these inputs:
+# -i		Input file
+# -o		Output file
+# -pattern	RegEx pattern
+# -v	        true or false (output NON-matching lines)
+
+import sys
+import os
+import re
+import string
+import commands
+from tempfile import NamedTemporaryFile
+
+# This function is exceedingly useful, perhaps package for reuse?
+def getopts(argv):
+    """Collect '-flag value' pairs from a list of command-line tokens.
+
+    Every token that starts with '-' is treated as a flag, and the token
+    right after it is taken as that flag's value, even when the value
+    itself starts with '-'.  Tokens that are not flags (including empty
+    strings) are skipped.  When a flag is repeated, the last value wins.
+
+    Parameters:
+        argv: list of argument strings, without the program name.
+
+    Returns:
+        dict mapping each flag (leading '-' included) to its value.
+
+    Raises:
+        IndexError: if the last token is a flag with no value after it.
+            main() catches this to print the usage text.
+    """
+    opts = {}
+    position = 0
+    total = len(argv)
+    while position < total:
+        token = argv[position]
+        # startswith() is safe on an empty string, unlike token[0], so an
+        # empty argument is skipped instead of being reported as a usage
+        # error.
+        if token.startswith('-'):
+            # Deliberately unguarded: a trailing flag must raise IndexError.
+            opts[token] = argv[position + 1]
+            position += 2
+        else:
+            position += 1
+    return opts
+
+def main():
+    """Validate the command-line options and run ``grep -E`` on the input.
+
+    Options are read from ``sys.argv`` as '-flag value' pairs:
+        -i        input file
+        -o        output file
+        -pattern  regular expression (may use the __xx__ escapes below)
+        -v        'true' or 'false'; 'true' outputs the NON-matching lines
+
+    Returns:
+        0 after printing the usage text when a flag has no value;
+        -1 to -4 when -o, -i, -v or -pattern (in that order) is missing;
+        -5 / -6 / -7 when the output filename, input filename or invert
+        option is illegal; otherwise the status value reported by
+        commands.getstatusoutput() for the grep command.
+    """
+    # Single-argument print(...) emits the same text under the Python 2
+    # print statement that this script uses.
+    try:
+        opts = getopts(sys.argv[1:])
+    except IndexError:
+        print("Usage:")
+        print(" -i\t\tInput file")
+        print(" -o\t\tOutput file")
+        print(" -pattern\tRegEx pattern")
+        print(" -v\t\ttrue or false (Invert match)")
+        return 0
+
+    outputfile = opts.get("-o")
+    if outputfile is None:
+        print("No output file specified.")
+        return -1
+
+    inputfile = opts.get("-i")
+    if inputfile is None:
+        print("No input file specified.")
+        return -2
+
+    invert = opts.get("-v")
+    if invert is None:
+        print("Match style (Invert or normal) not specified.")
+        return -3
+
+    pattern = opts.get("-pattern")
+    if pattern is None:
+        print("RegEx pattern not specified.")
+        return -4
+
+    # All inputs have been specified at this point, now validate.
+
+    # The pattern may arrive with these characters escaped as __xx__
+    # tokens; turn each token back into the character it stands for.
+    # Only the single quote still needs this with the newer sanitizing,
+    # but the full table stays for backwards compatibility.
+    mapped_chars = {
+        '>': '__gt__',
+        '<': '__lt__',
+        "'": '__sq__',
+        '"': '__dq__',
+        '[': '__ob__',
+        ']': '__cb__',
+        '{': '__oc__',
+        '}': '__cc__',
+    }
+    for char, token in mapped_chars.items():
+        pattern = pattern.replace(token, char)
+
+    # Both filenames are interpolated into a shell command line below, so
+    # only a conservative character set is allowed.  \Z is used instead
+    # of $ because $ would also accept a trailing newline.
+    fileRegEx = re.compile(r"^[A-Za-z0-9./\-_]+\Z")
+
+    if not fileRegEx.match(outputfile):
+        print("Illegal output filename.")
+        return -5
+    if not fileRegEx.match(inputfile):
+        print("Illegal input filename.")
+        return -6
+    # Exact comparison: a prefix match would let values such as "truex"
+    # through and then silently treat them as "false".
+    if invert not in ("true", "false"):
+        print("Illegal invert option.")
+        return -7
+
+    # invert grep search?
+    if invert == "true":
+        invertflag = " -v"
+        print("Not matching pattern: %s" % pattern)
+    else:
+        invertflag = ""
+        print("Matching pattern: %s" % pattern)
+
+    # The pattern is handed to grep through a file (-f), so it never has
+    # to be quoted for the shell and may contain single quotes.  The
+    # temporary file object is kept open while grep runs and removed by
+    # close(); dropping the object and reopening its bare name would
+    # leave a window in which the name is no longer reserved.
+    pattern_file = NamedTemporaryFile(mode='w')
+    try:
+        pattern_file.write(pattern)
+        # grep reads the file by name, so the data must be on disk first.
+        pattern_file.flush()
+
+        # "--" ends grep's options so an input filename that starts with
+        # '-' is read as a file.
+        commandline = "grep -E %s -f %s -- %s > %s" % (
+            invertflag, pattern_file.name, inputfile, outputfile)
+
+        # NOTE(review): getstatusoutput() also captures the command's
+        # error messages; they are discarded here, as before.
+        errorcode, _captured_output = commands.getstatusoutput(commandline)
+    finally:
+        # Closing the temporary file deletes it, also when grep failed.
+        pattern_file.close()
+
+    return errorcode
+
+if __name__ == "__main__":
+    # Run the wrapper when executed as a script.  The status returned by
+    # main() is discarded here, so it does not become the process exit code.
+    main()