# HG changeset patch # User jjohnson # Date 1498137296 14400 # Node ID 5b4f6cf857cf78872551d9983ae028cff581f460 # Parent 67082f49f047d2376a15d6b7d45966cfbf755081 Uploaded diff -r 67082f49f047 -r 5b4f6cf857cf ._query_tabular.py Binary file ._query_tabular.py has changed diff -r 67082f49f047 -r 5b4f6cf857cf query_tabular.py --- a/query_tabular.py Fri Jun 09 11:25:46 2017 -0400 +++ b/query_tabular.py Thu Jun 22 09:14:56 2017 -0400 @@ -79,6 +79,9 @@ self.func = lambda i,l: l if not rgx.search(l) else None elif filter_dict['action'] == 'include_find': self.func = lambda i,l: l if rgx.search(l) else None + elif filter_dict['filter'] == 'select_columns': + cols = [int(c) - 1 for c in filter_dict['columns']] + self.func = lambda i,l: self.select_columns(l,cols) elif filter_dict['filter'] == 'replace': p = filter_dict['pattern'] r = filter_dict['replace'] @@ -97,6 +100,9 @@ self.func = lambda i,l: self.normalize(l,cols,sep) def __iter__(self): return self + def select_columns(self,line,cols): + fields = line.split('\t') + return '\t'.join([fields[x] for x in cols]) def normalize(self,line,split_cols,sep): lines = [] fields = line.rstrip('\r\n').split('\t') @@ -309,6 +315,17 @@ parser.add_option('-o', '--output', dest='output', default=None, help='Output file for query results') (options, args) = parser.parse_args() + def run_query(query,outputFile): + conn = get_connection(options.sqlitedb, addfunctions=True) + cur = conn.cursor() + results = cur.execute(query) + if not options.no_header: + outputFile.write("#%s\n" % '\t'.join([str(col[0]) for col in cur.description])) + # yield [col[0] for col in cur.description] + for i, row in enumerate(results): + # yield [val for val in row] + outputFile.write("%s\n" % '\t'.join([str(val) if val is not None else '' for val in row])) + # open sqlite connection conn = get_connection(options.sqlitedb) # determine output destination @@ -394,15 +411,7 @@ # print >> sys.stderr, "Error: Must be a read only query" # exit(2) try: - conn = get_connection(options.sqlitedb, addfunctions=True) - cur = conn.cursor() - results = cur.execute(query) - if not options.no_header: - outputFile.write("#%s\n" % '\t'.join([str(col[0]) for col in cur.description])) - # yield [col[0] for col in cur.description] - for i, row in enumerate(results): - # yield [val for val in row] - outputFile.write("%s\n" % '\t'.join([str(val) if val is not None else '' for val in row])) + run_query(query,outputFile) except Exception, exc: print >> sys.stderr, "Error: %s" % exc exit(1) diff -r 67082f49f047 -r 5b4f6cf857cf query_tabular.xml --- a/query_tabular.xml Fri Jun 09 11:25:46 2017 -0400 +++ b/query_tabular.xml Thu Jun 22 09:14:56 2017 -0400 @@ -1,4 +1,4 @@ - + using sqlite sql @@ -103,6 +103,11 @@ #set $filter_dict['pattern'] = str($fi.filter.regex_pattern) #set $filter_dict['action'] = str($fi.filter.regex_action) #silent $input_filters.append($filter_dict) + #elif $fi.filter.filter_type == 'select_columns': + #set $filter_dict = dict() + #set $filter_dict['filter'] = str($fi.filter.filter_type) + #set $filter_dict['columns'] = [int(str($ci)) for $ci in str($fi.filter.columns).split(',')] + #silent $input_filters.append($filter_dict) #elif $fi.filter.filter_type == 'replace': #set $filter_dict = dict() #set $filter_dict['filter'] = str($fi.filter.filter_type) @@ -145,6 +150,7 @@ + @@ -172,6 +178,12 @@ + + + ^([1-9]\d*)(,[1-9]\d*)*$ + + @@ -310,6 +322,7 @@ - skip leading lines skip the first *number* of lines - comment char omit any lines that start with the specified comment character - by regex expression matching *include/exclude* lines the match the regex expression + - select columns choose to include only selected columns in the order specified - regex replace value in column replace a field in a column using a regex substitution (good for date reformatting) - prepend a line number column each line has the ordinal value of the line read by this filter as the first column - append a line number column each line has the ordinal value of the line read by this filter as the last column @@ -541,18 +554,19 @@ Filter 1 - by regex expression matching [include]: '^\d+' (include lines that start with a number) Filter 2 - append a line number column: Filter 3 - regex replace value in column[4]: '(\d+)/(\d+)/(\d+)' '19\3-\2-\1' (convert dates to sqlite format) + Filter 4 - select columns 7,2,3,4,1 Table: People - Columns: Pets,FirstName,LastName,DOB,,,id + Columns: id,FirstName,LastName,DOB,Pets - ==== ========= ======== ========== == - Pets FirstName LastName DOB id - ==== ========= ======== ========== == - 2 Paula Brown 1978-05-24 1 - 1 Steven Jones 1974-04-04 2 - 0 Jane Doe 1978-05-24 3 - 1 James Smith 1980-10-20 4 - ==== ========= ======== ========== == + == ========= ======== ========== ==== + id FirstName LastName DOB Pets + == ========= ======== ========== ==== + 1 Paula Brown 1978-05-24 2 + 2 Steven Jones 1974-04-04 1 + 3 Jane Doe 1978-05-24 0 + 4 James Smith 1980-10-20 1 + == ========= ======== ========== ==== *Pet Table* @@ -563,18 +577,19 @@ Filter 2 - append a line number column: Filter 3 - by regex expression matching [exclude]: '^0\t' (exclude lines with no pets) Filter 4 - normalize list columns[5,6]: + Filter 5 - select columns 7,5,6 Table: Pet - Columns: ,,,,PetName,PetType,id + Columns: id,PetName,PetType - ======== ======== == - PetName PetType id - ======== ======== == - Rex dog 1 - Fluff cat 1 - Allie cat 2 - Spot 4 - ======== ======== == + == ======== ======== + id PetName PetType + == ======== ======== + 1 Rex dog + 1 Fluff cat + 2 Allie cat + 4 Spot + == ======== ======== Query: SELECT FirstName,LastName,PetName FROM People join Pet on People.id = Pet.id WHERE PetType = 'cat';