# HG changeset patch # User iuc # Date 1599873725 0 # Node ID a3aab6045663ec1ed658961fdaddb1610b98f774 # Parent 0c95a3f1654fd99b9e75eecfb1581c25dab4f12a "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit daa9af57fe07ee83a45ddc9f855716f9d14a8e12" diff -r 0c95a3f1654f -r a3aab6045663 filters.py --- a/filters.py Thu Jan 23 07:36:39 2020 -0500 +++ b/filters.py Sat Sep 12 01:22:05 2020 +0000 @@ -32,9 +32,20 @@ p = filter_dict['pattern'] r = filter_dict['replace'] c = int(filter_dict['column']) - 1 - self.func = lambda i, l: '\t'.join( - [x if j != c else re.sub(p, r, x) - for j, x in enumerate(l.split('\t'))]) + if 'add' not in filter_dict\ + or filter_dict['add'] not in ['prepend', + 'append', + 'before', + 'after']: + self.func = lambda i, l: '\t'.join( + [x if j != c else re.sub(p, r, x) + for j, x in enumerate(l.split('\t'))]) + else: + a = 0 if filter_dict['add'] == 'prepend'\ + else min(0, c - 1) if filter_dict['add'] == 'before'\ + else c + 1 if filter_dict['add'] == 'after'\ + else None + self.func = lambda i, l: self.replace_add(l, p, r, c, a) elif filter_dict['filter'] == 'prepend_line_num': self.func = lambda i, l: '%d\t%s' % (i, l) elif filter_dict['filter'] == 'append_line_num': @@ -69,6 +80,14 @@ fields = line.split('\t') return '\t'.join([fields[x] for x in cols]) + def replace_add(self, line, pat, rep, col, pos): + fields = line.rstrip('\r\n').split('\t') + i = pos if pos else len(fields) + val = '' + if col < len(fields) and re.search(pat, fields[col]): + val = re.sub(pat, rep, fields[col]).replace('\t', ' ') + return '\t'.join(fields[:i] + [val] + fields[i:]) + def normalize(self, line, split_cols, sep): lines = [] fields = line.rstrip('\r\n').split('\t') diff -r 0c95a3f1654f -r a3aab6045663 load_db.py --- a/load_db.py Thu Jan 23 07:36:39 2020 -0500 +++ b/load_db.py Sat Sep 12 01:22:05 2020 +0000 @@ -176,7 +176,7 @@ def get_column_def(file_path, table_name, skip=0, comment_char='#', - column_names=None, max_lines=100, load_named_columns=False, + column_names=None, max_lines=1000, load_named_columns=False, firstlinenames=False, filters=None): col_pref = ['TEXT', 'REAL', 'INTEGER', None] col_types = [] @@ -272,8 +272,26 @@ if x else None for i, x in enumerate(fields)] c.execute(insert_stmt, vals) except Exception as e: - print('Failed at line: %d err: %s' % (linenum, e), + print('Load %s Failed line: %d err: %s' % (file_path, linenum, e), file=sys.stderr) + for i, val in enumerate(fields): + try: + col_func[i](val) + except Exception: + colType = getValueType(val) + col_func[i] = float if colType == 'REAL' else int if colType == 'INTEGER' else str + print('Changing %s from %s to %s' % (col_names[i], col_types[i], colType), + file=sys.stderr) + col_types[i] = colType + vals = [col_func[i](x) + if x else None for i, x in enumerate(fields)] + print('%s %s' % (insert_stmt, vals), + file=sys.stderr) + try: + c.execute(insert_stmt, vals) + except Exception as e: + print('Insert %s line: %d Failed err: %s' % (file_path, linenum, e), + file=sys.stderr) conn.commit() c.close() for i, index in enumerate(unique_indexes): diff -r 0c95a3f1654f -r a3aab6045663 macros.xml --- a/macros.xml Thu Jan 23 07:36:39 2020 -0500 +++ b/macros.xml Sat Sep 12 01:22:05 2020 +0000 @@ -40,6 +40,9 @@ #set $filter_dict['column'] = int(str($fi.filter.column).replace('c','')) #set $filter_dict['pattern'] = str($fi.filter.regex_pattern) #set $filter_dict['replace'] = str($fi.filter.regex_replace) + #if $fi.filter.add: + #set $filter_dict['add'] = str($fi.filter.add) + #end if #silent $input_filters.append($filter_dict) #elif str($fi.filter.filter_type).endswith('pend_line_num'): #set $filter_dict = dict() @@ -172,7 +175,7 @@ - ^(c?[1-9]\d*)$ + ^(c?[1-9]\d*)$ @@ -180,6 +183,12 @@ + + + + + + @@ -211,6 +220,7 @@ - by regex expression matching *include/exclude* lines the match the regex expression - select columns choose to include only selected columns in the order specified - regex replace value in column replace a field in a column using a regex substitution (good for date reformatting) + - regex replace value in column add a new column using a regex substitution of a column value - prepend a line number column each line has the ordinal value of the line read by this filter as the first column - append a line number column each line has the ordinal value of the line read by this filter as the last column - prepend a text column each line has the text string as the first column diff -r 0c95a3f1654f -r a3aab6045663 query_db.py --- a/query_db.py Thu Jan 23 07:36:39 2020 -0500 +++ b/query_db.py Sat Sep 12 01:22:05 2020 +0000 @@ -52,7 +52,7 @@ except Exception as exc: print("Warning: %s" % exc, file=sys.stderr) except Exception as e: - exit('Error: %s' % (e)) + exit('describe_tables Error: %s' % (e)) exit(0) diff -r 0c95a3f1654f -r a3aab6045663 query_tabular.xml --- a/query_tabular.xml Thu Jan 23 07:36:39 2020 -0500 +++ b/query_tabular.xml Sat Sep 12 01:22:05 2020 +0000 @@ -1,4 +1,4 @@ - + using sqlite sql @@ -166,7 +166,7 @@
- + diff -r 0c95a3f1654f -r a3aab6045663 test-data/filtered_customers_results.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filtered_customers_results.tsv Sat Sep 12 01:22:05 2020 +0000 @@ -0,0 +1,4 @@ +1 John Smith John.Smith@yahoo.com yahoo.com 1968-02-04 626 222-2222 com +2 Steven Goldfish goldfish@fishhere.net fishhere.net 1974-04-04 323 455-4545 net +3 Paula Brown pb@herowndomain.org herowndomain.org 1978-05-24 416 323-3232 org +4 James Smith jim@supergig.co.uk supergig.co.uk 1980-10-20 416 323-8888