# HG changeset patch # User iuc # Date 1500383227 14400 # Node ID 3708ff0198b7869a9469840778799b4b9d7892a3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 74915fc9cee746bbce1c4b507e13231259de177d diff -r 000000000000 -r 3708ff0198b7 filter_tabular.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filter_tabular.py Tue Jul 18 09:07:07 2017 -0400 @@ -0,0 +1,68 @@ +#!/usr/bin/env python + +from __future__ import print_function + +import json +import optparse +import os.path +import sys + +from filters import filter_file + + +def __main__(): + # Parse Command Line + parser = optparse.OptionParser() + parser.add_option('-i', '--input', dest='input', default=None, + help='Input file for filtering') + parser.add_option('-j', '--jsonfile', dest='jsonfile', default=None, + help='JSON array of filter specifications') + parser.add_option('-o', '--output', dest='output', default=None, + help='Output file for query results') + parser.add_option('-v', '--verbose', dest='verbose', default=False, + action='store_true', + help='verbose') + (options, args) = parser.parse_args() + + if options.input is not None: + try: + inputPath = os.path.abspath(options.input) + inputFile = open(inputPath, 'r') + except Exception as e: + exit('Error: %s' % (e)) + else: + inputFile = sys.stdin + + if options.output is not None: + try: + outputPath = os.path.abspath(options.output) + outputFile = open(outputPath, 'w') + except Exception as e: + exit('Error: %s' % (e)) + else: + outputFile = sys.stdout + + filters = None + if options.jsonfile: + try: + with open(options.jsonfile) as fh: + filters = json.load(fh) + except Exception as e: + exit('Error: %s' % (e)) + + if options.verbose and filters: + for f in filters: + print('%s %s' % (f['filter'], + ', '.join( + ['%s: %s' % (k, f[k]) + for k in set(f.keys()) - set(['filter'])])), + file=sys.stdout) + + try: + filter_file(inputFile, outputFile, filters=filters) + except Exception as e: + exit('Error: %s' % (e)) + + +if __name__ == "__main__": + __main__() diff -r 000000000000 -r 3708ff0198b7 filters.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/filters.py Tue Jul 18 09:07:07 2017 -0400 @@ -0,0 +1,156 @@ +#!/usr/binsenv python + +from __future__ import print_function + +import re +import sys + + +class LineFilter(object): + def __init__(self, source, filter_dict): + self.source = source + self.filter_dict = filter_dict + self.func = lambda i, l: l.rstrip('\r\n') if l else None + self.src_lines = [] + self.src_line_cnt = 0 + if not filter_dict: + return + if filter_dict['filter'] == 'regex': + rgx = re.compile(filter_dict['pattern']) + if filter_dict['action'] == 'exclude_match': + self.func = lambda i, l: l if not rgx.match(l) else None + elif filter_dict['action'] == 'include_match': + self.func = lambda i, l: l if rgx.match(l) else None + elif filter_dict['action'] == 'exclude_find': + self.func = lambda i, l: l if not rgx.search(l) else None + elif filter_dict['action'] == 'include_find': + self.func = lambda i, l: l if rgx.search(l) else None + elif filter_dict['filter'] == 'select_columns': + cols = [int(c) - 1 for c in filter_dict['columns']] + self.func = lambda i, l: self.select_columns(l, cols) + elif filter_dict['filter'] == 'replace': + p = filter_dict['pattern'] + r = filter_dict['replace'] + c = int(filter_dict['column']) - 1 + self.func = lambda i, l: '\t'.join( + [x if j != c else re.sub(p, r, x) for j, x in enumerate(l.split('\t'))]) + elif filter_dict['filter'] == 'prepend_line_num': + self.func = lambda i, l: '%d\t%s' % (i, l) + elif filter_dict['filter'] == 'append_line_num': + self.func = lambda i, l: '%s\t%d' % (l.rstrip('\r\n'), i) + elif filter_dict['filter'] == 'prepend_text': + s = filter_dict['column_text'] + self.func = lambda i, l: '%s\t%s' % (s, l) + elif filter_dict['filter'] == 'append_text': + s = filter_dict['column_text'] + self.func = lambda i, l: '%s\t%s' % (l.rstrip('\r\n'), s) + elif filter_dict['filter'] == 'skip': + cnt = filter_dict['count'] + self.func = lambda i, l: l if i > cnt else None + elif filter_dict['filter'] == 'normalize': + cols = [int(c) - 1 for c in filter_dict['columns']] + sep = filter_dict['separator'] + self.func = lambda i, l: self.normalize(l, cols, sep) + + def __iter__(self): + return self + + def __next__(self): + if not self.src_lines: + self.get_lines() + if self.src_lines: + return self.src_lines.pop(0) + raise StopIteration + + next = __next__ + + def select_columns(self, line, cols): + fields = line.split('\t') + return '\t'.join([fields[x] for x in cols]) + + def normalize(self, line, split_cols, sep): + lines = [] + fields = line.rstrip('\r\n').split('\t') + split_fields = dict() + cnt = 0 + for c in split_cols: + if c < len(fields): + split_fields[c] = fields[c].split(sep) + cnt = max(cnt, len(split_fields[c])) + if cnt == 0: + lines.append('\t'.join(fields)) + else: + for n in range(0, cnt): + flds = [x if c not in split_cols else split_fields[c][n] + if n < len(split_fields[c]) + else '' for (c, x) in enumerate(fields)] + lines.append('\t'.join(flds)) + return lines + + def get_lines(self): + for i, next_line in enumerate(self.source): + self.src_line_cnt += 1 + line = self.func(self.src_line_cnt, next_line) + if line: + if isinstance(line, list): + self.src_lines.extend(line) + else: + self.src_lines.append(line) + return + + +class TabularReader: + """ + Tabular file iterator. Returns a list + """ + def __init__(self, input_file, skip=0, comment_char=None, col_idx=None, + filters=None): + self.skip = skip + self.comment_char = comment_char + self.col_idx = col_idx + self.filters = filters + self.tsv_file = \ + input_file if hasattr(input_file, 'readline') else open(input_file) + if skip and skip > 0: + for i in range(skip): + if not self.tsv_file.readline(): + break + source = LineFilter(self.tsv_file, None) + if comment_char: + source = LineFilter(source, + {"filter": "regex", "pattern": comment_char, + "action": "exclude_match"}) + if filters: + for f in filters: + source = LineFilter(source, f) + self.source = source + + def __iter__(self): + return self + + def __next__(self): + ''' Iteration ''' + for i, line in enumerate(self.source): + fields = line.rstrip('\r\n').split('\t') + if self.col_idx: + fields = [fields[i] for i in self.col_idx] + return fields + raise StopIteration + + next = __next__ + + +def filter_file(input_file, output, skip=0, comment_char='#', filters=None): + data_lines = 0 + try: + tr = TabularReader(input_file, skip=skip, comment_char=comment_char, + filters=filters) + for linenum, fields in enumerate(tr): + data_lines += 1 + try: + output.write('%s\n' % '\t'.join(fields)) + except Exception as e: + print('Failed at line: %d err: %s' % (linenum, e), + file=sys.stderr) + except Exception as e: + exit('Error: %s' % (e)) diff -r 000000000000 -r 3708ff0198b7 load_db.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/load_db.py Tue Jul 18 09:07:07 2017 -0400 @@ -0,0 +1,135 @@ +#!/usr/bin/env python + +from __future__ import print_function + +import sys + +from filters import TabularReader + + +def getValueType(val): + if val or 0. == val: + try: + int(val) + return 'INTEGER' + except: + try: + float(val) + return 'REAL' + except: + return 'TEXT' + return None + + +def get_column_def(file_path, table_name, skip=0, comment_char='#', + column_names=None, max_lines=100, load_named_columns=False, + filters=None): + col_pref = ['TEXT', 'REAL', 'INTEGER', None] + col_types = [] + col_idx = None + try: + tr = TabularReader(file_path, skip=skip, comment_char=comment_char, + col_idx=None, filters=filters) + for linenum, fields in enumerate(tr): + if linenum > max_lines: + break + try: + while len(col_types) < len(fields): + col_types.append(None) + for i, val in enumerate(fields): + colType = getValueType(val) + if col_pref.index(colType) < col_pref.index(col_types[i]): + col_types[i] = colType + except Exception as e: + print('Failed at line: %d err: %s' % (linenum, e), + file=sys.stderr) + except Exception as e: + print('Failed: %s' % (e), file=sys.stderr) + for i, col_type in enumerate(col_types): + if not col_type: + col_types[i] = 'TEXT' + if column_names: + col_names = [] + if load_named_columns: + col_idx = [] + for i, cname in enumerate( + [cn.strip() for cn in column_names.split(',')]): + if cname != '': + col_idx.append(i) + col_names.append(cname) + col_types = [col_types[i] for i in col_idx] + else: + col_names = ['c%d' % i for i in range(1, len(col_types) + 1)] + for i, cname in enumerate( + [cn.strip() for cn in column_names.split(',')]): + if cname and i < len(col_names): + col_names[i] = cname + else: + col_names = ['c%d' % i for i in range(1, len(col_types) + 1)] + col_def = [] + for i, col_name in enumerate(col_names): + col_def.append('%s %s' % (col_names[i], col_types[i])) + return col_names, col_types, col_def, col_idx + + +def create_table(conn, file_path, table_name, skip=0, comment_char='#', + pkey_autoincr=None, column_names=None, + load_named_columns=False, filters=None, + unique_indexes=[], indexes=[]): + col_names, col_types, col_def, col_idx = \ + get_column_def(file_path, table_name, skip=skip, + comment_char=comment_char, column_names=column_names, + load_named_columns=load_named_columns, filters=filters) + col_func = [float if t == 'REAL' else int + if t == 'INTEGER' else str for t in col_types] + table_def = 'CREATE TABLE %s (\n %s%s\n);' % ( + table_name, + '%s INTEGER PRIMARY KEY AUTOINCREMENT,' % + pkey_autoincr if pkey_autoincr else '', + ', \n '.join(col_def)) + # print >> sys.stdout, table_def + insert_stmt = 'INSERT INTO %s(%s) VALUES(%s)' % ( + table_name, ','.join(col_names), + ','.join(["?" for x in col_names])) + # print >> sys.stdout, insert_stmt + data_lines = 0 + try: + c = conn.cursor() + c.execute(table_def) + conn.commit() + c.close() + for i, index in enumerate(unique_indexes): + index_name = 'idx_uniq_%s_%d' % (table_name, i) + index_columns = index.split(',') + create_index(conn, table_name, index_name, index_columns, + unique=True) + for i, index in enumerate(indexes): + index_name = 'idx_%s_%d' % (table_name, i) + index_columns = index.split(',') + create_index(conn, table_name, index_name, index_columns) + c = conn.cursor() + tr = TabularReader(file_path, skip=skip, comment_char=comment_char, + col_idx=col_idx, filters=filters) + for linenum, fields in enumerate(tr): + data_lines += 1 + try: + vals = [col_func[i](x) + if x else None for i, x in enumerate(fields)] + c.execute(insert_stmt, vals) + except Exception as e: + print('Failed at line: %d err: %s' % (linenum, e), + file=sys.stderr) + conn.commit() + c.close() + except Exception as e: + exit('Error: %s' % (e)) + + +def create_index(conn, table_name, index_name, index_columns, unique=False): + index_def = "CREATE %s INDEX %s on %s(%s)" % ( + 'UNIQUE' if unique else '', index_name, + table_name, ','.join(index_columns)) + c = conn.cursor() + c.execute(index_def) + conn.commit() + c.close() diff -r 000000000000 -r 3708ff0198b7 macros.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/macros.xml Tue Jul 18 09:07:07 2017 -0400 @@ -0,0 +1,383 @@ + + + 0: + #set $skip_lines = int($tbl.table.metadata.comment_lines) + #end if + #if $skip_lines is not None: + #set $filter_dict = dict() + #set $filter_dict['filter'] = str($fi.filter.filter_type) + #set $filter_dict['count'] = $skip_lines + #silent $input_filters.append($filter_dict) + #end if + #elif $fi.filter.filter_type == 'comment': + #set $filter_dict = dict() + #set $filter_dict['filter'] = 'regex' + #set $filter_dict['pattern'] = '^(%s).*$' % '|'.join([chr(int(x)).replace('|','[|]') for x in (str($fi.filter.comment_char)).split(',')]) + #set $filter_dict['action'] = 'exclude_match' + #silent $input_filters.append($filter_dict) + #elif $fi.filter.filter_type == 'regex': + #set $filter_dict = dict() + #set $filter_dict['filter'] = str($fi.filter.filter_type) + #set $filter_dict['pattern'] = str($fi.filter.regex_pattern) + #set $filter_dict['action'] = str($fi.filter.regex_action) + #silent $input_filters.append($filter_dict) + #elif $fi.filter.filter_type == 'select_columns': + #set $filter_dict = dict() + #set $filter_dict['filter'] = str($fi.filter.filter_type) + #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')] + #silent $input_filters.append($filter_dict) + #elif $fi.filter.filter_type == 'replace': + #set $filter_dict = dict() + #set $filter_dict['filter'] = str($fi.filter.filter_type) + #set $filter_dict['column'] = int(str($fi.filter.column).replace('c','')) + #set $filter_dict['pattern'] = str($fi.filter.regex_pattern) + #set $filter_dict['replace'] = str($fi.filter.regex_replace) + #silent $input_filters.append($filter_dict) + #elif str($fi.filter.filter_type).endswith('pend_line_num'): + #set $filter_dict = dict() + #set $filter_dict['filter'] = str($fi.filter.filter_type) + #silent $input_filters.append($filter_dict) + #elif str($fi.filter.filter_type).endswith('pend_text'): + #set $filter_dict = dict() + #set $filter_dict['filter'] = str($fi.filter.filter_type) + #set $filter_dict['column_text'] = str($fi.filter.column_text) + #silent $input_filters.append($filter_dict) + #elif $fi.filter.filter_type == 'normalize': + #set $filter_dict = dict() + #set $filter_dict['filter'] = str($fi.filter.filter_type) + #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')] + #set $filter_dict['separator'] = str($fi.filter.separator) + #silent $input_filters.append($filter_dict) + #end if + #end for +]]> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ^(c?[1-9]\d*)(,c?[1-9]\d*)*$ + + + + + ^(c?[1-9]\d*)$ + + + + + + + + + + + + ^(c?[1-9]\d*)(,c?[1-9]\d*)*$ + + + + ^[^\t\n\r\f\v]+$ + + + + + + + + + + + + + + + + + + + + diff -r 000000000000 -r 3708ff0198b7 query_db.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/query_db.py Tue Jul 18 09:07:07 2017 -0400 @@ -0,0 +1,67 @@ +#!/usr/bin/env python + +from __future__ import print_function + +import re +import sqlite3 as sqlite +import sys + + +TABLE_QUERY = \ + """ + SELECT name, sql + FROM sqlite_master + WHERE type='table' + ORDER BY name + """ + + +def regex_match(expr, item): + return re.match(expr, item) is not None + + +def regex_search(expr, item): + return re.search(expr, item) is not None + + +def regex_sub(expr, replace, item): + return re.sub(expr, replace, item) + + +def get_connection(sqlitedb_path, addfunctions=True): + conn = sqlite.connect(sqlitedb_path) + if addfunctions: + conn.create_function("re_match", 2, regex_match) + conn.create_function("re_search", 2, regex_search) + conn.create_function("re_sub", 3, regex_sub) + return conn + + +def describe_tables(conn, outputFile): + try: + c = conn.cursor() + tables_query = TABLE_QUERY + rslt = c.execute(tables_query).fetchall() + for table, sql in rslt: + print("Table %s:" % table, file=outputFile) + try: + col_query = 'SELECT * FROM %s LIMIT 0' % table + cur = conn.cursor().execute(col_query) + cols = [col[0] for col in cur.description] + print(" Columns: %s" % cols, file=outputFile) + except Exception as exc: + print("Warning: %s" % exc, file=sys.stderr) + except Exception as e: + exit('Error: %s' % (e)) + exit(0) + + +def run_query(conn, query, outputFile, no_header=False): + cur = conn.cursor() + results = cur.execute(query) + if not no_header: + outputFile.write("#%s\n" % '\t'.join( + [str(col[0]) for col in cur.description])) + for i, row in enumerate(results): + outputFile.write("%s\n" % '\t'.join( + [str(val) if val is not None else '' for val in row])) diff -r 000000000000 -r 3708ff0198b7 query_tabular.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/query_tabular.py Tue Jul 18 09:07:07 2017 -0400 @@ -0,0 +1,137 @@ +#!/usr/bin/env python + +from __future__ import print_function + +import json +import optparse +import os.path +import sys + +from load_db import create_table + +from query_db import describe_tables, get_connection, run_query + + +""" +JSON config: +{ tables : [ + { file_path : '/home/galaxy/dataset_101.dat', + table_name : 't1', + column_names : ['c1','c2','c3'], + pkey_autoincr : 'id' + comment_lines : 1 + unique: ['c1'], + index: ['c2', 'c3'] + }, + { file_path : '/home/galaxy/dataset_102.dat', + table_name : 'gff', + column_names : ['seqname',,'date','start','end'] + comment_lines : 1 + load_named_columns : True + filters : [{'filter': 'regex', 'pattern': '#peptide', + 'action': 'exclude_match'}, + {'filter': 'replace', 'column': 3, + 'replace': 'gi[|]', 'pattern': ''}] + }, + { file_path : '/home/galaxy/dataset_103.dat', + table_name : 'test', + column_names : ['c1', 'c2', 'c3'] + } + ] +} +""" + + +def __main__(): + # Parse Command Line + parser = optparse.OptionParser() + parser.add_option('-s', '--sqlitedb', dest='sqlitedb', default=None, + help='The SQLite Database') + parser.add_option('-j', '--jsonfile', dest='jsonfile', default=None, + help='JSON dict of table specifications') + parser.add_option('-q', '--query', dest='query', default=None, + help='SQL query') + parser.add_option('-Q', '--query_file', dest='query_file', default=None, + help='SQL query file') + parser.add_option('-n', '--no_header', dest='no_header', default=False, + action='store_true', + help='Include a column headers line') + parser.add_option('-o', '--output', dest='output', default=None, + help='Output file for query results') + (options, args) = parser.parse_args() + + # determine output destination + if options.output is not None: + try: + outputPath = os.path.abspath(options.output) + outputFile = open(outputPath, 'w') + except Exception as e: + exit('Error: %s' % (e)) + else: + outputFile = sys.stdout + + def _create_table(ti, table): + path = table['file_path'] + table_name =\ + table['table_name'] if 'table_name' in table else 't%d' % (ti + 1) + comment_lines =\ + table['comment_lines'] if 'comment_lines' in table else 0 + comment_char =\ + table['comment_char'] if 'comment_char' in table else None + column_names =\ + table['column_names'] if 'column_names' in table else None + if column_names: + load_named_columns =\ + table['load_named_columns']\ + if 'load_named_columns' in table else False + else: + load_named_columns = False + unique_indexes = table['unique'] if 'unique' in table else [] + indexes = table['index'] if 'index' in table else [] + filters = table['filters'] if 'filters' in table else None + pkey_autoincr = \ + table['pkey_autoincr'] if 'pkey_autoincr' in table else None + create_table(get_connection(options.sqlitedb), path, table_name, + pkey_autoincr=pkey_autoincr, + column_names=column_names, + skip=comment_lines, + comment_char=comment_char, + load_named_columns=load_named_columns, + filters=filters, + unique_indexes=unique_indexes, + indexes=indexes) + + if options.jsonfile: + try: + with open(options.jsonfile) as fh: + tdef = json.load(fh) + if 'tables' in tdef: + for ti, table in enumerate(tdef['tables']): + _create_table(ti, table) + except Exception as e: + exit('Error: %s' % (e)) + + query = None + if options.query_file is not None: + with open(options.query_file, 'r') as fh: + query = '' + for line in fh: + query += line + elif options.query is not None: + query = options.query + + if query is None: + try: + describe_tables(get_connection(options.sqlitedb), outputFile) + except Exception as e: + exit('Error: %s' % (e)) + else: + try: + run_query(get_connection(options.sqlitedb), query, outputFile, + no_header=options.no_header) + except Exception as e: + exit('Error: %s' % (e)) + + +if __name__ == "__main__": + __main__() diff -r 000000000000 -r 3708ff0198b7 query_tabular.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/query_tabular.xml Tue Jul 18 09:07:07 2017 -0400 @@ -0,0 +1,510 @@ + + using sqlite sql + + + macros.xml + + + + + + + + +$sqlquery + + +#import json +#set $jtbldef = dict() +#set $jtbls = [] +#set $jtbldef['tables'] = $jtbls +#for $i,$tbl in enumerate($tables): + #set $jtbl = dict() + #set $jtbl['file_path'] = str($tbl.table) + #if $tbl.tbl_opts.table_name: + #set $tname = str($tbl.tbl_opts.table_name) + #else + #set $tname = 't' + str($i + 1) + #end if + #set $jtbl['table_name'] = $tname + ## #if $tbl.tbl_opts.sel_cols: + ## #set $jtbl['sel_cols'] = $tbl.tbl_opts.sel_cols el_cols + ## #end if + #if $tbl.tbl_opts.pkey_autoincr: + #set $jtbl['pkey_autoincr'] = str($tbl.tbl_opts.pkey_autoincr) + #end if + #if $tbl.tbl_opts.col_names: + #set $col_names = str($tbl.tbl_opts.col_names) + #if $tbl.tbl_opts.load_named_columns: + #set $jtbl['load_named_columns'] = True + #end if + #else + #set $col_names = '' + #end if + #set $jtbl['column_names'] = $col_names + #set $idx_unique = [] + #set $idx_non = [] + #for $idx in $tbl.tbl_opts.indexes: + #if $idx.unique: + #silent $idx_unique.append(str($idx.index_columns)) + #else: + #silent $idx_non.append(str($idx.index_columns)) + #end if + #end for + #if len($idx_unique) > 0: + #set $jtbl['unique'] = $idx_unique + #end if + #if len($idx_non) > 0: + #set $jtbl['index'] = $idx_non + #end if + #set $linefilters = $tbl.input_opts.linefilters + @LINEFILTERS@ + #if $input_filters: + #set $jtbl['filters'] = $input_filters + #end if + #set $jtbls += [$jtbl] +#end for +#echo $json.dumps($jtbldef) + + + + +
+ +
+ + +
+ +
+
+ + By default, tables will be named: t1,t2,...,tn (table names must be unique) + ^[A-Za-z]\w*$ + + + By default, table columns will be named: c1,c2,c3,...,cn (column names for a table must be unique) + You can override the default names by entering a comma -separated list of names, e.g. ',name1,,,name2' would rename the second and fifth columns. + + + ^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$ + + + + ^([A-Za-z]\w*)?$ + + + + + Create an index on the column names: e,g, c1 or c2,c4 + ^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$ + + +
+
+ + + By default: tables are named: t1,t2,...,tn and columns in each table: c1,c2,...,cn + + ^(?ims)\s*select\s+.*\s+from\s+.*$ + + +
+ + + save_db + + + not save_db or (sqlquery and len(sqlquery.strip()) > 0) + + + + + + +
+ + + + + + +
+
+ + +
+
+ + +
+ + + + + + +
+
+ + +
+
+ + +
+ + + + +
+ + + + + + +
+
+ +
+
+ + +
+ + + + + + +
+
+ + +
+ + + + +
+ + + + + + +
+
+ +
+
+ + +
+ + + + +
+ + + + + + +
+
+ + +
+
+ + +
+ + + + + + +
+
+ + +
+
+ + +
+ + +
+ +
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+
+ + +
+ + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+
+ + +
+ + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ + +
+
+ + +
+ +
+ +
diff -r 000000000000 -r 3708ff0198b7 sqlite_to_tabular.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/sqlite_to_tabular.py Tue Jul 18 09:07:07 2017 -0400 @@ -0,0 +1,60 @@ +#!/usr/bin/env python + +from __future__ import print_function + +import optparse +import os.path +import sys + +from query_db import describe_tables, get_connection, run_query + + +def __main__(): + # Parse Command Line + parser = optparse.OptionParser() + parser.add_option('-s', '--sqlitedb', dest='sqlitedb', default=None, + help='The SQLite Database') + parser.add_option('-q', '--query', dest='query', default=None, + help='SQL query') + parser.add_option('-Q', '--query_file', dest='query_file', default=None, + help='SQL query file') + parser.add_option('-n', '--no_header', dest='no_header', default=False, + action='store_true', + help='Include a column headers line') + parser.add_option('-o', '--output', dest='output', default=None, + help='Output file for query results') + (options, args) = parser.parse_args() + + # determine output destination + if options.output is not None: + try: + outputPath = os.path.abspath(options.output) + outputFile = open(outputPath, 'w') + except Exception as e: + exit('Error: %s' % (e)) + else: + outputFile = sys.stdout + + query = None + if options.query_file is not None: + with open(options.query_file, 'r') as fh: + query = fh.read() + elif options.query is not None: + query = options.query + + if query is None: + try: + describe_tables(get_connection(options.sqlitedb), outputFile) + except Exception as e: + exit('Error: %s' % (e)) + exit(0) + else: + try: + run_query(get_connection(options.sqlitedb), query, outputFile, + no_header=options.no_header) + except Exception as e: + exit('Error: %s' % (e)) + + +if __name__ == "__main__": + __main__() diff -r 000000000000 -r 3708ff0198b7 test-data/IEDB.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/IEDB.tsv Tue Jul 18 09:07:07 2017 -0400 @@ -0,0 +1,17 @@ +#ID allele seq_num start end length peptide method percentile_rank ann_ic50 ann_rank smm_ic50 smm_rank comblib_sidney2008_score comblib_sidney2008_rank netmhcpan_ic50 netmhcpan_rank +PPAP2C HLA-A*02:01 1 3 11 9 GMYCMVFLV Consensus (ann/smm/comblib_sidney2008) 0.2 4 0.2 3.77 0.2 7.1e-06 0.5 - - +PPAP2C HLA-A*23:01 1 1 9 9 SFGMYCMVF Consensus (ann/smm) 0.5 67 0.5 137.54 0.5 - - - - +PPAP2C HLA-A*23:01 1 4 12 9 MYCMVFLVK Consensus (ann/smm) 0.65 146 0.7 160.11 0.6 - - - - +PPAP2C HLA-A*02:01 1 2 10 9 FGMYCMVFL Consensus (ann/smm/comblib_sidney2008) 2.3 222 3.1 150.01 2.3 2.14e-05 1.3 - - +PPAP2C HLA-A*23:01 1 3 11 9 GMYCMVFLV Consensus (ann/smm) 4.95 3256 4 2706.64 5.9 - - - - +PPAP2C HLA-A*23:01 1 2 10 9 FGMYCMVFL Consensus (ann/smm) 6.55 4423 4.9 4144.10 8.2 - - - - +PPAP2C HLA-A*02:01 1 1 9 9 SFGMYCMVF Consensus (ann/smm/comblib_sidney2008) 45 24390 45 44989.38 39 0.01 91 - - +PPAP2C HLA-A*02:01 1 4 12 9 MYCMVFLVK Consensus (ann/smm/comblib_sidney2008) 54 23399 41 157801.09 54 0.01 86 - - +ADAMTSL1 HLA-A*02:01 1 1 9 9 SLDMCISGL Consensus (ann/smm/comblib_sidney2008) 1 26 1 51.65 0.9 3.02e-05 1.7 - - +ADAMTSL1 HLA-A*23:01 1 4 12 9 MCISGLCQL Consensus (ann/smm) 6.65 5781 5.9 3626.02 7.4 - - - - +ADAMTSL1 HLA-A*02:01 1 4 12 9 MCISGLCQL Consensus (ann/smm/comblib_sidney2008) 14 1823 6.5 2612.82 14 0.00056 24 - - +ADAMTSL1 HLA-A*23:01 1 1 9 9 SLDMCISGL Consensus (ann/smm) 30.5 27179 34 24684.82 27 - - - - +ADAMTSL1 HLA-A*02:01 1 2 10 9 LDMCISGLC Consensus (ann/smm/comblib_sidney2008) 42 23677 42 53716.78 41 0.01 71 - - +ADAMTSL1 HLA-A*23:01 1 3 11 9 DMCISGLCQ Consensus (ann/smm) 64.5 34451 73 118148.99 56 - - - - +ADAMTSL1 HLA-A*23:01 1 2 10 9 LDMCISGLC Consensus (ann/smm) 76.0 33222 62 665932.18 90 - - - - +ADAMTSL1 HLA-A*02:01 1 3 11 9 DMCISGLCQ Consensus (ann/smm/comblib_sidney2008) 97 31630 98 639896.89 71 0.03 97 - - diff -r 000000000000 -r 3708ff0198b7 test-data/add_to_db_results.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/add_to_db_results.tsv Tue Jul 18 09:07:07 2017 -0400 @@ -0,0 +1,3 @@ +#id first last pets quote +1 Paula Brown 2 Time flies like and arrow. Fruit flies like a banana. +2 Steven Jones 1 I would have wrtten less if I had more time diff -r 000000000000 -r 3708ff0198b7 test-data/customers.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/customers.tsv Tue Jul 18 09:07:07 2017 -0400 @@ -0,0 +1,5 @@ +#CustomerID FirstName LastName Email DOB Phone +1 John Smith John.Smith@yahoo.com 1968-02-04 626 222-2222 +2 Steven Goldfish goldfish@fishhere.net 1974-04-04 323 455-4545 +3 Paula Brown pb@herowndomain.org 1978-05-24 416 323-3232 +4 James Smith jim@supergig.co.uk 1980-10-20 416 323-8888 diff -r 000000000000 -r 3708ff0198b7 test-data/filtered_people_results.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filtered_people_results.tsv Tue Jul 18 09:07:07 2017 -0400 @@ -0,0 +1,4 @@ +1 Paula Brown 1978-05-24 2 +2 Steven Jones 1974-04-04 1 +3 Jane Doe 1978-05-24 0 +4 James Smith 1980-10-20 1 diff -r 000000000000 -r 3708ff0198b7 test-data/filtered_pets_results.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filtered_pets_results.tsv Tue Jul 18 09:07:07 2017 -0400 @@ -0,0 +1,4 @@ +1 Rex dog +1 Fluff cat +2 Allie cat +4 Spot diff -r 000000000000 -r 3708ff0198b7 test-data/netMHC_summary.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/netMHC_summary.tsv Tue Jul 18 09:07:07 2017 -0400 @@ -0,0 +1,9 @@ +#pos peptide logscore affinity(nM) Bind Level Protein Name Allele +2 GMYCMVFLV 0.858 4 SB PPAP2C HLA-A02:01 +1 FGMYCMVFL 0.501 222 WB PPAP2C HLA-A02:01 +3 MYCMVFLVK 0.070 23399 PPAP2C HLA-A02:01 +0 SFGMYCMVF 0.066 24390 PPAP2C HLA-A02:01 +0 SLDMCISGL 0.698 26 SB ADAMTSL1 HLA-A02:01 +3 MCISGLCQL 0.306 1823 ADAMTSL1 HLA-A02:01 +1 LDMCISGLC 0.069 23677 ADAMTSL1 HLA-A02:01 +2 DMCISGLCQ 0.042 31630 ADAMTSL1 HLA-A02:01 diff -r 000000000000 -r 3708ff0198b7 test-data/pet_normalized_query_results.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/pet_normalized_query_results.tsv Tue Jul 18 09:07:07 2017 -0400 @@ -0,0 +1,3 @@ +#id first last dob name animal pets +1 Paula Brown 1978-05-24 Fluff cat 2 +2 Steven Jones 1974-04-04 Allie cat 1 diff -r 000000000000 -r 3708ff0198b7 test-data/pets.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/pets.tsv Tue Jul 18 09:07:07 2017 -0400 @@ -0,0 +1,7 @@ +#People with pets +Pets FirstName LastName DOB PetNames PetType +2 Paula Brown 24/05/78 Rex,Fluff dog,cat +1 Steven Jones 04/04/74 Allie cat +0 Jane Doe 24/05/78 +1 James Smith 20/10/80 Spot + diff -r 000000000000 -r 3708ff0198b7 test-data/query_results.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/query_results.tsv Tue Jul 18 09:07:07 2017 -0400 @@ -0,0 +1,17 @@ +#ID peptide start end percentile_rank logscore affinity Bind_Level +PPAP2C GMYCMVFLV 3 11 0.2 0.858 4 SB +PPAP2C GMYCMVFLV 3 11 4.95 0.858 4 SB +ADAMTSL1 SLDMCISGL 1 9 1.0 0.698 26 SB +ADAMTSL1 SLDMCISGL 1 9 30.5 0.698 26 SB +PPAP2C FGMYCMVFL 2 10 2.3 0.501 222 WB +PPAP2C FGMYCMVFL 2 10 6.55 0.501 222 WB +ADAMTSL1 MCISGLCQL 4 12 6.65 0.306 1823 +ADAMTSL1 MCISGLCQL 4 12 14.0 0.306 1823 +PPAP2C MYCMVFLVK 4 12 0.65 0.07 23399 +PPAP2C MYCMVFLVK 4 12 54.0 0.07 23399 +ADAMTSL1 LDMCISGLC 2 10 42.0 0.069 23677 +ADAMTSL1 LDMCISGLC 2 10 76.0 0.069 23677 +PPAP2C SFGMYCMVF 1 9 0.5 0.066 24390 +PPAP2C SFGMYCMVF 1 9 45.0 0.066 24390 +ADAMTSL1 DMCISGLCQ 3 11 64.5 0.042 31630 +ADAMTSL1 DMCISGLCQ 3 11 97.0 0.042 31630 diff -r 000000000000 -r 3708ff0198b7 test-data/regex_results.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/regex_results.tsv Tue Jul 18 09:07:07 2017 -0400 @@ -0,0 +1,4 @@ +#FirstName LastName DOB +Steven Goldfish 04/04/74 +Paula Brown 24/05/78 +James Smith 20/10/80 diff -r 000000000000 -r 3708ff0198b7 test-data/sales.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sales.tsv Tue Jul 18 09:07:07 2017 -0400 @@ -0,0 +1,6 @@ +#CustomerID Date SaleAmount +2 2004-05-06 100.22 +1 2004-05-07 99.95 +3 2004-05-07 122.95 +3 2004-05-13 100.00 +4 2004-05-22 555.55 diff -r 000000000000 -r 3708ff0198b7 test-data/sales_results.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/sales_results.tsv Tue Jul 18 09:07:07 2017 -0400 @@ -0,0 +1,5 @@ +#FirstName LastName TotalSales +James Smith 555.55 +Paula Brown 222.95 +Steven Goldfish 100.22 +John Smith 99.95 diff -r 000000000000 -r 3708ff0198b7 test-data/testdb.sqlite Binary file test-data/testdb.sqlite has changed