# HG changeset patch
# User iuc
# Date 1503089289 14400
# Node ID 8a33b442ecd91f3b86d1cb964cb6c599612a58b2
# Parent 3708ff0198b7869a9469840778799b4b9d7892a3
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 81f69ad5f39223059c40501e55ac777d3feca845
diff -r 3708ff0198b7 -r 8a33b442ecd9 filters.py
--- a/filters.py Tue Jul 18 09:07:07 2017 -0400
+++ b/filters.py Fri Aug 18 16:48:09 2017 -0400
@@ -33,7 +33,8 @@
r = filter_dict['replace']
c = int(filter_dict['column']) - 1
self.func = lambda i, l: '\t'.join(
- [x if j != c else re.sub(p, r, x) for j, x in enumerate(l.split('\t'))])
+ [x if j != c else re.sub(p, r, x)
+ for j, x in enumerate(l.split('\t'))])
elif filter_dict['filter'] == 'prepend_line_num':
self.func = lambda i, l: '%d\t%s' % (i, l)
elif filter_dict['filter'] == 'append_line_num':
diff -r 3708ff0198b7 -r 8a33b442ecd9 load_db.py
--- a/load_db.py Tue Jul 18 09:07:07 2017 -0400
+++ b/load_db.py Fri Aug 18 16:48:09 2017 -0400
@@ -2,11 +2,165 @@
from __future__ import print_function
+import re
import sys
from filters import TabularReader
+SQLITE_KEYWORDS = [
+ 'ABORT',
+ 'ACTION',
+ 'ADD',
+ 'AFTER',
+ 'ALL',
+ 'ALTER',
+ 'ANALYZE',
+ 'AND',
+ 'AS',
+ 'ASC',
+ 'ATTACH',
+ 'AUTOINCREMENT',
+ 'BEFORE',
+ 'BEGIN',
+ 'BETWEEN',
+ 'BY',
+ 'CASCADE',
+ 'CASE',
+ 'CAST',
+ 'CHECK',
+ 'COLLATE',
+ 'COLUMN',
+ 'COMMIT',
+ 'CONFLICT',
+ 'CONSTRAINT',
+ 'CREATE',
+ 'CROSS',
+ 'CURRENT_DATE',
+ 'CURRENT_TIME',
+ 'CURRENT_TIMESTAMP',
+ 'DATABASE',
+ 'DEFAULT',
+ 'DEFERRABLE',
+ 'DEFERRED',
+ 'DELETE',
+ 'DESC',
+ 'DETACH',
+ 'DISTINCT',
+ 'DROP',
+ 'EACH',
+ 'ELSE',
+ 'END',
+ 'ESCAPE',
+ 'EXCEPT',
+ 'EXCLUSIVE',
+ 'EXISTS',
+ 'EXPLAIN',
+ 'FAIL',
+ 'FOR',
+ 'FOREIGN',
+ 'FROM',
+ 'FULL',
+ 'GLOB',
+ 'GROUP',
+ 'HAVING',
+ 'IF',
+ 'IGNORE',
+ 'IMMEDIATE',
+ 'IN',
+ 'INDEX',
+ 'INDEXED',
+ 'INITIALLY',
+ 'INNER',
+ 'INSERT',
+ 'INSTEAD',
+ 'INTERSECT',
+ 'INTO',
+ 'IS',
+ 'ISNULL',
+ 'JOIN',
+ 'KEY',
+ 'LEFT',
+ 'LIKE',
+ 'LIMIT',
+ 'MATCH',
+ 'NATURAL',
+ 'NO',
+ 'NOT',
+ 'NOTNULL',
+ 'NULL',
+ 'OF',
+ 'OFFSET',
+ 'ON',
+ 'OR',
+ 'ORDER',
+ 'OUTER',
+ 'PLAN',
+ 'PRAGMA',
+ 'PRIMARY',
+ 'QUERY',
+ 'RAISE',
+ 'RECURSIVE',
+ 'REFERENCES',
+ 'REGEXP',
+ 'REINDEX',
+ 'RELEASE',
+ 'RENAME',
+ 'REPLACE',
+ 'RESTRICT',
+ 'RIGHT',
+ 'ROLLBACK',
+ 'ROW',
+ 'SAVEPOINT',
+ 'SELECT',
+ 'SET',
+ 'TABLE',
+ 'TEMP',
+ 'TEMPORARY',
+ 'THEN',
+ 'TO',
+ 'TRANSACTION',
+ 'TRIGGER',
+ 'UNION',
+ 'UNIQUE',
+ 'UPDATE',
+ 'USING',
+ 'VACUUM',
+ 'VALUES',
+ 'VIEW',
+ 'VIRTUAL',
+ 'WHEN',
+ 'WHERE',
+ 'WITH',
+ 'WITHOUT'
+]
+
+
+def get_valid_column_name(name):
+ valid_name = name
+ if not name or not name.strip():
+ return None
+ elif name.upper() in SQLITE_KEYWORDS:
+ valid_name = '"%s"' % name
+ elif re.match('^[a-zA-Z]\w*$', name):
+ pass
+ elif re.match('^"[^"]+"$', name):
+ pass
+ elif re.match('^\[[^\[\]]*\]$', name):
+ pass
+ elif re.match("^`[^`]+`$", name):
+ pass
+ elif name.find('"') < 0:
+ valid_name = '"%s"' % name
+ elif name.find('[') < 0 and name.find(']') < 0:
+ valid_name = '[%s]' % name
+ elif name.find('`') < 0:
+ valid_name = '`%s`' % name
+ elif name.find("'") < 0:
+ valid_name = "'%s'" % name
+ return valid_name
+
+
def getValueType(val):
if val or 0. == val:
try:
@@ -23,14 +177,19 @@
def get_column_def(file_path, table_name, skip=0, comment_char='#',
column_names=None, max_lines=100, load_named_columns=False,
- filters=None):
+ firstlinenames=False, filters=None):
col_pref = ['TEXT', 'REAL', 'INTEGER', None]
col_types = []
col_idx = None
+ col_names = []
try:
tr = TabularReader(file_path, skip=skip, comment_char=comment_char,
col_idx=None, filters=filters)
for linenum, fields in enumerate(tr):
+ if linenum == 0 and firstlinenames:
+ col_names = [get_valid_column_name(name) or 'c%d' % (i + 1)
+ for i, name in enumerate(fields)]
+ continue
if linenum > max_lines:
break
try:
@@ -48,24 +207,24 @@
for i, col_type in enumerate(col_types):
if not col_type:
col_types[i] = 'TEXT'
+ if not col_names:
+ col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
if column_names:
- col_names = []
if load_named_columns:
col_idx = []
+ cnames = []
for i, cname in enumerate(
[cn.strip() for cn in column_names.split(',')]):
if cname != '':
col_idx.append(i)
- col_names.append(cname)
+ cnames.append(cname)
col_types = [col_types[i] for i in col_idx]
+ col_names = cnames
else:
- col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
for i, cname in enumerate(
[cn.strip() for cn in column_names.split(',')]):
if cname and i < len(col_names):
col_names[i] = cname
- else:
- col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
col_def = []
for i, col_name in enumerate(col_names):
col_def.append('%s %s' % (col_names[i], col_types[i]))
@@ -74,12 +233,14 @@
def create_table(conn, file_path, table_name, skip=0, comment_char='#',
pkey_autoincr=None, column_names=None,
- load_named_columns=False, filters=None,
- unique_indexes=[], indexes=[]):
+ load_named_columns=False, firstlinenames=False,
+ filters=None, unique_indexes=[], indexes=[]):
col_names, col_types, col_def, col_idx = \
get_column_def(file_path, table_name, skip=skip,
comment_char=comment_char, column_names=column_names,
- load_named_columns=load_named_columns, filters=filters)
+ load_named_columns=load_named_columns,
+ firstlinenames=firstlinenames,
+ filters=filters)
col_func = [float if t == 'REAL' else int
if t == 'INTEGER' else str for t in col_types]
table_def = 'CREATE TABLE %s (\n %s%s\n);' % (
@@ -98,19 +259,13 @@
c.execute(table_def)
conn.commit()
c.close()
- for i, index in enumerate(unique_indexes):
- index_name = 'idx_uniq_%s_%d' % (table_name, i)
- index_columns = index.split(',')
- create_index(conn, table_name, index_name, index_columns,
- unique=True)
- for i, index in enumerate(indexes):
- index_name = 'idx_%s_%d' % (table_name, i)
- index_columns = index.split(',')
- create_index(conn, table_name, index_name, index_columns)
+
c = conn.cursor()
tr = TabularReader(file_path, skip=skip, comment_char=comment_char,
col_idx=col_idx, filters=filters)
for linenum, fields in enumerate(tr):
+ if linenum == 0 and firstlinenames:
+ continue
data_lines += 1
try:
vals = [col_func[i](x)
@@ -121,15 +276,28 @@
file=sys.stderr)
conn.commit()
c.close()
+ for i, index in enumerate(unique_indexes):
+ index_name = 'idx_uniq_%s_%d' % (table_name, i)
+ index_columns = index.split(',')
+ create_index(conn, table_name, index_name, index_columns,
+ unique=True)
+ for i, index in enumerate(indexes):
+ index_name = 'idx_%s_%d' % (table_name, i)
+ index_columns = index.split(',')
+ create_index(conn, table_name, index_name, index_columns)
except Exception as e:
exit('Error: %s' % (e))
def create_index(conn, table_name, index_name, index_columns, unique=False):
- index_def = "CREATE %s INDEX %s on %s(%s)" % (
+ index_def = 'CREATE %s INDEX %s on %s(%s)' % (
'UNIQUE' if unique else '', index_name,
table_name, ','.join(index_columns))
- c = conn.cursor()
- c.execute(index_def)
- conn.commit()
- c.close()
+ try:
+ c = conn.cursor()
+ c.execute(index_def)
+ conn.commit()
+ c.close()
+ except Exception as e:
+ print('Failed: %s err: %s' % (index_def, e), file=sys.stderr)
+ raise(e)
diff -r 3708ff0198b7 -r 8a33b442ecd9 macros.xml
--- a/macros.xml Tue Jul 18 09:07:07 2017 -0400
+++ b/macros.xml Fri Aug 18 16:48:09 2017 -0400
@@ -60,6 +60,52 @@
#end for
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -81,23 +127,7 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
diff -r 3708ff0198b7 -r 8a33b442ecd9 query_db.py
--- a/query_db.py Tue Jul 18 09:07:07 2017 -0400
+++ b/query_db.py Fri Aug 18 16:48:09 2017 -0400
@@ -56,12 +56,12 @@
exit(0)
-def run_query(conn, query, outputFile, no_header=False):
+def run_query(conn, query, outputFile, no_header=False, comment_char='#'):
cur = conn.cursor()
results = cur.execute(query)
if not no_header:
- outputFile.write("#%s\n" % '\t'.join(
- [str(col[0]) for col in cur.description]))
+ outputFile.write("%s%s\n" % (comment_char, '\t'.join(
+ [str(col[0]) for col in cur.description])))
for i, row in enumerate(results):
outputFile.write("%s\n" % '\t'.join(
[str(val) if val is not None else '' for val in row]))
diff -r 3708ff0198b7 -r 8a33b442ecd9 query_tabular.py
--- a/query_tabular.py Tue Jul 18 09:07:07 2017 -0400
+++ b/query_tabular.py Fri Aug 18 16:48:09 2017 -0400
@@ -56,6 +56,8 @@
parser.add_option('-n', '--no_header', dest='no_header', default=False,
action='store_true',
help='Include a column headers line')
+ parser.add_option('-c', '--comment_char', dest='comment_char', default='',
+ help='comment character to prefix column header line')
parser.add_option('-o', '--output', dest='output', default=None,
help='Output file for query results')
(options, args) = parser.parse_args()
@@ -80,6 +82,8 @@
table['comment_char'] if 'comment_char' in table else None
column_names =\
table['column_names'] if 'column_names' in table else None
+ firstlinenames =\
+ table['firstlinenames'] if 'firstlinenames' in table else False
if column_names:
load_named_columns =\
table['load_named_columns']\
@@ -93,6 +97,7 @@
table['pkey_autoincr'] if 'pkey_autoincr' in table else None
create_table(get_connection(options.sqlitedb), path, table_name,
pkey_autoincr=pkey_autoincr,
+ firstlinenames=firstlinenames,
column_names=column_names,
skip=comment_lines,
comment_char=comment_char,
@@ -128,7 +133,8 @@
else:
try:
run_query(get_connection(options.sqlitedb), query, outputFile,
- no_header=options.no_header)
+ no_header=options.no_header,
+ comment_char=options.comment_char)
except Exception as e:
exit('Error: %s' % (e))
diff -r 3708ff0198b7 -r 8a33b442ecd9 query_tabular.xml
--- a/query_tabular.xml Tue Jul 18 09:07:07 2017 -0400
+++ b/query_tabular.xml Fri Aug 18 16:48:09 2017 -0400
@@ -1,4 +1,4 @@
-
+
using sqlite sql
@@ -26,7 +26,7 @@
-j '$table_json'
#if $sqlquery:
-Q '$query_file'
- $no_header
+ @RESULT_HEADER@
-o '$output'
#end if
]]>
@@ -54,6 +54,9 @@
#if $tbl.tbl_opts.pkey_autoincr:
#set $jtbl['pkey_autoincr'] = str($tbl.tbl_opts.pkey_autoincr)
#end if
+ #if $tbl.tbl_opts.column_names_from_first_line:
+ #set $jtbl['firstlinenames'] = True
+ #end if
#if $tbl.tbl_opts.col_names:
#set $col_names = str($tbl.tbl_opts.col_names)
#if $tbl.tbl_opts.load_named_columns:
@@ -104,6 +107,9 @@
By default, tables will be named: t1,t2,...,tn (table names must be unique)
^[A-Za-z]\w*$
+
By default, table columns will be named: c1,c2,c3,...,cn (column names for a table must be unique)
You can override the default names by entering a comma -separated list of names, e.g. ',name1,,,name2' would rename the second and fifth columns.
@@ -119,8 +125,9 @@
- Create an index on the column names: e,g, c1 or c2,c4
+ Create an index on the column names: e.g. for default column names: c1 or c2,c4 ( use the names you gave for columns)
^([A-Za-z]\w*|"\S+[^,"]*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])(,([A-Za-z]\w*|"\S+.*"|`\S+[^,`]*`|[[]\S+[^,"]*[]])?)*$
+
@@ -132,7 +139,7 @@
^(?ims)\s*select\s+.*\s+from\s+.*$
-
+
@@ -391,6 +398,56 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
NLDQSGTNVAK-COOH Carbamidomethylation of C(4) trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.05257.05257.2 5257 -1.0 788.87384 2+ 2+ 1575.7311189571503 0 1.2727711831177586 100.0 Doubtful
+5 NP_001273432_F75C, NP_001273433_F77C, NP_001273434_F77C, NP_006635_F75C ACNDPFIQK GR; GR; GR; GR EK; EK; EK; EK 74; 76; 76; 74 NH2-ACNDPFIQK-COOH Carbamidomethylation of C(2) trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.09560.09560.2 9560 -1.0 546.760803 2+ 2+ 1091.50697969691 0 0.06709466946167521 94.73684210526316 Doubtful
+6 NP_001028196, NP_001244919, NP_036311 LQLLPESFICK KR; KR; KR EK; EK; EK 198; 163; 277 NH2-LQLLPESFICK-COOH Carbamidomethylation of C(10) trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.22345.22345.2 22345 -1.0 674.370911 2+ 2+ 1346.7268088708604 0 0.3412036795550375 99.38650306748467 Doubtful
+7 NP_001909_rs72973763:I239V,rs12021720:S384G DMTVPVLVSKPPVFTGK PK DK 234 NH2-DMTVPVLVSKPPVFTGK-COOH trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.20464.20464.2 20464 -1.0 908.008545 2+ 2+ 1814.0011927988103 0 0.7402290254293575 94.73684210526316 Doubtful
+8 NP_001265138, NP_001265138_rs2274136:A858T,F879L, NP_001265139, NP_001265139_rs2274136:A785T,F806L, NP_005115, NP_005115_rs2274136:A827T,F848L FGVSSESKPEEVK FK; FK; FK; FK; FK; FK KD; KD; KD; KD; KD; KD 992; 992; 919; 919; 961; 961 NH2-FGVSSESKPEEVK-COOH trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.06665.06665.2 6665 -1.0 711.858643 2+ 2+ 1421.7038244968105 0 -0.7666055674357796 100.0 Doubtful
diff -r 3708ff0198b7 -r 8a33b442ecd9 test-data/psm_report_out1.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/psm_report_out1.tsv Fri Aug 18 16:48:09 2017 -0400
@@ -0,0 +1,9 @@
+Scan Sequence Protein(s) Position m/z Precursor m/z Error
+1 PYANQPTVR NP_116558 2 523.272583 -4.42404529799
+2 DANTQVHTLR NP_443137, NP_443137_S1016F 443; 443 577.799622 0.611733835535
+3 SSWAGLQFPVGR NP_066544_R21W 19 652.843567 4.02947892238
+4 AQACNLDQSGTNVAK NP_112092_rs7285167:R182C 179 788.87384 1.27277118312
+5 ACNDPFIQK NP_001273432_F75C, NP_001273433_F77C, NP_001273434_F77C, NP_006635_F75C 74; 76; 76; 74 546.760803 0.0670946694617
+6 LQLLPESFICK NP_001028196, NP_001244919, NP_036311 198; 163; 277 674.370911 0.341203679555
+7 DMTVPVLVSKPPVFTGK NP_001909_rs72973763:I239V,rs12021720:S384G 234 908.008545 0.740229025429
+8 FGVSSESKPEEVK NP_001265138, NP_001265138_rs2274136:A858T,F879L, NP_001265139, NP_001265139_rs2274136:A785T,F806L, NP_005115, NP_005115_rs2274136:A827T,F848L 992; 992; 919; 919; 961; 961 711.858643 -0.766605567436
diff -r 3708ff0198b7 -r 8a33b442ecd9 test-data/psm_report_out2.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/psm_report_out2.tsv Fri Aug 18 16:48:09 2017 -0400
@@ -0,0 +1,5 @@
+Scan m/z Precursor m/z Error Sequence Protein(s)
+1 523.272583 -4.42404529799 PYANQPTVR NP_116558
+3 652.843567 4.02947892238 SSWAGLQFPVGR NP_066544_R21W
+4 788.87384 1.27277118312 AQACNLDQSGTNVAK NP_112092_rs7285167:R182C
+7 908.008545 0.740229025429 DMTVPVLVSKPPVFTGK NP_001909_rs72973763:I239V,rs12021720:S384G