# HG changeset patch
# User iuc
# Date 1503089300 14400
# Node ID c1b700bc01505a5cc77dc6dac2ddf9c28c352bd3
# Parent 859064f07be491736fb5b9d2b93ff709fae89cf2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 81f69ad5f39223059c40501e55ac777d3feca845
diff -r 859064f07be4 -r c1b700bc0150 filters.py
--- a/filters.py Tue Jul 18 09:07:26 2017 -0400
+++ b/filters.py Fri Aug 18 16:48:20 2017 -0400
@@ -33,7 +33,8 @@
r = filter_dict['replace']
c = int(filter_dict['column']) - 1
self.func = lambda i, l: '\t'.join(
- [x if j != c else re.sub(p, r, x) for j, x in enumerate(l.split('\t'))])
+ [x if j != c else re.sub(p, r, x)
+ for j, x in enumerate(l.split('\t'))])
elif filter_dict['filter'] == 'prepend_line_num':
self.func = lambda i, l: '%d\t%s' % (i, l)
elif filter_dict['filter'] == 'append_line_num':
diff -r 859064f07be4 -r c1b700bc0150 load_db.py
--- a/load_db.py Tue Jul 18 09:07:26 2017 -0400
+++ b/load_db.py Fri Aug 18 16:48:20 2017 -0400
@@ -2,11 +2,165 @@
from __future__ import print_function
+import re
import sys
from filters import TabularReader
+SQLITE_KEYWORDS = [
+ 'ABORT',
+ 'ACTION',
+ 'ADD',
+ 'AFTER',
+ 'ALL',
+ 'ALTER',
+ 'ANALYZE',
+ 'AND',
+ 'AS',
+ 'ASC',
+ 'ATTACH',
+ 'AUTOINCREMENT',
+ 'BEFORE',
+ 'BEGIN',
+ 'BETWEEN',
+ 'BY',
+ 'CASCADE',
+ 'CASE',
+ 'CAST',
+ 'CHECK',
+ 'COLLATE',
+ 'COLUMN',
+ 'COMMIT',
+ 'CONFLICT',
+ 'CONSTRAINT',
+ 'CREATE',
+ 'CROSS',
+ 'CURRENT_DATE',
+ 'CURRENT_TIME',
+ 'CURRENT_TIMESTAMP',
+ 'DATABASE',
+ 'DEFAULT',
+ 'DEFERRABLE',
+ 'DEFERRED',
+ 'DELETE',
+ 'DESC',
+ 'DETACH',
+ 'DISTINCT',
+ 'DROP',
+ 'EACH',
+ 'ELSE',
+ 'END',
+ 'ESCAPE',
+ 'EXCEPT',
+ 'EXCLUSIVE',
+ 'EXISTS',
+ 'EXPLAIN',
+ 'FAIL',
+ 'FOR',
+ 'FOREIGN',
+ 'FROM',
+ 'FULL',
+ 'GLOB',
+ 'GROUP',
+ 'HAVING',
+ 'IF',
+ 'IGNORE',
+ 'IMMEDIATE',
+ 'IN',
+ 'INDEX',
+ 'INDEXED',
+ 'INITIALLY',
+ 'INNER',
+ 'INSERT',
+ 'INSTEAD',
+ 'INTERSECT',
+ 'INTO',
+ 'IS',
+ 'ISNULL',
+ 'JOIN',
+ 'KEY',
+ 'LEFT',
+ 'LIKE',
+ 'LIMIT',
+ 'MATCH',
+ 'NATURAL',
+ 'NO',
+ 'NOT',
+ 'NOTNULL',
+ 'NULL',
+ 'OF',
+ 'OFFSET',
+ 'ON',
+ 'OR',
+ 'ORDER',
+ 'OUTER',
+ 'PLAN',
+ 'PRAGMA',
+ 'PRIMARY',
+ 'QUERY',
+ 'RAISE',
+ 'RECURSIVE',
+ 'REFERENCES',
+ 'REGEXP',
+ 'REINDEX',
+ 'RELEASE',
+ 'RENAME',
+ 'REPLACE',
+ 'RESTRICT',
+ 'RIGHT',
+ 'ROLLBACK',
+ 'ROW',
+ 'SAVEPOINT',
+ 'SELECT',
+ 'SET',
+ 'TABLE',
+ 'TEMP',
+ 'TEMPORARY',
+ 'THEN',
+ 'TO',
+ 'TRANSACTION',
+ 'TRIGGER',
+ 'UNION',
+ 'UNIQUE',
+ 'UPDATE',
+ 'USING',
+ 'VACUUM',
+ 'VALUES',
+ 'VIEW',
+ 'VIRTUAL',
+ 'WHEN',
+ 'WHERE',
+ 'WITH',
+ 'WITHOUT'
+]
+
+
+def get_valid_column_name(name):
+ valid_name = name
+ if not name or not name.strip():
+ return None
+ elif name.upper() in SQLITE_KEYWORDS:
+ valid_name = '"%s"' % name
+ elif re.match(r'^[a-zA-Z]\w*$', name):
+ pass
+ elif re.match('^"[^"]+"$', name):
+ pass
+ elif re.match(r'^\[[^\[\]]*\]$', name):
+ pass
+ elif re.match("^`[^`]+`$", name):
+ pass
+ elif name.find('"') < 0:
+ valid_name = '"%s"' % name
+ elif name.find('[') < 0 and name.find(']') < 0:
+ valid_name = '[%s]' % name
+ elif name.find('`') < 0:
+ valid_name = '`%s`' % name
+ elif name.find("'") < 0:
+ valid_name = "'%s'" % name
+ return valid_name
+
+
def getValueType(val):
if val or 0. == val:
try:
@@ -23,14 +177,19 @@
def get_column_def(file_path, table_name, skip=0, comment_char='#',
column_names=None, max_lines=100, load_named_columns=False,
- filters=None):
+ firstlinenames=False, filters=None):
col_pref = ['TEXT', 'REAL', 'INTEGER', None]
col_types = []
col_idx = None
+ col_names = []
try:
tr = TabularReader(file_path, skip=skip, comment_char=comment_char,
col_idx=None, filters=filters)
for linenum, fields in enumerate(tr):
+ if linenum == 0 and firstlinenames:
+ col_names = [get_valid_column_name(name) or 'c%d' % (i + 1)
+ for i, name in enumerate(fields)]
+ continue
if linenum > max_lines:
break
try:
@@ -48,24 +207,24 @@
for i, col_type in enumerate(col_types):
if not col_type:
col_types[i] = 'TEXT'
+ if not col_names:
+ col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
if column_names:
- col_names = []
if load_named_columns:
col_idx = []
+ cnames = []
for i, cname in enumerate(
[cn.strip() for cn in column_names.split(',')]):
if cname != '':
col_idx.append(i)
- col_names.append(cname)
+ cnames.append(cname)
col_types = [col_types[i] for i in col_idx]
+ col_names = cnames
else:
- col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
for i, cname in enumerate(
[cn.strip() for cn in column_names.split(',')]):
if cname and i < len(col_names):
col_names[i] = cname
- else:
- col_names = ['c%d' % i for i in range(1, len(col_types) + 1)]
col_def = []
for i, col_name in enumerate(col_names):
col_def.append('%s %s' % (col_names[i], col_types[i]))
@@ -74,12 +233,14 @@
def create_table(conn, file_path, table_name, skip=0, comment_char='#',
pkey_autoincr=None, column_names=None,
- load_named_columns=False, filters=None,
- unique_indexes=[], indexes=[]):
+ load_named_columns=False, firstlinenames=False,
+ filters=None, unique_indexes=[], indexes=[]):
col_names, col_types, col_def, col_idx = \
get_column_def(file_path, table_name, skip=skip,
comment_char=comment_char, column_names=column_names,
- load_named_columns=load_named_columns, filters=filters)
+ load_named_columns=load_named_columns,
+ firstlinenames=firstlinenames,
+ filters=filters)
col_func = [float if t == 'REAL' else int
if t == 'INTEGER' else str for t in col_types]
table_def = 'CREATE TABLE %s (\n %s%s\n);' % (
@@ -98,19 +259,13 @@
c.execute(table_def)
conn.commit()
c.close()
- for i, index in enumerate(unique_indexes):
- index_name = 'idx_uniq_%s_%d' % (table_name, i)
- index_columns = index.split(',')
- create_index(conn, table_name, index_name, index_columns,
- unique=True)
- for i, index in enumerate(indexes):
- index_name = 'idx_%s_%d' % (table_name, i)
- index_columns = index.split(',')
- create_index(conn, table_name, index_name, index_columns)
+
c = conn.cursor()
tr = TabularReader(file_path, skip=skip, comment_char=comment_char,
col_idx=col_idx, filters=filters)
for linenum, fields in enumerate(tr):
+ if linenum == 0 and firstlinenames:
+ continue
data_lines += 1
try:
vals = [col_func[i](x)
@@ -121,15 +276,28 @@
file=sys.stderr)
conn.commit()
c.close()
+ for i, index in enumerate(unique_indexes):
+ index_name = 'idx_uniq_%s_%d' % (table_name, i)
+ index_columns = index.split(',')
+ create_index(conn, table_name, index_name, index_columns,
+ unique=True)
+ for i, index in enumerate(indexes):
+ index_name = 'idx_%s_%d' % (table_name, i)
+ index_columns = index.split(',')
+ create_index(conn, table_name, index_name, index_columns)
except Exception as e:
exit('Error: %s' % (e))
def create_index(conn, table_name, index_name, index_columns, unique=False):
- index_def = "CREATE %s INDEX %s on %s(%s)" % (
+ index_def = 'CREATE %s INDEX %s on %s(%s)' % (
'UNIQUE' if unique else '', index_name,
table_name, ','.join(index_columns))
- c = conn.cursor()
- c.execute(index_def)
- conn.commit()
- c.close()
+ try:
+ c = conn.cursor()
+ c.execute(index_def)
+ conn.commit()
+ c.close()
+ except Exception as e:
+ print('Failed: %s err: %s' % (index_def, e), file=sys.stderr)
+ raise
diff -r 859064f07be4 -r c1b700bc0150 macros.xml
--- a/macros.xml Tue Jul 18 09:07:26 2017 -0400
+++ b/macros.xml Fri Aug 18 16:48:20 2017 -0400
@@ -60,6 +60,52 @@
#end for
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -81,23 +127,7 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+
diff -r 859064f07be4 -r c1b700bc0150 query_db.py
--- a/query_db.py Tue Jul 18 09:07:26 2017 -0400
+++ b/query_db.py Fri Aug 18 16:48:20 2017 -0400
@@ -56,12 +56,12 @@
exit(0)
-def run_query(conn, query, outputFile, no_header=False):
+def run_query(conn, query, outputFile, no_header=False, comment_char='#'):
cur = conn.cursor()
results = cur.execute(query)
if not no_header:
- outputFile.write("#%s\n" % '\t'.join(
- [str(col[0]) for col in cur.description]))
+ outputFile.write("%s%s\n" % (comment_char, '\t'.join(
+ [str(col[0]) for col in cur.description])))
for i, row in enumerate(results):
outputFile.write("%s\n" % '\t'.join(
[str(val) if val is not None else '' for val in row]))
diff -r 859064f07be4 -r c1b700bc0150 query_tabular.py
--- a/query_tabular.py Tue Jul 18 09:07:26 2017 -0400
+++ b/query_tabular.py Fri Aug 18 16:48:20 2017 -0400
@@ -56,6 +56,8 @@
parser.add_option('-n', '--no_header', dest='no_header', default=False,
action='store_true',
help='Include a column headers line')
+ parser.add_option('-c', '--comment_char', dest='comment_char', default='',
+ help='comment character to prefix column header line')
parser.add_option('-o', '--output', dest='output', default=None,
help='Output file for query results')
(options, args) = parser.parse_args()
@@ -80,6 +82,8 @@
table['comment_char'] if 'comment_char' in table else None
column_names =\
table['column_names'] if 'column_names' in table else None
+ firstlinenames =\
+ table['firstlinenames'] if 'firstlinenames' in table else False
if column_names:
load_named_columns =\
table['load_named_columns']\
@@ -93,6 +97,7 @@
table['pkey_autoincr'] if 'pkey_autoincr' in table else None
create_table(get_connection(options.sqlitedb), path, table_name,
pkey_autoincr=pkey_autoincr,
+ firstlinenames=firstlinenames,
column_names=column_names,
skip=comment_lines,
comment_char=comment_char,
@@ -128,7 +133,8 @@
else:
try:
run_query(get_connection(options.sqlitedb), query, outputFile,
- no_header=options.no_header)
+ no_header=options.no_header,
+ comment_char=options.comment_char)
except Exception as e:
exit('Error: %s' % (e))
diff -r 859064f07be4 -r c1b700bc0150 sqlite_to_tabular.py
--- a/sqlite_to_tabular.py Tue Jul 18 09:07:26 2017 -0400
+++ b/sqlite_to_tabular.py Fri Aug 18 16:48:20 2017 -0400
@@ -21,6 +21,8 @@
parser.add_option('-n', '--no_header', dest='no_header', default=False,
action='store_true',
help='Include a column headers line')
+ parser.add_option('-c', '--comment_char', dest='comment_char', default='',
+ help='comment character to prefix column header line')
parser.add_option('-o', '--output', dest='output', default=None,
help='Output file for query results')
(options, args) = parser.parse_args()
@@ -51,7 +53,8 @@
else:
try:
run_query(get_connection(options.sqlitedb), query, outputFile,
- no_header=options.no_header)
+ no_header=options.no_header,
+ comment_char=options.comment_char)
except Exception as e:
exit('Error: %s' % (e))
diff -r 859064f07be4 -r c1b700bc0150 sqlite_to_tabular.xml
--- a/sqlite_to_tabular.xml Tue Jul 18 09:07:26 2017 -0400
+++ b/sqlite_to_tabular.xml Fri Aug 18 16:48:20 2017 -0400
@@ -1,5 +1,5 @@
-
+
for SQL query
@@ -10,7 +10,7 @@
python '$__tool_directory__/sqlite_to_tabular.py'
--sqlitedb='$sqlitedb'
--query_file='$query_file'
- $no_header
+ @RESULT_HEADER@
--output='$query_results'
]]>
@@ -24,7 +24,7 @@
(?ims)^\s*SELECT\s.*\sFROM\s.*$
-
+
diff -r 859064f07be4 -r c1b700bc0150 test-data/psm_report.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/psm_report.tsv Fri Aug 18 16:48:20 2017 -0400
@@ -0,0 +1,10 @@
+
+ Protein(s) Sequence AAs Before AAs After Position Modified Sequence Variable Modifications Fixed Modifications Spectrum File Spectrum Title Spectrum Scan Number RT m/z Measured Charge Identification Charge Theoretical Mass Isotope Number Precursor m/z Error [ppm] Localization Confidence Probabilistic PTM score D-score Confidence [%] Validation
+1 NP_116558 PYANQPTVR M IT 2 NH2-PYANQPTVR-COOH trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.04679.04679.2 4679 -1.0 523.272583 2+ 2+ 1044.53524305008 0 -4.4240452979909675 100.0 Doubtful
+2 NP_443137, NP_443137_S1016F DANTQVHTLR YK; YK KM; KM 443; 443 NH2-DANTQVHTLR-COOH trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.03894.03894.2 3894 -1.0 577.799622 2+ 2+ 1153.5839841476504 0 0.6117338355350196 95.0 Doubtful
+3 NP_066544_R21W SSWAGLQFPVGR TR VH 19 NH2-SSWAGLQFPVGR-COOH trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.20932.20932.2 20932 -1.0 652.843567 2+ 2+ 1303.6673198487902 0 4.029478922381296 100.0 Doubtful
+4 NP_112092_rs7285167:R182C AQACNLDQSGTNVAK AR VM 179 NH2-AQACNLDQSGTNVAK-COOH Carbamidomethylation of C(4) trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.05257.05257.2 5257 -1.0 788.87384 2+ 2+ 1575.7311189571503 0 1.2727711831177586 100.0 Doubtful
+5 NP_001273432_F75C, NP_001273433_F77C, NP_001273434_F77C, NP_006635_F75C ACNDPFIQK GR; GR; GR; GR EK; EK; EK; EK 74; 76; 76; 74 NH2-ACNDPFIQK-COOH Carbamidomethylation of C(2) trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.09560.09560.2 9560 -1.0 546.760803 2+ 2+ 1091.50697969691 0 0.06709466946167521 94.73684210526316 Doubtful
+6 NP_001028196, NP_001244919, NP_036311 LQLLPESFICK KR; KR; KR EK; EK; EK 198; 163; 277 NH2-LQLLPESFICK-COOH Carbamidomethylation of C(10) trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.22345.22345.2 22345 -1.0 674.370911 2+ 2+ 1346.7268088708604 0 0.3412036795550375 99.38650306748467 Doubtful
+7 NP_001909_rs72973763:I239V,rs12021720:S384G DMTVPVLVSKPPVFTGK PK DK 234 NH2-DMTVPVLVSKPPVFTGK-COOH trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.20464.20464.2 20464 -1.0 908.008545 2+ 2+ 1814.0011927988103 0 0.7402290254293575 94.73684210526316 Doubtful
+8 NP_001265138, NP_001265138_rs2274136:A858T,F879L, NP_001265139, NP_001265139_rs2274136:A785T,F806L, NP_005115, NP_005115_rs2274136:A827T,F848L FGVSSESKPEEVK FK; FK; FK; FK; FK; FK KD; KD; KD; KD; KD; KD 992; 992; 919; 919; 961; 961 NH2-FGVSSESKPEEVK-COOH trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.06665.06665.2 6665 -1.0 711.858643 2+ 2+ 1421.7038244968105 0 -0.7666055674357796 100.0 Doubtful
diff -r 859064f07be4 -r c1b700bc0150 test-data/psm_report_out1.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/psm_report_out1.tsv Fri Aug 18 16:48:20 2017 -0400
@@ -0,0 +1,9 @@
+Scan Sequence Protein(s) Position m/z Precursor m/z Error
+1 PYANQPTVR NP_116558 2 523.272583 -4.42404529799
+2 DANTQVHTLR NP_443137, NP_443137_S1016F 443; 443 577.799622 0.611733835535
+3 SSWAGLQFPVGR NP_066544_R21W 19 652.843567 4.02947892238
+4 AQACNLDQSGTNVAK NP_112092_rs7285167:R182C 179 788.87384 1.27277118312
+5 ACNDPFIQK NP_001273432_F75C, NP_001273433_F77C, NP_001273434_F77C, NP_006635_F75C 74; 76; 76; 74 546.760803 0.0670946694617
+6 LQLLPESFICK NP_001028196, NP_001244919, NP_036311 198; 163; 277 674.370911 0.341203679555
+7 DMTVPVLVSKPPVFTGK NP_001909_rs72973763:I239V,rs12021720:S384G 234 908.008545 0.740229025429
+8 FGVSSESKPEEVK NP_001265138, NP_001265138_rs2274136:A858T,F879L, NP_001265139, NP_001265139_rs2274136:A785T,F806L, NP_005115, NP_005115_rs2274136:A827T,F848L 992; 992; 919; 919; 961; 961 711.858643 -0.766605567436
diff -r 859064f07be4 -r c1b700bc0150 test-data/psm_report_out2.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/psm_report_out2.tsv Fri Aug 18 16:48:20 2017 -0400
@@ -0,0 +1,5 @@
+Scan m/z Precursor m/z Error Sequence Protein(s)
+1 523.272583 -4.42404529799 PYANQPTVR NP_116558
+3 652.843567 4.02947892238 SSWAGLQFPVGR NP_066544_R21W
+4 788.87384 1.27277118312 AQACNLDQSGTNVAK NP_112092_rs7285167:R182C
+7 908.008545 0.740229025429 DMTVPVLVSKPPVFTGK NP_001909_rs72973763:I239V,rs12021720:S384G