# HG changeset patch
# User iuc
# Date 1629402025 0
# Node ID c29d2f80a0661f429a554ec44282ecf6f9fb93b8
# Parent 575cd6cd537c2690704c46fe65365a18efe8d44e
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/query_tabular commit 35576d64a12fa664d72559172c5960c09da2b632"
diff -r 575cd6cd537c -r c29d2f80a066 filter_tabular.py
--- a/filter_tabular.py Sat Jun 19 14:16:32 2021 +0000
+++ b/filter_tabular.py Thu Aug 19 19:40:25 2021 +0000
@@ -19,6 +19,8 @@
help='JSON array of filter specifications')
parser.add_option('-o', '--output', dest='output', default=None,
help='Output file for query results')
+ parser.add_option('-c', '--comment_char)', dest='comment_char', default=None,
+ help='Line comment character')
parser.add_option('-v', '--verbose', dest='verbose', default=False,
action='store_true',
help='verbose')
@@ -59,7 +61,7 @@
file=sys.stdout)
try:
- filter_file(inputFile, outputFile, filters=filters)
+ filter_file(inputFile, outputFile, comment_char=options.comment_char, filters=filters)
except Exception as e:
exit('Error: %s' % (e))
diff -r 575cd6cd537c -r c29d2f80a066 filters.py
--- a/filters.py Sat Jun 19 14:16:32 2021 +0000
+++ b/filters.py Thu Aug 19 19:40:25 2021 +0000
@@ -4,6 +4,7 @@
import re
import sys
+from itertools import chain
class LineFilter(object):
@@ -13,6 +14,15 @@
self.func = lambda i, l: l.rstrip('\r\n') if l else None
self.src_lines = []
self.src_line_cnt = 0
+
+ def xint(x):  # best-effort int conversion used when parsing column specs
+ if isinstance(x, int):  # already an int: return it unchanged
+ return x
+ try:
+ return int(x)
+ except Exception:  # not convertible: fall through and keep the raw value
+ return x if x else None  # falsy values (e.g. '') become None
+
+
if not filter_dict:
return
if filter_dict['filter'] == 'regex':
@@ -28,6 +38,13 @@
elif filter_dict['filter'] == 'select_columns':
cols = [int(c) - 1 for c in filter_dict['columns']]
self.func = lambda i, l: self.select_columns(l, cols)
+ elif filter_dict['filter'] == 'select_column_slices':
+ cols = [x if isinstance(x, int) else [y if y is not None else None for y in [xint(k) for k in x.split(':')]] for x in [xint(c) for c in filter_dict['columns']]]
+ if all([isinstance(x, int) for x in cols]):
+ self.func = lambda i, l: self.select_columns(l, cols)
+ else:
+ cols = [slice(x[0], x[1], x[2] if len(x) > 2 else None) if isinstance(x, list) else x for x in cols]
+ self.func = lambda i, l: self.select_slices(l, cols)
elif filter_dict['filter'] == 'replace':
p = filter_dict['pattern']
r = filter_dict['replace']
@@ -80,6 +97,10 @@
fields = line.split('\t')
return '\t'.join([fields[x] for x in cols])
+ def select_slices(self, line, cols):  # return the tab-joined fields of line selected by each entry of cols
+ fields = line.split('\t')
+ return '\t'.join(chain.from_iterable([y if isinstance(y, list) else [y] for y in [fields[x] for x in cols]]))  # list results are flattened, any other result is wrapped as a single field
+
def replace_add(self, line, pat, rep, col, pos):
fields = line.rstrip('\r\n').split('\t')
i = pos if pos is not None else len(fields)
diff -r 575cd6cd537c -r c29d2f80a066 macros.xml
--- a/macros.xml Sat Jun 19 14:16:32 2021 +0000
+++ b/macros.xml Thu Aug 19 19:40:25 2021 +0000
@@ -32,7 +32,12 @@
#elif $fi.filter.filter_type == 'select_columns':
#set $filter_dict = dict()
#set $filter_dict['filter'] = str($fi.filter.filter_type)
- #set $filter_dict['columns'] = [int(str($ci).replace('c','')) for $ci in str($fi.filter.columns).split(',')]
+ #set $filter_dict['columns'] = [int($c) for $c in str($fi.filter.columns).replace('c','').split(',')]
+ #silent $input_filters.append($filter_dict)
+ #elif $fi.filter.filter_type == 'select_column_slices':
+ #set $filter_dict = dict()
+ #set $filter_dict['filter'] = str($fi.filter.filter_type)
+ #set $filter_dict['columns'] = [$c for $c in str($fi.filter.columns).split(',')]
#silent $input_filters.append($filter_dict)
#elif $fi.filter.filter_type == 'replace':
#set $filter_dict = dict()
@@ -53,6 +58,11 @@
#set $filter_dict['filter'] = str($fi.filter.filter_type)
#set $filter_dict['column_text'] = str($fi.filter.column_text)
#silent $input_filters.append($filter_dict)
+ #elif str($fi.filter.filter_type).endswith('pend_dataset_name'):
+ #set $filter_dict = dict()
+ #set $filter_dict['filter'] = str($fi.filter.filter_type).replace('dataset_name', 'text')
+ #set $filter_dict['column_text'] = $dataset_name
+ #silent $input_filters.append($filter_dict)
#elif $fi.filter.filter_type == 'normalize':
#set $filter_dict = dict()
#set $filter_dict['filter'] = str($fi.filter.filter_type)
@@ -129,9 +139,12 @@
+
+
+
@@ -147,6 +160,8 @@
+
+
@@ -172,6 +187,26 @@
^(c?[1-9]\d*)(,c?[1-9]\d*)*$
+
+
+
+ Column offset indexes: 0,3,1 (selects the first, fourth, and second columns)
+ Negative column numbers: -1,-2 (selects the last and second-to-last columns)
+ Python slices, i.e. slice(start, stop[, step]), select a range of columns:
+
+ - 0:3 or :3 (selects the first 3 columns)
+ - 3:5 (selects the fourth and fifth columns)
+ - 2: (selects all columns after the second)
+ - -2: (selects the last 2 columns)
+ - 2::-1 (selects the first 3 columns in reverse order: third, second, first)
+
+
+ ]]>
+ ^(-?[1-9]\d*|((-?\d+)?:(-?\d*(:-?\d*)?)))(,(-?[1-9]\d*|((-?\d+)?:(-?\d*(:-?\d*)?))))*$
+
+
+
@@ -211,21 +246,24 @@
@@ -284,13 +322,13 @@
0 Jane Doe 1978-05-24 5 3
1 James Smith 1980-10-20 Spot 6 4
- Filter 6 - append a line number column:
+ Filter 6 - select columns by indices/slices: '1:6'
- 2 Paula Brown 1978-05-24 Rex dog 3 1 1
- 2 Paula Brown 1978-05-24 Fluff cat 3 1 2
- 1 Steven Jones 1974-04-04 Allie cat 4 2 3
- 0 Jane Doe 1978-05-24 5 3 4
- 1 James Smith 1980-10-20 Spot 6 4 5
+ Paula Brown 1978-05-24 Rex dog
+ Paula Brown 1978-05-24 Fluff cat
+ Steven Jones 1974-04-04 Allie cat
+ Jane Doe 1978-05-24
+ James Smith 1980-10-20 Spot
]]>
diff -r 575cd6cd537c -r c29d2f80a066 test-data/filtered_IEDB.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtered_IEDB.tsv Thu Aug 19 19:40:25 2021 +0000
@@ -0,0 +1,17 @@
+peptide ID allele seq_num start end length percentile_rank comblib_sidney2008_rank IEDB.tsv
+GMYCMVFLV PPAP2C HLA-A*02:01 1 3 11 9 0.2 0.5 IEDB.tsv
+SFGMYCMVF PPAP2C HLA-A*23:01 1 1 9 9 0.5 - IEDB.tsv
+MYCMVFLVK PPAP2C HLA-A*23:01 1 4 12 9 0.65 - IEDB.tsv
+FGMYCMVFL PPAP2C HLA-A*02:01 1 2 10 9 2.3 1.3 IEDB.tsv
+GMYCMVFLV PPAP2C HLA-A*23:01 1 3 11 9 4.95 - IEDB.tsv
+FGMYCMVFL PPAP2C HLA-A*23:01 1 2 10 9 6.55 - IEDB.tsv
+SFGMYCMVF PPAP2C HLA-A*02:01 1 1 9 9 45 91 IEDB.tsv
+MYCMVFLVK PPAP2C HLA-A*02:01 1 4 12 9 54 86 IEDB.tsv
+SLDMCISGL ADAMTSL1 HLA-A*02:01 1 1 9 9 1 1.7 IEDB.tsv
+MCISGLCQL ADAMTSL1 HLA-A*23:01 1 4 12 9 6.65 - IEDB.tsv
+MCISGLCQL ADAMTSL1 HLA-A*02:01 1 4 12 9 14 24 IEDB.tsv
+SLDMCISGL ADAMTSL1 HLA-A*23:01 1 1 9 9 30.5 - IEDB.tsv
+LDMCISGLC ADAMTSL1 HLA-A*02:01 1 2 10 9 42 71 IEDB.tsv
+DMCISGLCQ ADAMTSL1 HLA-A*23:01 1 3 11 9 64.5 - IEDB.tsv
+LDMCISGLC ADAMTSL1 HLA-A*23:01 1 2 10 9 76.0 - IEDB.tsv
+DMCISGLCQ ADAMTSL1 HLA-A*02:01 1 3 11 9 97 97 IEDB.tsv
diff -r 575cd6cd537c -r c29d2f80a066 test-data/psm_dbmod_output.tsv
--- a/test-data/psm_dbmod_output.tsv Sat Jun 19 14:16:32 2021 +0000
+++ b/test-data/psm_dbmod_output.tsv Thu Aug 19 19:40:25 2021 +0000
@@ -1,4 +1,4 @@
-#scan m\/z Precursor m\/z Error Sequence Protein\(s\) confidence
+#scan m\/z Precursor m\/z Error( \[ppm])? Sequence Protein\(s\) confidence
1 523.27\d* -4.42\d* PYANQPTVR NP_116558 99.9\d*
3 652.84\d* 4.02\d* SSWAGLQFPVGR NP_066544_R21W 99.9\d*
4 788.87\d* 1.27\d* AQACNLDQSGTNVAK NP_112092_rs7285167:R182C 99.9\d*
diff -r 575cd6cd537c -r c29d2f80a066 test-data/psm_dbmod_output1.tsv
--- a/test-data/psm_dbmod_output1.tsv Sat Jun 19 14:16:32 2021 +0000
+++ b/test-data/psm_dbmod_output1.tsv Thu Aug 19 19:40:25 2021 +0000
@@ -1,4 +1,4 @@
-scan Sequence Protein\(s\) Position m\/z Precursor m\/z Error confidence
+scan Sequence Protein\(s\) Position m\/z Precursor m\/z Error( \[ppm])? confidence
1 PYANQPTVR NP_116558 2 523.27\d* -4.42\d* 99.99\d*
3 SSWAGLQFPVGR NP_066544_R21W 19 652.84\d* 4.02\d* 99.99\d*
4 AQACNLDQSGTNVAK NP_112092_rs7285167:R182C 179 788.87\d* 1.27\d* 99.99\d*
diff -r 575cd6cd537c -r c29d2f80a066 test-data/psm_report.tsv
--- a/test-data/psm_report.tsv Sat Jun 19 14:16:32 2021 +0000
+++ b/test-data/psm_report.tsv Thu Aug 19 19:40:25 2021 +0000
@@ -1,4 +1,3 @@
-
Protein(s) Sequence AAs Before AAs After Position Modified Sequence Variable Modifications Fixed Modifications Spectrum File Spectrum Title Spectrum Scan Number RT m/z Measured Charge Identification Charge Theoretical Mass Isotope Number Precursor m/z Error [ppm] Localization Confidence Probabilistic PTM score D-score Confidence [%] Validation
1 NP_116558 PYANQPTVR M IT 2 NH2-PYANQPTVR-COOH trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.04679.04679.2 4679 -1.0 523.272583 2+ 2+ 1044.53524305008 0 -4.4240452979909675 100.0 Doubtful
2 NP_443137, NP_443137_S1016F DANTQVHTLR YK; YK KM; KM 443; 443 NH2-DANTQVHTLR-COOH trimmed_tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.mgf tgriffin_cguerrer_20150826_RP_MCF7_hipH_frac_12n28.03894.03894.2 3894 -1.0 577.799622 2+ 2+ 1153.5839841476504 0 0.6117338355350196 95.0 Doubtful
diff -r 575cd6cd537c -r c29d2f80a066 test-data/psm_report_out2.tsv
--- a/test-data/psm_report_out2.tsv Sat Jun 19 14:16:32 2021 +0000
+++ b/test-data/psm_report_out2.tsv Thu Aug 19 19:40:25 2021 +0000
@@ -1,4 +1,4 @@
-Scan m\/z Precursor m\/z Error Sequence Protein\(s\)
+Scan m\/z Precursor m\/z Error( \[ppm])? Sequence Protein\(s\)
1 523.27\d* -4.42\d* PYANQPTVR NP_116558
3 652.84\d* 4.02\d* SSWAGLQFPVGR NP_066544_R21W
4 788.87\d* 1.27\d* AQACNLDQSGTNVAK NP_112092_rs7285167:R182C