# HG changeset patch
# User galaxyp
# Date 1500888322 14400
# Node ID 8a30d6e5b97ddcf2a9c6ed02f787b233c921f954
# Parent 34c5c95740a17cbc3835ca179042924b78a72a18
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
diff -r 34c5c95740a1 -r 8a30d6e5b97d README.rst
--- a/README.rst Mon May 22 05:08:23 2017 -0400
+++ b/README.rst Mon Jul 24 05:25:22 2017 -0400
@@ -1,4 +1,4 @@
-GalaxyP - Percolator
+GalaxyP - HiRIEF tools
=======================
- Home:
diff -r 34c5c95740a1 -r 8a30d6e5b97d __pycache__/peptide_pi_annotator.cpython-36.pyc
Binary file __pycache__/peptide_pi_annotator.cpython-36.pyc has changed
diff -r 34c5c95740a1 -r 8a30d6e5b97d delta_pi_calc.xml
--- a/delta_pi_calc.xml Mon May 22 05:08:23 2017 -0400
+++ b/delta_pi_calc.xml Mon Jul 24 05:25:22 2017 -0400
@@ -1,11 +1,25 @@
-
+
python
to peptide table
- python '$__tool_directory__/peptide_pi_annotator.py' -i '$trainingpi' -p '$peptable'
- --stripcol $stripcol --pepcol $pepcol --fraccol $fraccol --out '$output'
+ python '$__tool_directory__/peptide_pi_annotator.py' -i '$trainingpi' -p '$peptable' --out '$output'
+ #if $stripcol
+ --stripcol $stripcol
+ #else if $stripcolpattern
+ --stripcolpattern '$stripcolpattern'
+ #end if
+ #if $pepcol
+ --pepcol $pepcol
+ #else if $pepcolpattern
+ --pepcolpattern '$pepcolpattern'
+ #end if
+ #if $fraccol
+ --fraccol $fraccol
+ #else if $fraccolpattern
+ --fraccolpattern '$fraccolpattern'
+ #end if
--strippatterns
#for $strip in $strips
@@ -42,9 +56,12 @@
-
-
-
+
+
+
+
+
+
@@ -83,6 +100,27 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 34c5c95740a1 -r 8a30d6e5b97d peptide_pi_annotator.py
--- a/peptide_pi_annotator.py Mon May 22 05:08:23 2017 -0400
+++ b/peptide_pi_annotator.py Mon Jul 24 05:25:22 2017 -0400
@@ -10,13 +10,34 @@
sys.argv.append('-h')
args = parse_commandline()
strips = {}
+ if args.frac_col > 0:
+ frac_col = args.frac_col - 1
+ elif args.frac_col:
+ frac_col = args.frac_col
+ elif args.frac_colpattern:
+ frac_col = get_col_by_pattern(args.peptable, args.frac_colpattern)
+ else:
+ raise RuntimeError('Must define fraction column')
+ if args.stripcol > 0:
+ stripcol = args.stripcol - 1
+ elif args.stripcol:
+ stripcol = args.stripcol
+ elif args.stripcolpattern:
+ stripcol = get_col_by_pattern(args.peptable, args.stripcolpattern)
+ else:
+ raise RuntimeError('Must define strip column')
+ if args.pepcol:
+ pepcol = args.pepcol - 1
+ elif args.pepcolpattern:
+ pepcol = get_col_by_pattern(args.peptable, args.pepcolpattern)
+ else:
+ raise RuntimeError('Must define peptide sequence column')
for i, strip in enumerate(args.pipatterns):
strips[strip] = {'intercept': args.intercepts[i],
'fr_width': args.fr_width[i]}
with open(args.outpeptable, 'w') as fp:
- for outline in annotate_peptable(args.pipeps, args.peptable,
- args.pepcol, args.frac_col,
- args.stripcol, strips,
+ for outline in annotate_peptable(args.pipeps, args.peptable, pepcol,
+ frac_col, stripcol, strips,
args.ignoremods):
fp.write('\t'.join([str(x) for x in outline]))
fp.write('\n')
@@ -29,10 +50,16 @@
return False
+def get_col_by_pattern(peptable, colpattern):
+    """0-based index of the first header field containing colpattern, or None."""
+    with open(peptable) as table:
+        fields = next(table).strip('\n').split('\t')
+    hits = (pos for pos, name in enumerate(fields) if colpattern in name)
+    return next(hits, None)
+
+
def annotate_peptable(predicted_peps_fn, peptable, seqcol, frac_col, stripcol,
strips, ignoremods):
- if frac_col > 0:
- frac_col -= 1
predicted_peps = {}
with open(predicted_peps_fn) as fp:
for line in fp:
@@ -45,11 +72,11 @@
for line in fp:
line = line.strip('\n').split('\t')
strip = strips[get_first_matching_pattern(strips.keys(),
- line[stripcol - 1])]
+ line[stripcol])]
exp_pi = (strip['fr_width'] * int(line[frac_col]) +
strip['intercept'])
- sequence = line[seqcol - 1]
+ sequence = line[seqcol]
for weight in ignoremods:
if weight == '*':
regex = '[+-]\d*\.\d*'
@@ -81,15 +108,24 @@
'pI shift.')
parser.add_argument('-i', dest='pipeps', help='A tab-separated txt file '
'with peptide seq, pI value')
+ parser.add_argument('--pepcolpattern', dest='pepcolpattern',
+ help='Peptide sequence column pattern in peptide '
+ 'table.', default=False, type=str)
parser.add_argument('--pepcol', dest='pepcol', help='Peptide sequence '
'column number in peptide table. First column is 1.',
default=False, type=int)
+ parser.add_argument('--fraccolpattern', dest='frac_colpattern',
+ help='Fraction number column pattern in peptide '
+ 'table.', default=False, type=str)
parser.add_argument('--fraccol', dest='frac_col', help='Fraction number '
'column number in peptide table. First column is 1.',
- type=int)
+ default=False, type=int)
parser.add_argument('--ignoremods', dest='ignoremods', help='Regex to '
'identify modification weights to be ignored.',
default=[], nargs='+', type=str)
+ parser.add_argument('--stripcolpattern', dest='stripcolpattern',
+ help='Strip name column pattern in peptide '
+ 'table.', type=str, default=False)
parser.add_argument('--stripcol', dest='stripcol', help='Strip name '
'column number in peptide table. Will be used to '
'detect strips if multiple are present using pattern '
diff -r 34c5c95740a1 -r 8a30d6e5b97d pi_database_splitter.py
--- a/pi_database_splitter.py Mon May 22 05:08:23 2017 -0400
+++ b/pi_database_splitter.py Mon Jul 24 05:25:22 2017 -0400
@@ -4,6 +4,8 @@
from numpy import median
from contextlib import ExitStack
+from peptide_pi_annotator import get_col_by_pattern
+
def main():
if sys.argv[1:] == []:
@@ -14,10 +16,23 @@
# Column nrs should start from 0
# If negative, -1 is last item in list, etc
if args.fdrcol > 0:
- args.fdrcol -= 1
+ fdrcol = args.fdrcol - 1
+ elif args.fdrcol:
+ fdrcol = args.fdrcol
+ elif args.fdrcolpattern:
+ fdrcol = get_col_by_pattern(args.train_peptable, args.fdrcolpattern)
+ else:
+ fdrcol = False
if args.deltapicol > 0:
- args.deltapicol -= 1
- pishift = get_pishift(args.train_peptable, args.fdrcol, args.deltapicol,
+ deltapicol = args.deltapicol - 1
+ elif args.deltapicol:
+ deltapicol = args.deltapicol
+ elif args.deltapicolpattern:
+ deltapicol = get_col_by_pattern(args.train_peptable,
+ args.deltapicolpattern)
+ else:
+ deltapicol = False
+ pishift = get_pishift(args.train_peptable, fdrcol, deltapicol,
args.fdrcutoff, args.picutoff)
binarray = get_bin_array(args.fr_amount, args.fr_width, args.intercept,
args.tolerance, pishift)
@@ -60,9 +75,15 @@
' number in peptide table. First column is nr. 1. '
'Negative number for counting from last col '
'(-1 is last).', default=False, type=int)
+ parser.add_argument('--deltacolpattern', dest='deltapicolpattern',
+ help='Delta pI column header pattern in peptide '
+ 'table.', default=False, type=str)
parser.add_argument('--picutoff', dest='picutoff',
help='delta pI value to filter experimental peptides'
' when calculating pi shift.', default=0.2, type=float)
+ parser.add_argument('--fdrcolpattern', dest='fdrcolpattern',
+ help='FDR column header pattern in peptide table.',
+ default=False, type=str)
parser.add_argument('--fdrcol', dest='fdrcol', help='FDR column number in '
'peptide table. First column is nr. 1. Empty includes '
'all peptides', default=False, type=int)
@@ -84,7 +105,7 @@
help='pI Intercept of strip', type=float)
parser.add_argument('--width', dest='fr_width',
help='Strip fraction width in pI', type=float)
- parser.add_argument('--minlen', dest='minlen', help='Minimal peptide length',
+ parser.add_argument('--minlen', dest='minlen', help='Minimal peptide length',
type=int)
parser.add_argument('--maxlen', dest='maxlen', help='Maximal peptide length',
type=int, default=False)
@@ -146,7 +167,7 @@
accs, pep, pi = line.strip().split("\t")
pi = float(pi)
if maxlen and len(pep) > maxlen:
- continue
+ continue
elif len(pep) >= minlen:
pepcount += 1
if pep[-1] in {'K', 'R'}:
diff -r 34c5c95740a1 -r 8a30d6e5b97d pi_db_split.xml
--- a/pi_db_split.xml Mon May 22 05:08:23 2017 -0400
+++ b/pi_db_split.xml Mon Jul 24 05:25:22 2017 -0400
@@ -1,4 +1,4 @@
-
+
into pI separated fractions
numpy
@@ -8,14 +8,26 @@
-
+
+
-
-
+
+
-
-
-
-
-
+
+
+
+
+
+
+
+
+
@@ -54,13 +71,42 @@
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+