annotate peptide_pi_annotator.py @ 3:78afc81ab244 draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit decb06dc90d7069d317968b979f649a04720b264
author galaxyp
date Thu, 14 Sep 2017 11:55:02 -0400
parents 8a30d6e5b97d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
1 #!/usr/bin/env python
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
2
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
3 import re
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
4 import sys
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
5 import argparse
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
6
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
7
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
def _resolve_column(number, pattern, peptable, description):
    """Return the 0-based index of a peptide table column.

    A positive ``number`` is treated as 1-based and converted to a 0-based
    index. A negative ``number`` is returned unchanged, so it counts from the
    end of each row. When no number is given, ``pattern`` is looked up in the
    header line of ``peptable``.

    Raises:
        RuntimeError: if neither a number nor a pattern was given, or if the
            pattern does not match any header field.
    """
    if number:
        return number - 1 if number > 0 else number
    if pattern:
        index = get_col_by_pattern(peptable, pattern)
        if index is None:
            # Fail here with a clear message rather than later with a
            # TypeError when the row is indexed with None.
            raise RuntimeError('No header field matching "{}" found for {} '
                               'column'.format(pattern, description))
        return index
    raise RuntimeError('Must define {} column'.format(description))


def main():
    """Annotate a peptide table from the command line.

    Parses the command line, resolves the fraction, strip and peptide
    sequence columns, builds the per-strip settings and writes the rows
    produced by ``annotate_peptable`` to the output file as tab-separated
    text.

    Raises:
        RuntimeError: if a required column cannot be determined, if no strip
            patterns are given, or if there are fewer intercepts or fraction
            widths than strip patterns.
    """
    if not sys.argv[1:]:
        # Without any arguments, show the usage text.
        sys.argv.append('-h')
    args = parse_commandline()
    frac_col = _resolve_column(args.frac_col, args.frac_colpattern,
                               args.peptable, 'fraction')
    stripcol = _resolve_column(args.stripcol, args.stripcolpattern,
                               args.peptable, 'strip')
    pepcol = _resolve_column(args.pepcol, args.pepcolpattern,
                             args.peptable, 'peptide sequence')
    patterns = args.pipatterns or []
    intercepts = args.intercepts or []
    widths = args.fr_width or []
    if not patterns:
        raise RuntimeError('Must define strip patterns')
    if len(intercepts) < len(patterns) or len(widths) < len(patterns):
        raise RuntimeError('Must define an intercept and a fraction width '
                           'for each strip pattern')
    # Surplus intercepts/widths are ignored; zip stops at the last pattern.
    strips = {pattern: {'intercept': intercept, 'fr_width': width}
              for pattern, intercept, width
              in zip(patterns, intercepts, widths)}
    with open(args.outpeptable, 'w') as fp:
        for outline in annotate_peptable(args.pipeps, args.peptable, pepcol,
                                         frac_col, stripcol, strips,
                                         args.ignoremods):
            fp.write('\t'.join([str(x) for x in outline]))
            fp.write('\n')
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
44
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
45
3
78afc81ab244 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit decb06dc90d7069d317968b979f649a04720b264
galaxyp
parents: 1
diff changeset
def get_strip(strips, string):
    """Return the settings of the first strip whose pattern matches.

    Args:
        strips: dict mapping a regular expression to that strip's settings.
        string: text to search with each pattern, using ``re.search``.

    Returns:
        The value stored for the first pattern (in dict iteration order)
        found anywhere in ``string``, or ``False`` when no pattern matches.
    """
    for pattern, strip in strips.items():
        if re.search(pattern, string):
            return strip
    return False
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
51
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
52
1
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
def get_col_by_pattern(peptable, colpattern):
    """Find a column of a tab-separated table by its header text.

    Args:
        peptable: path of the table file; only its first line is read.
        colpattern: plain substring to look for in the header fields.

    Returns:
        The 0-based index of the first header field containing
        ``colpattern``, or ``None`` when no field contains it or the file is
        empty.
    """
    with open(peptable) as fp:
        # A default keeps an empty file from leaking StopIteration.
        header_line = next(fp, None)
    if header_line is None:
        return None
    for ix, field in enumerate(header_line.strip('\n').split('\t')):
        if colpattern in field:
            return ix
    return None
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
59
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
60
0
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
def annotate_peptable(predicted_peps_fn, peptable, seqcol, frac_col, stripcol,
                      strips, ignoremods):
    """Yield the rows of a peptide table with three pI columns appended.

    The first item yielded is the header line extended with
    'Experimental pI', 'Predicted pI' and 'Delta pI'. Every following item
    is a table row (list of strings) extended with those three values, each
    of which is 'NA' when it cannot be determined. A summary of predicted
    and unpredicted row counts is printed once all rows are consumed.

    Args:
        predicted_peps_fn: path of a tab-separated file whose first field is
            the lookup key (sequence) and whose second field is its pI.
            Lines with fewer than two fields are skipped.
        peptable: path of the tab-separated table to annotate; its first
            line is the header. An empty file yields nothing.
        seqcol: 0-based index of the sequence column.
        frac_col: 0-based index of the column holding an integer fraction
            number.
        stripcol: 0-based index of the column matched against the strip
            patterns.
        strips: dict mapping a regex to {'intercept': ..., 'fr_width': ...}.
        ignoremods: strings used as regexes (after a leading '+' or '-') to
            remove from the sequence before lookup; '*' removes any signed
            decimal number.
    """
    predicted_peps = {}
    with open(predicted_peps_fn) as fp:
        for line in fp:
            fields = line.strip('\n').split('\t')
            if len(fields) < 2:
                # Blank or truncated line: there is no pI value to store.
                continue
            predicted_peps[fields[0]] = fields[1]
    # The patterns do not depend on the row, so compile them only once.
    mod_regexes = [
        re.compile(r'[+-]\d*\.\d*' if weight == '*'
                   else '[+-]{}'.format(weight))
        for weight in ignoremods]
    not_predicted_count, predicted_count = 0, 0
    with open(peptable) as fp:
        header_line = next(fp, None)
        if header_line is None:
            # Empty table: nothing to annotate. A bare next(fp) here would
            # surface as RuntimeError because this is a generator.
            return
        header = header_line.strip('\n').split('\t')
        yield header + ['Experimental pI', 'Predicted pI', 'Delta pI']
        for line in fp:
            line = line.strip('\n').split('\t')
            sequence = line[seqcol]
            for mod_regex in mod_regexes:
                sequence = mod_regex.sub('', sequence)
            try:
                pred_pi = float(predicted_peps[sequence])
            except KeyError:
                print('CANNOT PREDICT', sequence)
                not_predicted_count += 1
                pred_pi = 'NA'
            else:
                predicted_count += 1
            # Start from 'NA' and fill in only what can be computed.
            exp_pi, delta_pi = 'NA', 'NA'
            strip = get_strip(strips, line[stripcol])
            if strip:
                try:
                    exp_pi = (strip['fr_width'] * int(line[frac_col]) +
                              strip['intercept'])
                except ValueError:
                    print('Cannot detect fraction for PSM {}'.format(sequence))
                else:
                    if pred_pi != 'NA':
                        delta_pi = exp_pi - pred_pi
            yield line + [exp_pi, pred_pi, delta_pi]
    print('Number of peptides without pI prediction: {}\n'
          'Number of peptides predicted: {}\n'.format(not_predicted_count,
                                                      predicted_count))
0
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
108
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
109
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
def parse_commandline():
    """Parse the options in ``sys.argv[1:]`` and return the namespace.

    Column numbers and column patterns default to ``False`` so callers can
    test them for truthiness; ``--ignoremods`` defaults to an empty list.
    The strip options (``--strippatterns``, ``--intercepts``, ``--widths``)
    each take one or more values and are ``None`` when not given.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--out', dest='outpeptable', help='Output peptide '
                        'table')
    parser.add_argument('-p', dest='peptable', help='Peptide/PSM table with '
                        'peptides, FDR, fraction numbers. Used to calculate '
                        'pI shift.')
    parser.add_argument('-i', dest='pipeps', help='A tab-separated txt file '
                        'with peptide seq, pI value')
    parser.add_argument('--pepcolpattern', dest='pepcolpattern',
                        help='Peptide sequence column pattern in peptide '
                        'table.', default=False, type=str)
    parser.add_argument('--pepcol', dest='pepcol', help='Peptide sequence '
                        'column number in peptide table. First column is 1.',
                        default=False, type=int)
    parser.add_argument('--fraccolpattern', dest='frac_colpattern',
                        help='Fraction number column pattern in peptide '
                        'table.', default=False, type=str)
    parser.add_argument('--fraccol', dest='frac_col', help='Fraction number '
                        'column number in peptide table. First column is 1.',
                        default=False, type=int)
    parser.add_argument('--ignoremods', dest='ignoremods', help='Regex to '
                        'identify modification weights to be ignored.',
                        default=[], nargs='+', type=str)
    parser.add_argument('--stripcolpattern', dest='stripcolpattern',
                        help='Strip name column pattern in peptide '
                        'table.', type=str, default=False)
    parser.add_argument('--stripcol', dest='stripcol', help='Strip name '
                        'column number in peptide table. Will be used to '
                        'detect strips if multiple are present using pattern '
                        'passed with --strippatterns. First column is nr. 1.',
                        default=False, type=int)
    parser.add_argument('--strippatterns', dest='pipatterns',
                        help='Patterns to detect different pI ranges from e.g.'
                        ' file name in peptide table', nargs='+')
    parser.add_argument('--intercepts', dest='intercepts',
                        help='pI Intercept of strips', nargs='+', type=float)
    parser.add_argument('--widths', dest='fr_width', nargs='+',
                        help='Strip fraction widths in pI', type=float)
    return parser.parse_args(sys.argv[1:])
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
151
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
152
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
# Run the annotator only when executed as a script, not when imported.
if __name__ == '__main__':
    main()