annotate peptide_pi_annotator.py @ 2:77ddaee887a8 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit 71a4265d11aef48342142b8cf2caa86f79f9a554
author galaxyp
date Fri, 01 Sep 2017 03:14:54 -0400
parents 8a30d6e5b97d
children 78afc81ab244
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
1 #!/usr/bin/env python
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
2
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
3 import re
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
4 import sys
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
5 import argparse
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
6
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
7
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
def main():
    """Annotate a peptide table with experimental, predicted and delta pI.

    Parses the command line, resolves the fraction, strip and peptide
    sequence columns (given either as a column number or as a header
    pattern), builds the per-strip calibration from the strip patterns,
    intercepts and fraction widths, and writes the annotated rows as
    tab-separated text to the output file.

    Raises:
        RuntimeError: if the fraction, strip or peptide column is not
            specified, if strip patterns, intercepts or widths are not
            given, or if there are fewer intercepts or widths than strip
            patterns.
    """
    if not sys.argv[1:]:
        # Called without any arguments: show the help text instead.
        sys.argv.append('-h')
    args = parse_commandline()

    # Column numbers on the command line are 1-based. Positive numbers are
    # converted to 0-based indices; negative numbers are passed through
    # unchanged, so they index from the end of each row.
    if args.frac_col > 0:
        frac_col = args.frac_col - 1
    elif args.frac_col:
        frac_col = args.frac_col
    elif args.frac_colpattern:
        frac_col = get_col_by_pattern(args.peptable, args.frac_colpattern)
    else:
        raise RuntimeError('Must define fraction column')

    if args.stripcol > 0:
        stripcol = args.stripcol - 1
    elif args.stripcol:
        stripcol = args.stripcol
    elif args.stripcolpattern:
        stripcol = get_col_by_pattern(args.peptable, args.stripcolpattern)
    else:
        raise RuntimeError('Must define strip column')

    # NOTE(review): unlike the two columns above, every non-zero peptide
    # column number (negative ones included) has 1 subtracted. Kept as is;
    # confirm whether negative values are meant to be supported here.
    if args.pepcol:
        pepcol = args.pepcol - 1
    elif args.pepcolpattern:
        pepcol = get_col_by_pattern(args.peptable, args.pepcolpattern)
    else:
        raise RuntimeError('Must define peptide sequence column')

    # Fail early with a clear message instead of a TypeError/IndexError
    # when the per-strip calibration is missing or incomplete.
    if (args.pipatterns is None or args.intercepts is None
            or args.fr_width is None):
        raise RuntimeError('Must define strip patterns, intercepts and '
                           'widths')
    nr_strips = len(args.pipatterns)
    if len(args.intercepts) < nr_strips or len(args.fr_width) < nr_strips:
        raise RuntimeError('Need an intercept and a width for each strip '
                           'pattern')

    # Patterns, intercepts and widths are matched up by position; zip stops
    # at the last pattern, so surplus intercepts/widths are ignored.
    strips = {
        pattern: {'intercept': intercept, 'fr_width': width}
        for pattern, intercept, width in zip(args.pipatterns,
                                             args.intercepts,
                                             args.fr_width)
    }

    with open(args.outpeptable, 'w') as fp:
        for outline in annotate_peptable(args.pipeps, args.peptable, pepcol,
                                         frac_col, stripcol, strips,
                                         args.ignoremods):
            fp.write('\t'.join(str(x) for x in outline))
            fp.write('\n')
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
44
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
45
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
def get_first_matching_pattern(patterns, string):
    """Return the first pattern that is found somewhere in `string`.

    The patterns are tried in iteration order using re.search, so a hit
    anywhere in the string counts. When no pattern matches, False is
    returned instead of a pattern.
    """
    hits = (candidate for candidate in patterns
            if re.search(candidate, string))
    return next(hits, False)
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
51
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
52
1
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
def get_col_by_pattern(peptable, colpattern):
    """Return the 0-based index of the first header field containing a text.

    Only the first line of the tab-separated file `peptable` is read. The
    match is a plain substring test (`colpattern in field`), not a regex.

    Raises:
        RuntimeError: if no header field contains `colpattern` (this also
            covers a file without any lines), so that a wrong pattern is
            reported here rather than as an indexing error later on.
    """
    with open(peptable) as fp:
        # Default to an empty header line when the file has no lines.
        header = next(fp, '').strip('\n').split('\t')
    for ix, field in enumerate(header):
        if colpattern in field:
            return ix
    raise RuntimeError('Could not find a column matching pattern "{}" in '
                       'header of {}'.format(colpattern, peptable))
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
59
8a30d6e5b97d planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents: 0
diff changeset
60
0
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
def annotate_peptable(predicted_peps_fn, peptable, seqcol, frac_col, stripcol,
                      strips, ignoremods):
    """Yield the rows of a peptide table extended with pI annotation.

    The first item yielded is the header of `peptable` with the fields
    'Experimental pI', 'Predicted pI' and 'Delta pI' appended. Each
    following item is a table row (list of strings) with those three values
    appended. Progress and summary messages are printed to stdout.

    Args:
        predicted_peps_fn: tab-separated file; every line is read as
            sequence (first field) and predicted pI (second field).
        peptable: tab-separated peptide table whose first line is a header.
        seqcol: index of the peptide sequence field in a row.
        frac_col: index of the fraction number field in a row.
        stripcol: index of the field used to detect the strip of a row.
        strips: dict mapping a strip regex pattern to a dict with the keys
            'intercept' and 'fr_width'.
        ignoremods: modification weights to remove from the sequence before
            lookup; each is inserted into a regex as given, and '*' stands
            for any signed decimal number.

    Raises:
        KeyError: if the strip field of a row matches none of the patterns
            in `strips`.
    """
    # Build the modification-removing regexes once instead of for every row.
    mod_regexes = [
        re.compile(r'[+-]\d*\.\d*' if weight == '*'
                   else '[+-]{}'.format(weight))
        for weight in ignoremods
    ]

    predicted_peps = {}
    with open(predicted_peps_fn) as fp:
        for line in fp:
            fields = line.strip('\n').split('\t')
            predicted_peps[fields[0]] = fields[1]

    not_predicted_count, predicted_count = 0, 0
    with open(peptable) as fp:
        header = next(fp).strip('\n').split('\t')
        yield header + ['Experimental pI', 'Predicted pI', 'Delta pI']
        for line in fp:
            line = line.strip('\n').split('\t')
            strip_pattern = get_first_matching_pattern(strips.keys(),
                                                       line[stripcol])
            if strip_pattern is False:
                # Same exception type as the failed dict lookup this
                # replaces, but with a message that names the culprit.
                raise KeyError('No strip pattern matches "{}"'.format(
                    line[stripcol]))
            strip = strips[strip_pattern]
            # Linear calibration: fraction number times the fraction width
            # plus the intercept of the strip.
            exp_pi = (strip['fr_width'] * int(line[frac_col]) +
                      strip['intercept'])

            sequence = line[seqcol]
            for mod_regex in mod_regexes:
                sequence = mod_regex.sub('', sequence)
            try:
                pred_pi = float(predicted_peps[sequence])
            except KeyError:
                print('CANNOT PREDICT', sequence)
                not_predicted_count += 1
                pred_pi, delta_pi = 'NA', 'NA'
            else:
                delta_pi = exp_pi - pred_pi
                predicted_count += 1
            yield line + [exp_pi, pred_pi, delta_pi]
    print('Number of peptides without pI prediction: {}\n'
          'Number of peptides with predicion: {}\n'.format(not_predicted_count,
                                                           predicted_count))
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
99
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
100
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
def parse_commandline(argv=None):
    """Parse the command line options of the annotator.

    Args:
        argv: list of argument strings to parse; when None (the default)
            the arguments are taken from sys.argv[1:].

    Returns:
        The argparse namespace. Column numbers and column patterns default
        to False when not given, ignoremods defaults to an empty list, and
        the remaining options default to None.
    """
    if argv is None:
        argv = sys.argv[1:]
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--out', dest='outpeptable',
                        help='Output peptide table')
    parser.add_argument('-p', dest='peptable',
                        help='Peptide/PSM table with peptides, FDR, fraction '
                        'numbers. Used to calculate pI shift.')
    parser.add_argument('-i', dest='pipeps',
                        help='A tab-separated txt file with peptide seq, '
                        'pI value')
    parser.add_argument('--pepcolpattern', dest='pepcolpattern',
                        help='Peptide sequence column pattern in peptide '
                        'table.', default=False, type=str)
    parser.add_argument('--pepcol', dest='pepcol',
                        help='Peptide sequence column number in peptide '
                        'table. First column is 1.',
                        default=False, type=int)
    parser.add_argument('--fraccolpattern', dest='frac_colpattern',
                        help='Fraction number column pattern in peptide '
                        'table.', default=False, type=str)
    parser.add_argument('--fraccol', dest='frac_col',
                        help='Fraction number column number in peptide '
                        'table. First column is 1.',
                        default=False, type=int)
    parser.add_argument('--ignoremods', dest='ignoremods',
                        help='Regex to identify modification weights to be '
                        'ignored.',
                        default=[], nargs='+', type=str)
    parser.add_argument('--stripcolpattern', dest='stripcolpattern',
                        help='Strip name column pattern in peptide '
                        'table.', type=str, default=False)
    parser.add_argument('--stripcol', dest='stripcol',
                        help='Strip name column number in peptide table. '
                        'Will be used to detect strips if multiple are '
                        'present using pattern passed with --strippatterns. '
                        'First column is nr. 1.',
                        default=False, type=int)
    parser.add_argument('--strippatterns', dest='pipatterns',
                        help='Patterns to detect different pI ranges from '
                        'e.g. file name in peptide table', nargs='+')
    parser.add_argument('--intercepts', dest='intercepts',
                        help='pI Intercept of strips', nargs='+', type=float)
    parser.add_argument('--widths', dest='fr_width', nargs='+',
                        help='Strip fraction widths in pI', type=float)
    return parser.parse_args(argv)
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
142
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
143
34c5c95740a1 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff changeset
# Run the annotator only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == '__main__':
    main()