Mercurial > repos > galaxyp > hirieftools
annotate peptide_pi_annotator.py @ 3:78afc81ab244 draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit decb06dc90d7069d317968b979f649a04720b264
author | galaxyp |
---|---|
date | Thu, 14 Sep 2017 11:55:02 -0400 |
parents | 8a30d6e5b97d |
children |
rev | line source |
---|---|
0
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
1 #!/usr/bin/env python |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
2 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
3 import re |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
4 import sys |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
5 import argparse |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
6 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
7 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
def main():
    """Command-line entry point: write a pI-annotated copy of a peptide table.

    Parses the command line, resolves the fraction, strip and peptide
    sequence columns to 0-based indices (from a column number or from a
    header pattern), builds the per-strip intercept/width lookup and writes
    every row produced by annotate_peptable() as a tab-separated line to the
    file given with --out.

    Raises:
        RuntimeError: if a required column is not specified, if no strip
            patterns are given, or if there are fewer intercepts or widths
            than strip patterns.
    """
    # Running without any arguments shows the help text.
    if sys.argv[1:] == []:
        sys.argv.append('-h')
    args = parse_commandline()

    # Positive column numbers are 1-based on the command line and are
    # converted to 0-based indices; any other non-zero value (a negative
    # number) is used unchanged as an index.
    if args.frac_col > 0:
        frac_col = args.frac_col - 1
    elif args.frac_col:
        frac_col = args.frac_col
    elif args.frac_colpattern:
        frac_col = get_col_by_pattern(args.peptable, args.frac_colpattern)
    else:
        raise RuntimeError('Must define fraction column')

    if args.stripcol > 0:
        stripcol = args.stripcol - 1
    elif args.stripcol:
        stripcol = args.stripcol
    elif args.stripcolpattern:
        stripcol = get_col_by_pattern(args.peptable, args.stripcolpattern)
    else:
        raise RuntimeError('Must define strip column')

    if args.pepcol:
        pepcol = args.pepcol - 1
    elif args.pepcolpattern:
        pepcol = get_col_by_pattern(args.peptable, args.pepcolpattern)
    else:
        raise RuntimeError('Must define peptide sequence column')

    # The three strip options are optional for argparse, so they may be None.
    # Fail with a clear message instead of a TypeError/IndexError when they
    # are missing or when a pattern has no matching intercept or width.
    patterns = args.pipatterns or []
    intercepts = args.intercepts or []
    widths = args.fr_width or []
    if not patterns:
        raise RuntimeError('Must define strip patterns')
    if len(intercepts) < len(patterns) or len(widths) < len(patterns):
        raise RuntimeError('Must define an intercept and a fraction width '
                           'for each strip pattern')
    # Surplus intercepts/widths are ignored, as zip() stops at the patterns.
    strips = {pattern: {'intercept': intercept, 'fr_width': width}
              for pattern, intercept, width
              in zip(patterns, intercepts, widths)}

    with open(args.outpeptable, 'w') as fp:
        for outline in annotate_peptable(args.pipeps, args.peptable, pepcol,
                                         frac_col, stripcol, strips,
                                         args.ignoremods):
            fp.write('\t'.join([str(x) for x in outline]))
            fp.write('\n')
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
44 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
45 |
3
78afc81ab244
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit decb06dc90d7069d317968b979f649a04720b264
galaxyp
parents:
1
diff
changeset
|
def get_strip(strips, string):
    """Look up the strip whose pattern matches the given string.

    Args:
        strips: dict mapping a regular expression to a strip's settings.
        string: text searched with each regular expression in turn.

    Returns:
        The value stored for the first pattern (in dict iteration order)
        found anywhere in string, or False when no pattern matches.
    """
    matching = (settings for regex, settings in strips.items()
                if re.search(regex, string))
    return next(matching, False)
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
51 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
52 |
1
8a30d6e5b97d
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents:
0
diff
changeset
|
def get_col_by_pattern(peptable, colpattern):
    """Return the 0-based index of the first header field containing a pattern.

    Args:
        peptable: path to a tab-separated table whose first line is the
            header.
        colpattern: plain substring (not a regular expression) looked for in
            each header field.

    Returns:
        Index of the first header field that contains colpattern.

    Raises:
        RuntimeError: if the table is empty or no header field contains
            colpattern. Failing here avoids handing None to the caller,
            which would only break later when used as a list index.
    """
    with open(peptable) as fp:
        first_line = next(fp, None)
    if first_line is None:
        raise RuntimeError('Cannot find column with pattern "{}": table {} '
                           'is empty'.format(colpattern, peptable))
    header = first_line.strip('\n').split('\t')
    for ix, field in enumerate(header):
        if colpattern in field:
            return ix
    raise RuntimeError('Cannot find column with pattern "{}" in header of '
                       'table {}'.format(colpattern, peptable))
8a30d6e5b97d
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents:
0
diff
changeset
|
59 |
8a30d6e5b97d
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents:
0
diff
changeset
|
60 |
0
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
def annotate_peptable(predicted_peps_fn, peptable, seqcol, frac_col, stripcol,
                      strips, ignoremods):
    """Yield the rows of a peptide table extended with three pI columns.

    The first yielded item is the header plus 'Experimental pI',
    'Predicted pI' and 'Delta pI'. Each following item is a table row (list
    of strings) plus those three values, where a value is the string 'NA'
    when it cannot be determined.

    Args:
        predicted_peps_fn: path to a tab-separated file whose first field is
            the peptide sequence and second field its predicted pI.
        peptable: path to the tab-separated peptide/PSM table with a header.
        seqcol: 0-based index of the peptide sequence column.
        frac_col: 0-based index of the fraction number column.
        stripcol: 0-based index of the column matched against strip patterns.
        strips: dict of regex pattern -> {'intercept': ..., 'fr_width': ...}.
        ignoremods: modification weights to strip from sequences before the
            lookup; each is used as a regex after a +/- sign, and '*' strips
            any signed decimal number.

    Yields:
        Lists: the extended header, then one extended row per table line.

    Side effects:
        Prints unpredictable sequences, undetectable fractions and a final
        summary of predicted/unpredicted counts to stdout.
    """
    predicted_peps = {}
    with open(predicted_peps_fn) as fp:
        for line in fp:
            fields = line.strip('\n').split('\t')
            # Skip blank or truncated lines (e.g. a trailing empty line)
            # rather than failing with an IndexError.
            if len(fields) < 2:
                continue
            predicted_peps[fields[0]] = fields[1]
    not_predicted_count, predicted_count = 0, 0
    with open(peptable) as fp:
        header = next(fp).strip('\n').split('\t')
        yield header + ['Experimental pI', 'Predicted pI', 'Delta pI']
        # Compile the modification patterns once instead of for every row.
        # The raw string avoids invalid escape sequences in the literal.
        mod_patterns = [
            re.compile(r'[+-]\d*\.\d*' if weight == '*'
                       else '[+-]{}'.format(weight))
            for weight in ignoremods]
        for line in fp:
            line = line.strip('\n').split('\t')
            sequence = line[seqcol]
            for mod_pattern in mod_patterns:
                sequence = mod_pattern.sub('', sequence)
            try:
                pred_pi = float(predicted_peps[sequence])
            except KeyError:
                print('CANNOT PREDICT', sequence)
                not_predicted_count += 1
                pred_pi, delta_pi = 'NA', 'NA'
            else:
                predicted_count += 1
            strip = get_strip(strips, line[stripcol])
            if not strip:
                # No strip pattern matches this row: no experimental pI.
                exp_pi, delta_pi = 'NA', 'NA'
            else:
                try:
                    # Linear model: fraction number * width + intercept.
                    exp_pi = (strip['fr_width'] * int(line[frac_col]) +
                              strip['intercept'])
                except ValueError:
                    print('Cannot detect fraction for PSM {}'.format(sequence))
                    exp_pi, delta_pi = 'NA', 'NA'
                else:
                    if pred_pi != 'NA':
                        delta_pi = exp_pi - pred_pi
                    else:
                        delta_pi = 'NA'
            yield line + [exp_pi, pred_pi, delta_pi]
    print('Number of peptides without pI prediction: {}\n'
          'Number of peptides predicted: {}\n'.format(not_predicted_count,
                                                      predicted_count))
0
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
108 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
109 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
def parse_commandline():
    """Define the command-line options and parse sys.argv.

    Column options come in pairs: a column number (--pepcol, --fraccol,
    --stripcol) or a header pattern (--pepcolpattern, --fraccolpattern,
    --stripcolpattern). All of them default to False when not given.
    --strippatterns, --intercepts and --widths each take one or more values.

    Returns:
        argparse.Namespace with attributes outpeptable, peptable, pipeps,
        pepcolpattern, pepcol, frac_colpattern, frac_col, ignoremods,
        stripcolpattern, stripcol, pipatterns, intercepts and fr_width.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('--out', dest='outpeptable', help='Output peptide '
                        'table')
    # The trailing space after "calculate" keeps the adjacent literals from
    # running together in the help text.
    parser.add_argument('-p', dest='peptable', help='Peptide/PSM table with '
                        'peptides, FDR, fraction numbers. Used to calculate '
                        'pI shift.')
    parser.add_argument('-i', dest='pipeps', help='A tab-separated txt file '
                        'with peptide seq, pI value')
    parser.add_argument('--pepcolpattern', dest='pepcolpattern',
                        help='Peptide sequence column pattern in peptide '
                        'table.', default=False, type=str)
    parser.add_argument('--pepcol', dest='pepcol', help='Peptide sequence '
                        'column number in peptide table. First column is 1.',
                        default=False, type=int)
    parser.add_argument('--fraccolpattern', dest='frac_colpattern',
                        help='Fraction number column pattern in peptide '
                        'table.', default=False, type=str)
    parser.add_argument('--fraccol', dest='frac_col', help='Fraction number '
                        'column number in peptide table. First column is 1.',
                        default=False, type=int)
    parser.add_argument('--ignoremods', dest='ignoremods', help='Regex to '
                        'identify modification weights to be ignored.',
                        default=[], nargs='+', type=str)
    parser.add_argument('--stripcolpattern', dest='stripcolpattern',
                        help='Strip name column pattern in peptide '
                        'table.', type=str, default=False)
    parser.add_argument('--stripcol', dest='stripcol', help='Strip name '
                        'column number in peptide table. Will be used to '
                        'detect strips if multiple are present using pattern '
                        'passed with --strippatterns. First column is nr. 1.',
                        default=False, type=int)
    parser.add_argument('--strippatterns', dest='pipatterns',
                        help='Patterns to detect different pI ranges from e.g.'
                        ' file name in peptide table', nargs='+')
    parser.add_argument('--intercepts', dest='intercepts',
                        help='pI Intercept of strips', nargs='+', type=float)
    parser.add_argument('--widths', dest='fr_width', nargs='+',
                        help='Strip fraction widths in pI', type=float)
    return parser.parse_args(sys.argv[1:])
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
151 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
152 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
# Run the annotator only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == '__main__':
    main()