Mercurial > repos > galaxyp > hirieftools
annotate pi_database_splitter.py @ 3:78afc81ab244 draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit decb06dc90d7069d317968b979f649a04720b264
author | galaxyp |
---|---|
date | Thu, 14 Sep 2017 11:55:02 -0400 |
parents | 8a30d6e5b97d |
children |
rev | line source |
---|---|
0
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
1 #!/usr/bin/env python |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
2 import sys |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
3 import argparse |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
4 from numpy import median |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
5 from contextlib import ExitStack |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
6 |
1
8a30d6e5b97d
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents:
0
diff
changeset
|
7 from peptide_pi_annotator import get_col_by_pattern |
8a30d6e5b97d
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
galaxyp
parents:
0
diff
changeset
|
8 |
0
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
9 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
def main():
    """Command line entry point: split a pI-annotated peptide file by fraction.

    Parses the command line (showing the help text when no arguments are
    given), resolves the FDR and delta pI columns of the training peptide
    table, derives the pI shift from that table, builds the fraction bins and
    hands everything to ``write_fractions``.

    When no training peptide table is passed (``-p`` left out), no shift is
    applied, as the ``-p`` help text promises.
    """
    if not sys.argv[1:]:
        sys.argv.append('-h')
    args = parse_commandline()
    locfun = reverse_locatefraction if args.reverse else locatefraction
    if not args.train_peptable:
        # Without a training table there is nothing to open or to look up
        # header patterns in, so a shift of 0 leaves the bins unshifted.
        pishift = 0
    else:
        fdrcol = _resolve_column(args.fdrcol, args.fdrcolpattern,
                                 args.train_peptable)
        deltapicol = _resolve_column(args.deltapicol, args.deltapicolpattern,
                                     args.train_peptable)
        pishift = get_pishift(args.train_peptable, fdrcol, deltapicol,
                              args.fdrcutoff, args.picutoff)
    binarray = get_bin_array(args.fr_amount, args.fr_width, args.intercept,
                             args.tolerance, pishift)
    write_fractions(args.pipeps, args.fr_amount, args.prefix,
                    binarray, locfun, args.minlen, args.maxlen)


def _resolve_column(number, pattern, peptable):
    """Translate a user supplied column number or header pattern to an index.

    Column nrs should start from 0, so a positive (1-based) number is
    decremented. If negative, -1 is last item in list, etc, so it is used
    unchanged. Without a number the header pattern is looked up with
    ``get_col_by_pattern``; ``False`` is returned when neither is given.
    """
    if number > 0:
        return number - 1
    if number:
        return number
    if pattern:
        return get_col_by_pattern(peptable, pattern)
    return False
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
41 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
42 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
def locatefraction(pep_pi, bins):
    """Collect the identifiers of the bins that contain ``pep_pi``.

    Each bin is indexed as ``bin[0]`` (identifier), ``bin[1]`` (lower bound)
    and ``bin[2]`` (upper bound); both bounds are inclusive. Bins whose upper
    bound lies below ``pep_pi`` are passed over, and the scan stops at the
    first bin whose lower bound is not reached by ``pep_pi``.
    """
    hits = []
    for entry in bins:
        if pep_pi > entry[2]:
            # Value lies beyond this bin, a later bin may still match.
            continue
        if not (pep_pi >= entry[1]):
            # Value does not reach this bin: stop scanning.
            break
        hits.append(entry[0])
    return hits
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
53 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
54 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
def reverse_locatefraction(pep_pi, bins):
    """Collect the identifiers of the bins containing ``pep_pi`` (reversed scan).

    Each bin is indexed as ``bin[0]`` (identifier), ``bin[1]`` (lower bound)
    and ``bin[2]`` (upper bound). The lower bound is inclusive, the upper
    bound exclusive. Bins whose lower bound lies above ``pep_pi`` are passed
    over, and the scan stops at the first bin whose upper bound is not above
    ``pep_pi``.
    """
    hits = []
    for entry in bins:
        if pep_pi < entry[1]:
            # Value lies below this bin, a later bin may still match.
            continue
        if not (pep_pi < entry[2]):
            # Value is at or above the upper bound: stop scanning.
            break
        hits.append(entry[0])
    return hits
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
65 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
66 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
def parse_commandline():
    """Define the command line options and parse ``sys.argv[1:]``.

    The options are kept in a table of ``(flag, keyword arguments)`` pairs
    and registered in that order. Returns the ``argparse`` namespace.
    """
    option_table = (
        ('-p', dict(
            dest='train_peptable',
            help=('Peptide table with '
                  'peptides, FDR, and fraction numbers. Used to '
                  'calculate pI shift. Leave emtpy for no shift. '
                  'Tab separated file.'))),
        ('--deltacol', dict(
            dest='deltapicol', default=False, type=int,
            help=('Delta pI column'
                  ' number in peptide table. First column is nr. 1. '
                  'Negative number for counting from last col '
                  '(-1 is last).'))),
        ('--deltacolpattern', dict(
            dest='deltapicolpattern', default=False, type=str,
            help=('Delta pI column header pattern in peptide '
                  'table.'))),
        ('--picutoff', dict(
            dest='picutoff', default=0.2, type=float,
            help=('delta pI value to filter experimental peptides'
                  ' when calculating pi shift.'))),
        ('--fdrcolpattern', dict(
            dest='fdrcolpattern', default=False, type=str,
            help='FDR column header pattern in peptide table.')),
        ('--fdrcol', dict(
            dest='fdrcol', default=False, type=int,
            help=('FDR column number in '
                  'peptide table. First column is nr. 1. Empty includes '
                  'all peptides'))),
        ('--fdrcutoff', dict(
            dest='fdrcutoff', default=0, type=float,
            help=('FDR cutoff value to filter experimental peptides'
                  ' when calculating pi shift.'))),
        ('-i', dict(
            dest='pipeps',
            help=('A tab-separated txt file '
                  'with accession, peptide seq, pI value'))),
        ('--prefix', dict(
            dest='prefix', default='pisep',
            help='Prefix for target/decoy output files')),
        ('--tolerance', dict(
            dest='tolerance', type=float,
            help=('Strip fraction tolerance pi tolerance represents'
                  ' 2.5/97.5 percentile'))),
        ('--amount', dict(
            dest='fr_amount', type=int,
            help='Strip fraction amount')),
        ('--reverse', dict(
            dest='reverse', action='store_true',
            help='Strip is reversed')),
        ('--intercept', dict(
            dest='intercept', type=float,
            help='pI Intercept of strip')),
        ('--width', dict(
            dest='fr_width', type=float,
            help='Strip fraction width in pI')),
        ('--minlen', dict(
            dest='minlen', type=int,
            help='Minimal peptide length')),
        ('--maxlen', dict(
            dest='maxlen', type=int, default=False,
            help='Maximal peptide length')),
    )
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter)
    for flag, settings in option_table:
        cli.add_argument(flag, **settings)
    return cli.parse_args(sys.argv[1:])
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
113 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
114 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
def get_pishift(peptable, fdrcol, deltapicol, fdrcutoff, delta_pi_cutoff):
    """Return the median delta pI of the accepted rows of a peptide table.

    The first line of the file is skipped as a header; every other line is
    split on tabs. A row contributes its delta pI value when:

    * FDR filtering is off, or its FDR field is numeric and not above
      ``fdrcutoff``;
    * its delta pI field is numeric and below ``delta_pi_cutoff``.

    Rows with a non-numeric FDR or delta pI field are skipped.

    Args:
        peptable: Path of the tab-separated peptide table.
        fdrcol: Index of the FDR field in a row (0 is the first field,
            negative values count from the end). ``False`` or ``None``
            switches FDR filtering off.
        deltapicol: Index of the delta pI field in a row.
        fdrcutoff: Highest FDR value a row may have to be used.
        delta_pi_cutoff: Delta pI values must be below this to be used.

    Returns:
        ``numpy.median`` of the collected delta pI values. The value is also
        printed to stdout.
    """
    # Index 0 is a valid column, so compare against the "not set" sentinels
    # instead of relying on truthiness (which would drop the first column).
    filter_on_fdr = fdrcol is not None and fdrcol is not False
    delta_pis = []
    with open(peptable) as fp:
        next(fp)  # skip header
        for line in fp:
            fields = line.strip('\n').split('\t')
            if filter_on_fdr:
                try:
                    fdr = float(fields[fdrcol])
                except ValueError:
                    continue
                if fdr > fdrcutoff:
                    continue
            try:
                delta_pi = float(fields[deltapicol])
            except ValueError:
                continue
            if delta_pi < delta_pi_cutoff:
                delta_pis.append(delta_pi)
    shift = median(delta_pis)
    print('pI shift (median of delta pIs): {}'.format(shift))
    return shift
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
137 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
138 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
def get_bin_array(amount_fractions, fr_width, intercept, tolerance, pi_shift):
    """Compute the pI interval covered by each fraction.

    Fractions are numbered 1..amount_fractions. The center of fraction
    ``frnr`` is ``fr_width * frnr + intercept``; its interval reaches half a
    fraction width plus ``tolerance`` to either side of that center, and both
    edges are then moved down by ``pi_shift``. Each interval is also printed
    to stdout.

    Args:
        amount_fractions: Number of fractions (integer).
        fr_width: Width of a single fraction.
        intercept: Offset added to ``fr_width * frnr`` to get the center.
        tolerance: Extra margin added on both sides of every interval.
        pi_shift: Value subtracted from both edges of every interval.

    Returns:
        A list of ``(frnr, bin_left, bin_right)`` tuples in ascending
        fraction order; empty when ``amount_fractions`` is less than 1.
    """
    half_width = fr_width / 2
    bin_array = []
    for frnr in range(1, amount_fractions + 1):
        pi_center = fr_width * frnr + intercept
        # Keep the subtraction order stable so float results do not drift.
        bin_left = pi_center - half_width - tolerance - pi_shift
        bin_right = pi_center + half_width + tolerance - pi_shift
        print('Bins in fraction', frnr, bin_left, bin_right)
        bin_array.append((frnr, bin_left, bin_right))
    return bin_array
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
150 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
151 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
def write_fractions(pi_peptides_fn, amount_fractions, out_prefix,
                    bin_array, locate_function, minlen, maxlen):
    """Distribute peptides over per-fraction target and decoy FASTA files.

    Every line of ``pi_peptides_fn`` must hold exactly three tab-separated
    fields: the accession text used as FASTA header, the peptide sequence,
    and its pI. A peptide that passes the length filter is appended to the
    target file of every fraction number yielded by
    ``locate_function(pi, bin_array)``, and its decoy is appended to the
    matching decoy file. The decoy is the reversed peptide, except that a
    C-terminal K or R stays in the last position.

    Output files are named ``{out_prefix}_fr{N}.fasta`` and
    ``decoy_{out_prefix}_fr{N}.fasta``, with ``N`` zero-padded to the number
    of digits in ``amount_fractions``. Note that ``decoy_`` is prepended to
    the whole prefix, so a prefix containing a directory part changes the
    directory name of the decoy files.

    Args:
        pi_peptides_fn: Path of the tab-separated input file.
        amount_fractions: Number of fractions; files are opened for 1..N.
        out_prefix: Prefix used to build the output file names.
        bin_array: Passed through unchanged to ``locate_function``.
        locate_function: Callable ``(pi, bin_array)`` returning an iterable
            of fraction numbers the peptide belongs to.
        minlen: Peptides shorter than this are skipped.
        maxlen: Peptides longer than this are skipped; a falsy value
            disables the upper limit.

    Raises:
        ValueError: If a line does not have exactly three fields or its pI
            cannot be parsed as a float.
        KeyError: If ``locate_function`` yields a fraction number outside
            1..amount_fractions.
    """
    # Records are buffered in memory and written out each time this many
    # peptides have been accepted, to avoid one write call per peptide.
    flush_threshold = 1000000
    pad = len(str(amount_fractions))
    fraction_numbers = range(1, amount_fractions + 1)

    def open_outputs(stack, name_template):
        # Map fraction number -> (pending records, open file handle).
        return {
            frnr: ([], stack.enter_context(open(
                name_template.format(p=out_prefix, i=frnr, pad=pad), 'w')))
            for frnr in fraction_numbers}

    def flush(outputs):
        # Write the pending records of every fraction and empty the buffers.
        for records, out_fp in outputs.values():
            out_fp.write(''.join(records))
            records.clear()

    with ExitStack() as stack:
        target_out = open_outputs(stack, '{p}_fr{i:0{pad}}.fasta')
        decoy_out = open_outputs(stack, 'decoy_{p}_fr{i:0{pad}}.fasta')
        input_fp = stack.enter_context(open(pi_peptides_fn))
        pepcount = 0
        for line in input_fp:
            accs, pep, pi = line.strip().split("\t")
            # Parsed before the length filter so that a malformed pI is
            # reported even for peptides that would be skipped.
            pi = float(pi)
            if (maxlen and len(pep) > maxlen) or len(pep) < minlen:
                continue
            pepcount += 1
            if pep[-1] in ('K', 'R'):
                # pseudoReversed decoy: reverse everything but the
                # C-terminal residue.
                rev_pep = pep[:-1][::-1] + pep[-1]
            else:
                rev_pep = pep[::-1]
            target_record = '>{}\n{}\n'.format(accs, pep)
            decoy_record = '>decoy_{}\n{}\n'.format(accs, rev_pep)
            for frnr in locate_function(pi, bin_array):
                target_out[frnr][0].append(target_record)
                decoy_out[frnr][0].append(decoy_record)
            if pepcount > flush_threshold:
                pepcount = 0
                flush(target_out)
                flush(decoy_out)
        # Write whatever is still buffered after the last input line.
        flush(target_out)
        flush(decoy_out)
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
194 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
195 |
34c5c95740a1
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit a58e2a324724f344a07d4499c860a5b2da06927d
galaxyp
parents:
diff
changeset
|
# Run the command-line entry point only when executed as a script, not when
# this module is imported.
if __name__ == '__main__':
    main()