comparison peptide_pi_annotator.py @ 1:8a30d6e5b97d draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
author galaxyp
date Mon, 24 Jul 2017 05:25:22 -0400
parents 34c5c95740a1
children 78afc81ab244
comparison
equal deleted inserted replaced
0:34c5c95740a1 1:8a30d6e5b97d
def _resolve_column(number, pattern, peptable, description):
    """Turn a column number or a header pattern into a row index.

    Args:
        number: Column number from the command line. A positive value is
            1-based and is shifted to a 0-based index; a negative value is
            returned unchanged so that it indexes from the end of the row.
            A falsy value (the option's default) means "not given".
        pattern: Header substring used to find the column when ``number``
            is not given.
        peptable: Path to the peptide table whose header is searched.
        description: Column description used in the error message.

    Returns:
        The index to use on a split table row.

    Raises:
        RuntimeError: If neither ``number`` nor ``pattern`` is given.
    """
    if number:
        return number - 1 if number > 0 else number
    if pattern:
        return get_col_by_pattern(peptable, pattern)
    raise RuntimeError('Must define {} column'.format(description))


def main():
    """Run the annotator: parse arguments, annotate, write a TSV file.

    The fraction, strip and peptide sequence columns are each resolved from
    either an explicit column number or a header pattern. Every row yielded
    by ``annotate_peptable`` is written tab-separated to ``args.outpeptable``.

    Raises:
        RuntimeError: If one of the three columns is defined neither by
            number nor by pattern.
    """
    if not sys.argv[1:]:
        # Called without arguments: request the parser's help instead.
        sys.argv.append('-h')
    args = parse_commandline()

    # All three columns follow the same rule. Previously --pepcol subtracted
    # one from negative numbers as well, unlike --fraccol and --stripcol.
    frac_col = _resolve_column(args.frac_col, args.frac_colpattern,
                               args.peptable, 'fraction')
    stripcol = _resolve_column(args.stripcol, args.stripcolpattern,
                               args.peptable, 'strip')
    pepcol = _resolve_column(args.pepcol, args.pepcolpattern,
                             args.peptable, 'peptide sequence')

    # Strip parameters are matched to strip patterns by position. Indexing
    # (rather than zip) keeps a length mismatch loud instead of truncating.
    strips = {
        strip: {'intercept': args.intercepts[i],
                'fr_width': args.fr_width[i]}
        for i, strip in enumerate(args.pipatterns)
    }

    with open(args.outpeptable, 'w') as fp:
        for outline in annotate_peptable(args.pipeps, args.peptable, pepcol,
                                         frac_col, stripcol, strips,
                                         args.ignoremods):
            fp.write('\t'.join(str(x) for x in outline))
            fp.write('\n')
23 44
24 45
27 if re.search(pattern, string): 48 if re.search(pattern, string):
28 return pattern 49 return pattern
29 return False 50 return False
30 51
31 52
def get_col_by_pattern(peptable, colpattern):
    """Return the 0-based index of the first header field containing a pattern.

    Only the first line of the table is read. It is split on tabs and each
    field is checked with a plain substring test (not a regular expression).

    Args:
        peptable: Path to a tab-separated table whose first line is a header.
        colpattern: Substring to look for in the header fields.

    Returns:
        Index of the first header field that contains ``colpattern``.

    Raises:
        RuntimeError: If no header field contains ``colpattern``, including
            when the table is empty.
    """
    with open(peptable) as fp:
        # Default to an empty header so that an empty file is reported with
        # the same error as a missing column rather than StopIteration.
        header = next(fp, '').strip('\n').split('\t')
    for ix, field in enumerate(header):
        if colpattern in field:
            return ix
    # Fail here rather than returning None, which would only surface later
    # as a TypeError when the value is used as a list index.
    raise RuntimeError('No column matching pattern "{}" found in header of '
                       '{}'.format(colpattern, peptable))
59
60
32 def annotate_peptable(predicted_peps_fn, peptable, seqcol, frac_col, stripcol, 61 def annotate_peptable(predicted_peps_fn, peptable, seqcol, frac_col, stripcol,
33 strips, ignoremods): 62 strips, ignoremods):
34 if frac_col > 0:
35 frac_col -= 1
36 predicted_peps = {} 63 predicted_peps = {}
37 with open(predicted_peps_fn) as fp: 64 with open(predicted_peps_fn) as fp:
38 for line in fp: 65 for line in fp:
39 line = line.strip('\n').split('\t') 66 line = line.strip('\n').split('\t')
40 predicted_peps[line[0]] = line[1] 67 predicted_peps[line[0]] = line[1]
43 header = next(fp).strip('\n').split('\t') 70 header = next(fp).strip('\n').split('\t')
44 yield header + ['Experimental pI', 'Predicted pI', 'Delta pI'] 71 yield header + ['Experimental pI', 'Predicted pI', 'Delta pI']
45 for line in fp: 72 for line in fp:
46 line = line.strip('\n').split('\t') 73 line = line.strip('\n').split('\t')
47 strip = strips[get_first_matching_pattern(strips.keys(), 74 strip = strips[get_first_matching_pattern(strips.keys(),
48 line[stripcol - 1])] 75 line[stripcol])]
49 exp_pi = (strip['fr_width'] * int(line[frac_col]) + 76 exp_pi = (strip['fr_width'] * int(line[frac_col]) +
50 strip['intercept']) 77 strip['intercept'])
51 78
52 sequence = line[seqcol - 1] 79 sequence = line[seqcol]
53 for weight in ignoremods: 80 for weight in ignoremods:
54 if weight == '*': 81 if weight == '*':
55 regex = '[+-]\d*\.\d*' 82 regex = '[+-]\d*\.\d*'
56 else: 83 else:
57 regex = '[+-]{}'.format(weight) 84 regex = '[+-]{}'.format(weight)
79 parser.add_argument('-p', dest='peptable', help='Peptide/PSM table with ' 106 parser.add_argument('-p', dest='peptable', help='Peptide/PSM table with '
80 'peptides, FDR, fraction numbers. Used to calculate' 107 'peptides, FDR, fraction numbers. Used to calculate'
81 'pI shift.') 108 'pI shift.')
82 parser.add_argument('-i', dest='pipeps', help='A tab-separated txt file ' 109 parser.add_argument('-i', dest='pipeps', help='A tab-separated txt file '
83 'with peptide seq, pI value') 110 'with peptide seq, pI value')
111 parser.add_argument('--pepcolpattern', dest='pepcolpattern',
112 help='Peptide sequence column pattern in peptide '
113 'table.', default=False, type=str)
84 parser.add_argument('--pepcol', dest='pepcol', help='Peptide sequence ' 114 parser.add_argument('--pepcol', dest='pepcol', help='Peptide sequence '
85 'column number in peptide table. First column is 1.', 115 'column number in peptide table. First column is 1.',
86 default=False, type=int) 116 default=False, type=int)
117 parser.add_argument('--fraccolpattern', dest='frac_colpattern',
118 help='Fraction number column pattern in peptide '
119 'table.', default=False, type=str)
87 parser.add_argument('--fraccol', dest='frac_col', help='Fraction number ' 120 parser.add_argument('--fraccol', dest='frac_col', help='Fraction number '
88 'column number in peptide table. First column is 1.', 121 'column number in peptide table. First column is 1.',
89 type=int) 122 default=False, type=int)
90 parser.add_argument('--ignoremods', dest='ignoremods', help='Regex to ' 123 parser.add_argument('--ignoremods', dest='ignoremods', help='Regex to '
91 'identify modification weights to be ignored.', 124 'identify modification weights to be ignored.',
92 default=[], nargs='+', type=str) 125 default=[], nargs='+', type=str)
126 parser.add_argument('--stripcolpattern', dest='stripcolpattern',
127 help='Strip name column pattern in peptide '
128 'table.', type=str, default=False)
93 parser.add_argument('--stripcol', dest='stripcol', help='Strip name ' 129 parser.add_argument('--stripcol', dest='stripcol', help='Strip name '
94 'column number in peptide table. Will be used to ' 130 'column number in peptide table. Will be used to '
95 'detect strips if multiple are present using pattern ' 131 'detect strips if multiple are present using pattern '
96 'passed with --strippatterns. First column is nr. 1.', 132 'passed with --strippatterns. First column is nr. 1.',
97 default=False, type=int) 133 default=False, type=int)