# HG changeset patch # User galaxyp # Date 1500888322 14400 # Node ID 8a30d6e5b97ddcf2a9c6ed02f787b233c921f954 # Parent 34c5c95740a17cbc3835ca179042924b78a72a18 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3 diff -r 34c5c95740a1 -r 8a30d6e5b97d README.rst --- a/README.rst Mon May 22 05:08:23 2017 -0400 +++ b/README.rst Mon Jul 24 05:25:22 2017 -0400 @@ -1,4 +1,4 @@ -GalaxyP - Percolator +GalaxyP - HiRIEF tools ======================= - Home: diff -r 34c5c95740a1 -r 8a30d6e5b97d __pycache__/peptide_pi_annotator.cpython-36.pyc Binary file __pycache__/peptide_pi_annotator.cpython-36.pyc has changed diff -r 34c5c95740a1 -r 8a30d6e5b97d delta_pi_calc.xml --- a/delta_pi_calc.xml Mon May 22 05:08:23 2017 -0400 +++ b/delta_pi_calc.xml Mon Jul 24 05:25:22 2017 -0400 @@ -1,11 +1,25 @@ - + python to peptide table - python '$__tool_directory__/peptide_pi_annotator.py' -i '$trainingpi' -p '$peptable' - --stripcol $stripcol --pepcol $pepcol --fraccol $fraccol --out '$output' + python '$__tool_directory__/peptide_pi_annotator.py' -i '$trainingpi' -p '$peptable' --out '$output' + #if $stripcol + --stripcol $stripcol + #else if $stripcolpattern + --stripcolpattern '$stripcolpattern' + #end if + #if $pepcol + --pepcol $pepcol + #else if $pepcolpattern + --pepcolpattern '$pepcolpattern' + #end if + #if $fraccol + --fraccol $fraccol + #else if $fraccolpattern + --fraccolpattern '$fraccolpattern' + #end if --strippatterns #for $strip in $strips @@ -42,9 +56,12 @@ - - - + + + + + + @@ -83,6 +100,27 @@ + + + + + + + + + + + + + + + + + + + + + diff -r 34c5c95740a1 -r 8a30d6e5b97d peptide_pi_annotator.py --- a/peptide_pi_annotator.py Mon May 22 05:08:23 2017 -0400 +++ b/peptide_pi_annotator.py Mon Jul 24 05:25:22 2017 -0400 @@ -10,13 +10,34 @@ sys.argv.append('-h') args = parse_commandline() strips = {} + if args.frac_col > 0: + frac_col = args.frac_col - 1 + elif args.frac_col: + frac_col = args.frac_col + elif args.frac_colpattern: + frac_col = get_col_by_pattern(args.peptable, args.frac_colpattern) + else: + raise RuntimeError('Must define fraction column') + if args.stripcol > 0: + stripcol = args.stripcol - 1 + elif args.stripcol: + stripcol = args.stripcol + elif args.stripcolpattern: + stripcol = get_col_by_pattern(args.peptable, args.stripcolpattern) + else: + raise RuntimeError('Must define strip column') + if args.pepcol: + pepcol = args.pepcol - 1 + elif args.pepcolpattern: + pepcol = get_col_by_pattern(args.peptable, args.pepcolpattern) + else: + raise RuntimeError('Must define peptide sequence column') for i, strip in enumerate(args.pipatterns): strips[strip] = {'intercept': args.intercepts[i], 'fr_width': args.fr_width[i]} with open(args.outpeptable, 'w') as fp: - for outline in annotate_peptable(args.pipeps, args.peptable, - args.pepcol, args.frac_col, - args.stripcol, strips, + for outline in annotate_peptable(args.pipeps, args.peptable, pepcol, + frac_col, stripcol, strips, args.ignoremods): fp.write('\t'.join([str(x) for x in outline])) fp.write('\n') @@ -29,10 +50,16 @@ return False +def get_col_by_pattern(peptable, colpattern): + with open(peptable) as fp: + header = next(fp).strip('\n').split('\t') + for ix, field in enumerate(header): + if colpattern in field: + return ix + + def annotate_peptable(predicted_peps_fn, peptable, seqcol, frac_col, stripcol, strips, ignoremods): - if frac_col > 0: - frac_col -= 1 predicted_peps = {} with open(predicted_peps_fn) as fp: for line in fp: @@ -45,11 +72,11 @@ for line in fp: line = line.strip('\n').split('\t') strip = strips[get_first_matching_pattern(strips.keys(), - line[stripcol - 1])] + line[stripcol])] exp_pi = (strip['fr_width'] * int(line[frac_col]) + strip['intercept']) - sequence = line[seqcol - 1] + sequence = line[seqcol] for weight in ignoremods: if weight == '*': regex = '[+-]\d*\.\d*' @@ -81,15 +108,24 @@ 'pI shift.') parser.add_argument('-i', dest='pipeps', help='A tab-separated txt file ' 'with peptide seq, pI value') + parser.add_argument('--pepcolpattern', dest='pepcolpattern', + help='Peptide sequence column pattern in peptide ' + 'table.', default=False, type=str) parser.add_argument('--pepcol', dest='pepcol', help='Peptide sequence ' 'column number in peptide table. First column is 1.', default=False, type=int) + parser.add_argument('--fraccolpattern', dest='frac_colpattern', + help='Fraction number column pattern in peptide ' + 'table.', default=False, type=str) parser.add_argument('--fraccol', dest='frac_col', help='Fraction number ' 'column number in peptide table. First column is 1.', - type=int) + default=False, type=int) parser.add_argument('--ignoremods', dest='ignoremods', help='Regex to ' 'identify modification weights to be ignored.', default=[], nargs='+', type=str) + parser.add_argument('--stripcolpattern', dest='stripcolpattern', + help='Strip name column pattern in peptide ' + 'table.', type=str, default=False) parser.add_argument('--stripcol', dest='stripcol', help='Strip name ' 'column number in peptide table. Will be used to ' 'detect strips if multiple are present using pattern ' diff -r 34c5c95740a1 -r 8a30d6e5b97d pi_database_splitter.py --- a/pi_database_splitter.py Mon May 22 05:08:23 2017 -0400 +++ b/pi_database_splitter.py Mon Jul 24 05:25:22 2017 -0400 @@ -4,6 +4,8 @@ from numpy import median from contextlib import ExitStack +from peptide_pi_annotator import get_col_by_pattern + def main(): if sys.argv[1:] == []: @@ -14,10 +16,23 @@ # Column nrs should start from 0 # If negative, -1 is last item in list, etc if args.fdrcol > 0: - args.fdrcol -= 1 + fdrcol = args.fdrcol - 1 + elif args.fdrcol: + fdrcol = args.fdrcol + elif args.fdrcolpattern: + fdrcol = get_col_by_pattern(args.train_peptable, args.fdrcolpattern) + else: + fdrcol = False if args.deltapicol > 0: - args.deltapicol -= 1 - pishift = get_pishift(args.train_peptable, args.fdrcol, args.deltapicol, + deltapicol = args.deltapicol - 1 + elif args.deltapicol: + deltapicol = args.deltapicol + elif args.deltapicolpattern: + deltapicol = get_col_by_pattern(args.train_peptable, + args.deltapicolpattern) + else: + deltapicol = False + pishift = get_pishift(args.train_peptable, fdrcol, deltapicol, args.fdrcutoff, args.picutoff) binarray = get_bin_array(args.fr_amount, args.fr_width, args.intercept, args.tolerance, pishift) @@ -60,9 +75,15 @@ ' number in peptide table. First column is nr. 1. ' 'Negative number for counting from last col ' '(-1 is last).', default=False, type=int) + parser.add_argument('--deltacolpattern', dest='deltapicolpattern', + help='Delta pI column header pattern in peptide ' + 'table.', default=False, type=str) parser.add_argument('--picutoff', dest='picutoff', help='delta pI value to filter experimental peptides' ' when calculating pi shift.', default=0.2, type=float) + parser.add_argument('--fdrcolpattern', dest='fdrcolpattern', + help='FDR column header pattern in peptide table.', + default=False, type=str) parser.add_argument('--fdrcol', dest='fdrcol', help='FDR column number in ' 'peptide table. First column is nr. 1. Empty includes ' 'all peptides', default=False, type=int) @@ -84,7 +105,7 @@ help='pI Intercept of strip', type=float) parser.add_argument('--width', dest='fr_width', help='Strip fraction width in pI', type=float) - parser.add_argument('--minlen', dest='minlen', help='Minimal peptide length', + parser.add_argument('--minlen', dest='minlen', help='Minimal peptide length', type=int) parser.add_argument('--maxlen', dest='maxlen', help='Maximal peptide length', type=int, default=False) @@ -146,7 +167,7 @@ accs, pep, pi = line.strip().split("\t") pi = float(pi) if maxlen and len(pep) > maxlen: - continue + continue elif len(pep) >= minlen: pepcount += 1 if pep[-1] in {'K', 'R'}: diff -r 34c5c95740a1 -r 8a30d6e5b97d pi_db_split.xml --- a/pi_db_split.xml Mon May 22 05:08:23 2017 -0400 +++ b/pi_db_split.xml Mon Jul 24 05:25:22 2017 -0400 @@ -1,4 +1,4 @@ - + into pI separated fractions numpy @@ -8,14 +8,26 @@ - + + - - + + - - - - - + + + + + + + + + @@ -54,13 +71,42 @@ - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +