Mercurial > repos > galaxyp > hirieftools
changeset 1:8a30d6e5b97d draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/pi_db_tools commit ddcc42d2a767f7c14eb710b8ac264745c25444d3
author | galaxyp |
---|---|
date | Mon, 24 Jul 2017 05:25:22 -0400 |
parents | 34c5c95740a1 |
children | 77ddaee887a8 |
files | README.rst __pycache__/peptide_pi_annotator.cpython-36.pyc delta_pi_calc.xml peptide_pi_annotator.py pi_database_splitter.py pi_db_split.xml |
diffstat | 6 files changed, 181 insertions(+), 40 deletions(-) [+] |
line wrap: on
line diff
--- a/README.rst Mon May 22 05:08:23 2017 -0400
+++ b/README.rst Mon Jul 24 05:25:22 2017 -0400
@@ -1,4 +1,4 @@
-GalaxyP - Percolator
+GalaxyP - HiRIEF tools
 =======================
 
 - Home: <https://github.com/galaxyproteomics/tools-galaxyp/>
--- a/delta_pi_calc.xml Mon May 22 05:08:23 2017 -0400 +++ b/delta_pi_calc.xml Mon Jul 24 05:25:22 2017 -0400 @@ -1,11 +1,25 @@ -<tool id="calc_delta_pi" name="Add delta pI" version="1.0"> +<tool id="calc_delta_pi" name="Add delta pI" version="1.1"> <requirements> <requirement type="package" version="3.6">python</requirement> </requirements> <description>to peptide table</description> <command> - python '$__tool_directory__/peptide_pi_annotator.py' -i '$trainingpi' -p '$peptable' - --stripcol $stripcol --pepcol $pepcol --fraccol $fraccol --out '$output' + python '$__tool_directory__/peptide_pi_annotator.py' -i '$trainingpi' -p '$peptable' --out '$output' + #if $stripcol + --stripcol $stripcol + #else if $stripcolpattern + --stripcolpattern '$stripcolpattern' + #end if + #if $pepcol + --pepcol $pepcol + #else if $pepcolpattern + --pepcolpattern '$pepcolpattern' + #end if + #if $fraccol + --fraccol $fraccol + #else if $fraccolpattern + --fraccolpattern '$fraccolpattern' + #end if --strippatterns #for $strip in $strips @@ -42,9 +56,12 @@ </sanitizer> </param> </repeat> - <param name="pepcol" type="integer" value="" label="Peptide sequence column in peptide table" /> - <param name="fraccol" type="integer" value="" label="Fraction number column in peptide table" /> - <param name="stripcol" type="integer" value="" label="Strip pattern column in peptide table" help="E.g. column with filename to derive strip name from"/> + <param name="pepcolpattern" type="text" value="" optional="true" label="Peptide sequence pattern for column header field in peptide table." /> + <param name="pepcol" type="integer" value="" optional="true" label="Peptide sequence column number in peptide table. First column is 1. Overrides column pattern." /> + <param name="fraccolpattern" type="text" value="" optional="true" label="Fraction number column header papttern in peptide table." 
/> + <param name="fraccol" type="integer" optional="true" value="" label="Fraction number column number in peptide table. First column is 1. Overrides column pattern." /> + <param name="stripcolpattern" type="text" optional="true" value="" label="Strip pattern header column pattern in peptide table" help="E.g. column with filename to derive strip name from"/> + <param name="stripcol" type="integer" optional="true" value="" label="Strip pattern column number in peptide table" help="E.g. column with filename to derive strip name from. First column is 1. Overrides column pattern"/> <repeat name="strips" title="pI separation strip data"> <param name="pattern" type="text" label="Strip regex detection pattern" help="Regex (see help below) that identifies the pI strip from the column in the above field."> <sanitizer> @@ -83,6 +100,27 @@ </repeat> <output name="output" value="peptable_deltapi.txt" /> </test> + <test> + <param name="trainingpi" value="predicted_peptides.txt" /> + <param name="peptable" value="peptable.txt" /> + <repeat name="ignoremods"> + <param name="regex" value="*" /> + </repeat> + <param name="pepcolpattern" value="Sequence" /> + <param name="fraccolpattern" value="Fraction" /> + <param name="stripcolpattern" value="Filename" /> + <repeat name="strips"> + <param name="pattern" value="strip1" /> + <param name="intercept" value="8.21" /> + <param name="fr_width" value="0.013" /> + </repeat> + <repeat name="strips"> + <param name="pattern" value="strip2" /> + <param name="intercept" value="6.11" /> + <param name="fr_width" value="0.04" /> + </repeat> + <output name="output" value="peptable_deltapi.txt" /> + </test> <test> <param name="trainingpi" value="predicted_peptides.txt" /> <param name="peptable" value="peptable.txt" />
--- a/peptide_pi_annotator.py Mon May 22 05:08:23 2017 -0400 +++ b/peptide_pi_annotator.py Mon Jul 24 05:25:22 2017 -0400 @@ -10,13 +10,34 @@ sys.argv.append('-h') args = parse_commandline() strips = {} + if args.frac_col > 0: + frac_col = args.frac_col - 1 + elif args.frac_col: + frac_col = args.frac_col + elif args.frac_colpattern: + frac_col = get_col_by_pattern(args.peptable, args.frac_colpattern) + else: + raise RuntimeError('Must define fraction column') + if args.stripcol > 0: + stripcol = args.stripcol - 1 + elif args.stripcol: + stripcol = args.stripcol + elif args.stripcolpattern: + stripcol = get_col_by_pattern(args.peptable, args.stripcolpattern) + else: + raise RuntimeError('Must define strip column') + if args.pepcol: + pepcol = args.pepcol - 1 + elif args.pepcolpattern: + pepcol = get_col_by_pattern(args.peptable, args.pepcolpattern) + else: + raise RuntimeError('Must define peptide sequence column') for i, strip in enumerate(args.pipatterns): strips[strip] = {'intercept': args.intercepts[i], 'fr_width': args.fr_width[i]} with open(args.outpeptable, 'w') as fp: - for outline in annotate_peptable(args.pipeps, args.peptable, - args.pepcol, args.frac_col, - args.stripcol, strips, + for outline in annotate_peptable(args.pipeps, args.peptable, pepcol, + frac_col, stripcol, strips, args.ignoremods): fp.write('\t'.join([str(x) for x in outline])) fp.write('\n') @@ -29,10 +50,16 @@ return False +def get_col_by_pattern(peptable, colpattern): + with open(peptable) as fp: + header = next(fp).strip('\n').split('\t') + for ix, field in enumerate(header): + if colpattern in field: + return ix + + def annotate_peptable(predicted_peps_fn, peptable, seqcol, frac_col, stripcol, strips, ignoremods): - if frac_col > 0: - frac_col -= 1 predicted_peps = {} with open(predicted_peps_fn) as fp: for line in fp: @@ -45,11 +72,11 @@ for line in fp: line = line.strip('\n').split('\t') strip = strips[get_first_matching_pattern(strips.keys(), - line[stripcol - 1])] + 
line[stripcol])] exp_pi = (strip['fr_width'] * int(line[frac_col]) + strip['intercept']) - sequence = line[seqcol - 1] + sequence = line[seqcol] for weight in ignoremods: if weight == '*': regex = '[+-]\d*\.\d*' @@ -81,15 +108,24 @@ 'pI shift.') parser.add_argument('-i', dest='pipeps', help='A tab-separated txt file ' 'with peptide seq, pI value') + parser.add_argument('--pepcolpattern', dest='pepcolpattern', + help='Peptide sequence column pattern in peptide ' + 'table.', default=False, type=str) parser.add_argument('--pepcol', dest='pepcol', help='Peptide sequence ' 'column number in peptide table. First column is 1.', default=False, type=int) + parser.add_argument('--fraccolpattern', dest='frac_colpattern', + help='Fraction number column pattern in peptide ' + 'table.', default=False, type=str) parser.add_argument('--fraccol', dest='frac_col', help='Fraction number ' 'column number in peptide table. First column is 1.', - type=int) + default=False, type=int) parser.add_argument('--ignoremods', dest='ignoremods', help='Regex to ' 'identify modification weights to be ignored.', default=[], nargs='+', type=str) + parser.add_argument('--stripcolpattern', dest='stripcolpattern', + help='Strip name column pattern in peptide ' + 'table.', type=str, default=False) parser.add_argument('--stripcol', dest='stripcol', help='Strip name ' 'column number in peptide table. Will be used to ' 'detect strips if multiple are present using pattern '
--- a/pi_database_splitter.py Mon May 22 05:08:23 2017 -0400 +++ b/pi_database_splitter.py Mon Jul 24 05:25:22 2017 -0400 @@ -4,6 +4,8 @@ from numpy import median from contextlib import ExitStack +from peptide_pi_annotator import get_col_by_pattern + def main(): if sys.argv[1:] == []: @@ -14,10 +16,23 @@ # Column nrs should start from 0 # If negative, -1 is last item in list, etc if args.fdrcol > 0: - args.fdrcol -= 1 + fdrcol = args.fdrcol - 1 + elif args.fdrcol: + fdrcol = args.fdrcol + elif args.fdrcolpattern: + fdrcol = get_col_by_pattern(args.train_peptable, args.fdrcolpattern) + else: + fdrcol = False if args.deltapicol > 0: - args.deltapicol -= 1 - pishift = get_pishift(args.train_peptable, args.fdrcol, args.deltapicol, + deltapicol = args.deltapicol - 1 + elif args.deltapicol: + deltapicol = args.deltapicol + elif args.deltapicolpattern: + deltapicol = get_col_by_pattern(args.train_peptable, + args.deltapicolpattern) + else: + deltapicol = False + pishift = get_pishift(args.train_peptable, fdrcol, deltapicol, args.fdrcutoff, args.picutoff) binarray = get_bin_array(args.fr_amount, args.fr_width, args.intercept, args.tolerance, pishift) @@ -60,9 +75,15 @@ ' number in peptide table. First column is nr. 1. ' 'Negative number for counting from last col ' '(-1 is last).', default=False, type=int) + parser.add_argument('--deltacolpattern', dest='deltapicolpattern', + help='Delta pI column header pattern in peptide ' + 'table.', default=False, type=str) parser.add_argument('--picutoff', dest='picutoff', help='delta pI value to filter experimental peptides' ' when calculating pi shift.', default=0.2, type=float) + parser.add_argument('--fdrcolpattern', dest='fdrcolpattern', + help='FDR column header pattern in peptide table.', + default=False, type=str) parser.add_argument('--fdrcol', dest='fdrcol', help='FDR column number in ' 'peptide table. First column is nr. 1. 
Empty includes ' 'all peptides', default=False, type=int) @@ -84,7 +105,7 @@ help='pI Intercept of strip', type=float) parser.add_argument('--width', dest='fr_width', help='Strip fraction width in pI', type=float) - parser.add_argument('--minlen', dest='minlen', help='Minimal peptide length', + parser.add_argument('--minlen', dest='minlen', help='Minimal peptide length', type=int) parser.add_argument('--maxlen', dest='maxlen', help='Maximal peptide length', type=int, default=False) @@ -146,7 +167,7 @@ accs, pep, pi = line.strip().split("\t") pi = float(pi) if maxlen and len(pep) > maxlen: - continue + continue elif len(pep) >= minlen: pepcount += 1 if pep[-1] in {'K', 'R'}:
--- a/pi_db_split.xml Mon May 22 05:08:23 2017 -0400 +++ b/pi_db_split.xml Mon Jul 24 05:25:22 2017 -0400 @@ -1,4 +1,4 @@ -<tool id="pi_db_split" name="Split peptide database" version="1.0"> +<tool id="pi_db_split" name="Split peptide database" version="1.1"> <description>into pI separated fractions</description> <requirements> <requirement type="package">numpy</requirement> @@ -8,14 +8,26 @@ <![CDATA[ mkdir pi_fr_out && cd pi_fr_out && python '$__tool_directory__/pi_database_splitter.py' -i '$pipeptides' -p '$peptable' - --intercept $intercept --width $fr_width --tolerance $tolerance --amount $fr_amount --prefix pisplit - --deltacol $deltacol --picutoff $picutoff + #for $strip in $strips + #if not $strip.peptable_pattern or str($strip.peptable_pattern) in $peptable.element_identifier + --intercept $strip.intercept --width $strip.fr_width --tolerance $strip.tolerance --amount $strip.fr_amount --prefix pisplit --picutoff $strip.picutoff + #if $strip.reverse + --reverse + #end if + #break + #end if + #end for + + #if $deltacol + --deltacol $deltacol + #else if $deltacolpattern + --deltacolpattern '$deltacolpattern' + #end if #if $fdrcol --fdrcol $fdrcol --fdrcutoff $fdrcutoff + #else if $fdrcolpattern + --fdrcolpattern '$fdrcolpattern' --fdrcutoff $fdrcutoff #end if - #if $reverse - --reverse - #end if #if $maxlen --maxlen $maxlen #end if @@ -26,17 +38,22 @@ <inputs> <param name="pipeptides" type="data" format="tabular" label="Target peptides with pI and accession" help="First col accession, second sequence, third pI" /> <param name="peptable" type="data" format="tabular" label="Peptide table to determine pI shift from" help="Should have delta pI as a column" /> - <param name="fdrcol" type="integer" value="" optional="true" label="FDR (q-value) column in peptide table" /> + <param name="fdrcolpattern" type="text" optional="true" label="FDR (q-value) column pattern in peptide table" /> + <param name="fdrcol" type="integer" value="" optional="true" label="FDR (q-value) 
column number in peptide table" help="Overrides column pattern if filled. First column is 1" /> <param name="fdrcutoff" type="float" value="0.0" help="Not used when no FDR column specified" label="FDR value cutoff for inclusion in shift determination" /> - <param name="deltacol" type="integer" value="" label="Delta pI column in peptide table" /> - <param name="picutoff" type="float" value="0.2" optional="true" label="delta-pI cutoff for inclusion in shift determination" /> + <param name="deltacolpattern" type="text" value="" label="Delta pI column pattern in peptide table" /> + <param name="deltacol" type="integer" optional="true" value="" label="Delta pI column number in peptide table" help="Overrides column pattern if filled. First column is 1"/> <param name="minlen" type="integer" value="8" label="Minimum length of peptide to include in split DB" /> <param name="maxlen" type="integer" optional="true" value="" label="Max. length of peptide to include in split DB" /> - <param name="intercept" type="float" value="" label="Intercept of pI strip" /> - <param name="fr_width" type="float" value="" label="Fraction width" /> - <param name="tolerance" type="float" value="" label="pI tolerance" /> - <param name="fr_amount" type="integer" value="" label="Fraction amount" /> - <param name="reverse" type="boolean" label="Strip is reversed (high-to-low pI)?" /> + <repeat name="strips" title="pI separation strip data"> + <param name="peptable_pattern" type="text" label="Pattern to find correct peptide table for a strip, for when multiple peptide tables have different strips" help="Will match against peptide table's name. 
Leave blank for single peptide table or when using same strip in all tables" /> + <param name="intercept" type="float" value="" label="Intercept of pI strip" /> + <param name="fr_width" type="float" value="" label="Fraction width" /> + <param name="tolerance" type="float" value="" label="pI tolerance" /> + <param name="fr_amount" type="integer" value="" label="Fraction amount" /> + <param name="reverse" type="boolean" label="Strip is reversed (high-to-low pI)?" /> + <param name="picutoff" type="float" value="0.2" optional="true" label="delta-pI cutoff for inclusion in shift determination" /> + </repeat> </inputs> <outputs> @@ -54,13 +71,42 @@ <param name="fdrcol" value="3" /> <param name="fdrcutoff" value="0.2" /> <param name="deltacol" value="-1" /> - <param name="picutoff" value="10" /> <param name="minlen" value="8" /> - <param name="intercept" value="5.6" /> - <param name="fr_width" value="1.3" /> - <param name="tolerance" value="0.1" /> - <param name="fr_amount" value="3" /> - <param name="reverse" value="false" /> + <repeat name="strips"> + <param name="peptable_pattern" value="deltapi" /> + <param name="intercept" value="5.6" /> + <param name="fr_width" value="1.3" /> + <param name="tolerance" value="0.1" /> + <param name="fr_amount" value="3" /> + <param name="reverse" value="false" /> + <param name="picutoff" value="10" /> + </repeat> + <output_collection name="target_pi_db" type="list"> + <element name="fr1" value="target_splitdb_fr1.fasta" /> + <element name="fr2" value="target_splitdb_fr2.fasta" /> + <element name="fr3" value="target_splitdb_fr3.fasta" /> + </output_collection> + <output_collection name="decoy_pi_db" type="list"> + <element name="fr1" value="decoy_splitdb_fr1.fasta" /> + <element name="fr2" value="decoy_splitdb_fr2.fasta" /> + <element name="fr3" value="decoy_splitdb_fr3.fasta" /> + </output_collection> + </test> + <test> + <param name="pipeptides" value="predicted_peptides_to_split.txt" /> + <param name="peptable" 
value="peptable_deltapi.txt" /> + <param name="fdrcolpattern" value="FDR" /> + <param name="fdrcutoff" value="0.2" /> + <param name="deltacolpattern" value="Delta" /> + <param name="minlen" value="8" /> + <repeat name="strips"> + <param name="intercept" value="5.6" /> + <param name="fr_width" value="1.3" /> + <param name="tolerance" value="0.1" /> + <param name="fr_amount" value="3" /> + <param name="reverse" value="false" /> + <param name="picutoff" value="10" /> + </repeat> <output_collection name="target_pi_db" type="list"> <element name="fr1" value="target_splitdb_fr1.fasta" /> <element name="fr2" value="target_splitdb_fr2.fasta" />