Mercurial > repos > iracooke > tpp_prophets
changeset 12:4c66453a168e draft
Uploaded
author | iracooke |
---|---|
date | Mon, 16 Jun 2014 07:17:09 -0400 |
parents | 8512f117b10b |
children | b793fe628648 |
files | README README.md interprophet.xml peptide_prophet.xml pepxml_to_table.xml protein_prophet.xml protxml_to_table.xml repository_dependencies.xml |
diffstat | 8 files changed, 337 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- a/README Sat Jun 14 18:26:00 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ -Requirements: -This package uses protk, msgfplus and proteowizard, which must be installed separately. - -For instructions please see: https://github.com/iracooke/protk/#galaxy-integration \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Mon Jun 16 07:17:09 2014 -0400 @@ -0,0 +1,8 @@ +## What is it? +Galaxy tool definition files and wrapper scripts for Peptide and Protein inference tools in the [Trans Proteomic Pipeline](http://tools.proteomecenter.org/wiki/index.php?title=Software:TPP) (Peptide Prophet, iProphet and Protein Prophet). + +## Installation +Install from the main galaxy toolshed at http://toolshed.g2.bx.psu.edu/ + +All the tools depend on command-line scripts and databases available in the [protk ruby gem](https://bitbucket.org/iracooke/protk). +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/interprophet.xml Mon Jun 16 07:17:09 2014 -0400 @@ -0,0 +1,72 @@ +<tool id="proteomics_search_interprophet_1" name="InterProphet" version="1.0.1"> + + <requirements> + <requirement type="package" version="1.3">protk</requirement> + <requirement type="package" version="4.6.3">trans_proteomic_pipeline</requirement> + </requirements> + + + <description>Combine Peptide Prophet results from multiple search engines</description> + + <command>interprophet.rb --galaxy + + -o interprophet_output.pep.xml + + $use_nss + + $use_nrs + + $use_nse + + $use_nsi + + $use_nsm + + --minprob $minprob + + ## Inputs. + ${first_input} + #for $input_file in $input_files: + ${input_file.additional_input} + #end for + + </command> + + <inputs> + + <param name="first_input" type="data" format="peptideprophet_pepxml" label="Peptide Prophet Results" help="These files will typically be outputs from search tools that have subsequently been run through peptide prophet"/> + + <repeat name="input_files" title="Additional PepXML Input Files"> + <param format="peptideprophet_pepxml" name="additional_input" type="data" label="PepXML produced by Peptide Prophet" help=""/> + </repeat> + + <param name="use_nss" checked="true" type="boolean" label="Include NSS in Model" help="Include NSS (Number of Sibling Searches) in Statistical Model" truevalue="blank" falsevalue="--no-nss"/> + <param name="use_nrs" checked="true" type="boolean" label="Include NRS in Model" help="Include NRS (Number of Replicate Spectra) in Statistical Model" truevalue="blank" falsevalue="--no-nrs"/> + <param name="use_nse" checked="true" type="boolean" label="Include NSE in Model" help="Include NSE (Number of Sibling Experiments) in Statistical Model" truevalue="blank" falsevalue="--no-nse"/> + <param name="use_nsi" checked="true" type="boolean" label="Include NSI in Model" help="Include NSI (Number of Sibling Ions) in Statistical Model" truevalue="blank" falsevalue="--no-nsi"/> + 
<param name="use_nsm" checked="true" type="boolean" label="Include NSM in Model" help="Include NSM (Number of Sibling Modifications) in Statistical Model" truevalue="blank" falsevalue="--no-nsm"/> + + <param name="minprob" type="text" label="Minimum threshold probability for reporting results"/> + + </inputs> + <outputs> + <data format="interprophet_pepxml" name="output" metadata_source="first_input" label="interprophet.${first_input.display_name}" from_work_dir="interprophet_output.pep.xml"/> + </outputs> + + <help> + +**What it does** + +Takes a set of pepXML files (possibly generated using different search engines) and calculates updated identification probabilities for each peptide. The updated probabilities are based on a statistical model that combines evidence from identifications across all of the input files, spectra, modified states and charge states. + +---- + +**Citation** + +If you use this tool please read and cite the paper describing iProphet + +Shteynberg D, et al. “iProphet: Improved statistical validation of peptide identifications in shotgun proteomics.” *Molecular and Cellular Proteomics* 10, M111.007690 (2011). + + </help> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/peptide_prophet.xml Mon Jun 16 07:17:09 2014 -0400 @@ -0,0 +1,100 @@ +<tool id="proteomics_search_peptide_prophet_1" name="Peptide Prophet" version="1.0.1"> + <requirements> + <requirement type="package" version="1.3">protk</requirement> + <requirement type="package" version="4.6.3">trans_proteomic_pipeline</requirement> + </requirements> + + <description>Calculate Peptide Prophet statistics on search results</description> + + <command> + peptide_prophet.rb --galaxy $input_file -o peptide_prophet_output.pep.xml + + -r + $glyco + $useicat + $phospho + $usepi + $usert + $accurate_mass + $no_ntt + $no_nmc + $use_gamma + $use_only_expect + $force_fit + $allow_alt_instruments + $maldi + + </command> + + <inputs> + + <param name="input_file" type="data" format="raw_pepxml" multiple="false" label="Raw Search Results" help="These files will typically be outputs from omssa or xtandem search tools"/> + + <param name="glyco" type="boolean" label="Expect true positives to have a glycocapture motif" truevalue="--glyco" falsevalue=""/> + <param name="useicat" type="boolean" label="Use icat information" truevalue="--useicat" falsevalue="--no-useicat"/> + <param name="phospho" type="boolean" label="Use phospho information" truevalue="--phospho" falsevalue=""/> + <param name="usepi" type="boolean" label="Use pI information" truevalue="--usepi" falsevalue=""/> + <param name="usert" type="boolean" label="Use hydrophobicity / RT information" truevalue="--usert" falsevalue=""/> + <param name="accurate_mass" type="boolean" label="Use accurate mass binning" truevalue="--accurate-mass" falsevalue=""/> + <param name="no_ntt" type="boolean" label="Don't use NTT model" truevalue="--no-ntt" falsevalue=""/> + <param name="no_nmc" type="boolean" label="Don't use NMC model" truevalue="--no-nmc" falsevalue=""/> + <param name="use_gamma" type="boolean" label="Use Gamma distribution to model the negatives" help="Applies only to X!Tandem results" 
truevalue="--usegamma" falsevalue=""/> + <param name="use_only_expect" type="boolean" label="Only use Expect Score as the discriminant" help="Applies only to X!Tandem results. + Helpful for data with homologous top hits e.g. phospho or glyco" truevalue="--use-only-expect" falsevalue=""/> + <param name="force_fit" type="boolean" label="Force fitting" help="Bypasses automatic mixture model checks and forces fitting of a mixture model" truevalue="--force-fit" falsevalue=""/> + <param name="allow_alt_instruments" type="boolean" label="Allow multiple instrument types" help="Warning instead of exit with error if instrument types between runs is different" truevalue="--allow-alt-instruments" falsevalue=""/> + <param name="maldi" type="boolean" label="Maldi data" truevalue="-l" falsevalue=""/> + + + </inputs> + <outputs> + <data format="peptideprophet_pepxml" name="output" metadata_source="input_file" label="peptide_prophet.${input_file.display_name}.pep.xml" from_work_dir="peptide_prophet_output.pep.xml"/> + </outputs> + +<help> + +**What it does** + +Given raw search engine scores as inputs this tool estimates the accuracy of peptide assignments. From a practical perspective it estimates the probability that each peptide assignment is correct (providing probabilities as outputs), given raw scores (possibly on some arbitrary scale) as inputs. + +---- + +**Citation** + +If you use this tool please read and cite the paper describing the statistical model implemented by Peptide Prophet + +Keller A., et al. “Empirical Statistical Model to Estimate the Accuracy of Peptide Identifications Made by MS/MS and Database Search” *Anal. Chem.* 74, 5383-5392 (2002). 
+ + +</help> + + +<!--PeptideProphet options [following the 'O']: + i [use icat information in PeptideProphet] + f [do not use icat information in PeptideProphet] + g [use N-glyc motif information in PeptideProphet] + H [use Phospho information in PeptideProphet] + m [maldi data] + I [use pI information in PeptideProphet] + R [use Hydrophobicity / RT information in PeptideProphet] + F [force the fitting of the mixture model, bypass automatic mixture model checks] + A [use accurate mass binning in PeptideProphet] + w [warning instead of exit with error if instrument types between runs is different] + x [exclude all entries with asterisked score values in PeptideProphet] + l [leave alone all entries with asterisked score values in PeptideProphet] + n [use hardcoded default initialization parameters of the distributions] + P [use Non-parametric model, can only be used with decoy option] + N [do not use the NTT model] + M [do not use the NMC model] + G [use Gamma Distribution to model the Negatives (applies only to X!Tandem data)] + E [only use Expect Score as the Discriminant(applies only to X!Tandem data, + helpful for data with homologous top hits e.g. phospho or glyco)] + d [report decoy hits with a computed probability based on the model learned] + p [run ProteinProphet afterwards] + t [do not create png data plot] + u [do not assemble protein groups in ProteinProphet analysis] + s [do not use Occam's Razor in ProteinProphet analysis to + derive the simplest protein list to explain observed peptides] +--> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/pepxml_to_table.xml Mon Jun 16 07:17:09 2014 -0400 @@ -0,0 +1,29 @@ +<tool id="pepxml_to_table_1" name="PepXML to Table" version="1.0.1"> + + <requirements> + <requirement type="package" version="1.3">protk</requirement> + </requirements> + + + + <description>Converts a pepXML file to a tab delimited text file</description> + + +<!-- Note .. the input file is assumed to be the first argument --> +<command>pepxml_to_table.rb $input_file -o $output</command> + + +<inputs> + + <param name="input_file" type="data" format="pepxml,raw_pepxml,peptideprophet_pepxml,interprophet_pepxml" multiple="false" label="Input File" help="A pepXML file"/> + +</inputs> +<outputs> + <data format="csv" name="output" metadata_source="input_file" label="${input_file.display_name}.csv" /> +</outputs> + +<help> + Convert a pepXML file to Tab delimited text +</help> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/protein_prophet.xml Mon Jun 16 07:17:09 2014 -0400 @@ -0,0 +1,93 @@ +<tool id="proteomics_search_protein_prophet_1" name="Protein Prophet" version="1.0.1"> + <requirements> + <requirement type="package" version="1.3">protk</requirement> + <requirement type="package" version="4.6.3">trans_proteomic_pipeline</requirement> + </requirements> + + <description>Calculate Protein Prophet statistics on search results</description> + + +<!-- Note .. the input file is assumed to be the first argument --> + <command> + protein_prophet.rb + + --galaxy $input_file + + -o protein_prophet_results.prot.xml + + -r + + $iproph + $nooccam + $groupwts + $normprotlen + $logprobs + $confem + $allpeps + $unmapped + $instances + $delude + + --minprob=$minprob + --minindep=$minindep + </command> + <inputs> + + <param name="input_file" type="data" format="peptideprophet_pepxml,interprophet_pepxml" multiple="false" label="Peptide Prophet Results" help="These files will typically be outputs from peptide prophet or interprophet"/> + + + <param name="iproph" selected="true" type="boolean" label="Inputs are from iProphet" truevalue="--iprophet-input" falsevalue=""/> + <param name="nooccam" type="boolean" label="Don't apply Occam's razor" help="When selected no attempt will be made to derive the simplest protein list explaining observed peptides" truevalue="--no-occam" falsevalue=""/> + <param name="groupwts" type="boolean" label="Use group weights" help="Check peptide's total weight (rather than actual weight) in the Protein Group against the threshold" truevalue="--group-wts" falsevalue=""/> + <param name="normprotlen" type="boolean" label="Normalize NSP using Protein Length" truevalue="--norm-protlen" falsevalue=""/> + <param name="logprobs" type="boolean" label="Use the log of probability in the confidence calculations" truevalue="--log-prob" falsevalue=""/> + <param name="confem" type="boolean" label="Use the EM to compute probability given 
the confidence" truevalue="--confem" falsevalue=""/> + <param name="allpeps" type="boolean" label="Consider all possible peptides in the database in the confidence model" truevalue="--allpeps" falsevalue=""/> + <param name="unmapped" type="boolean" label="Report results for unmapped proteins" truevalue="--unmapped" falsevalue=""/> + <param name="instances" type="boolean" label="Use Expected Number of Ion Instances to adjust the peptide probabilities prior to NSP adjustment" truevalue="--instances" falsevalue=""/> + <param name="delude" type="boolean" label="Do NOT use peptide degeneracy information when assessing proteins" truevalue="--delude" falsevalue=""/> + + <param name="minprob" type="text" label="Minimum peptide prophet probability for peptides to be considered" value="0.05"/> + <param name="minindep" type="text" label="Minimum percentage of independent peptides required for a protein" value="0"/> + + </inputs> + <outputs> + <data format="protxml" name="output" metadata_source="input_file" label="protein_prophet.${input_file.display_name}.protXML" from_work_dir="protein_prophet_results.prot.xml"/> + </outputs> + + +<!--NOPLOT: do not generate plot png file + NOOCCAM: non-conservative maximum protein list + GROUPWTS: check peptide's total weight in the Protein Group against the threshold (default: check peptide's actual weight against threshold) + NORMPROTLEN: Normalize NSP using Protein Length + LOGPROBS: Use the log of the probabilities in the Confidence calculations + CONFEM: Use the EM to compute probability given the confidence + ALLPEPS: Consider all possible peptides in the database in the confidence model + UNMAPPED: Report results for UNMAPPED proteins + INSTANCES: Use Expected Number of Ion Instances to adjust the peptide probabilities prior to NSP adjustment + DELUDE: do NOT use peptide degeneracy information when assessing proteins + + MINPROB: peptideProphet probability threshold (default=0.05) + MININDEP: minimum percentage of independent 
peptides required for a protein (default=0) + + +--> + + <help> + +**What it does** + +Given a set of peptide assignments from MS/MS spectra in the form of a pepXML file, this tool estimates probabilities at the protein level. As output, the tool produces a protXML file, which contains proteins along with the estimated probabilities that those proteins were present. Probabilities are estimated using a statistical model based on the number of peptides corresponding to that protein and the confidence that each of those peptides was assigned correctly. It takes account of the fact that peptides may correspond to more than one protein. + +---- + +**Citation** + +If you use this tool please read and cite the paper describing the statistical model implemented by Protein Prophet + +Nesvizhskii A., et al. “A Statistical Model for Identifying Proteins by Tandem Mass Spectrometry” *Anal. Chem.* 75, 4646-4658 (2003). + + + </help> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/protxml_to_table.xml Mon Jun 16 07:17:09 2014 -0400 @@ -0,0 +1,35 @@ +<tool id="protxml_to_table_1" name="ProtXML to Table" version="1.0.1"> + <requirements> + <requirement type="package" version="1.3">protk</requirement> + </requirements> + + <description>Converts a ProtXML file to a table</description> + + <command> + protxml_to_table.rb + + $input_file + -o $output + </command> + + <inputs> + + <param format="protxml" name="input_file" type="data" label="ProtXML File to Convert"/> + + </inputs> + + + <outputs> + <data format="tabular" name="output" /> + </outputs> + + + <help> + +**What it does** + +Converts a ProtXML file to a tab separated table + + </help> + +</tool>
--- a/repository_dependencies.xml Sat Jun 14 18:26:00 2014 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,4 +0,0 @@ -<?xml version="1.0"?> -<repositories description="Proteomics datatypes"> - <repository changeset_revision="f66f8ca7b7b9" name="proteomics_datatypes" owner="iracooke" toolshed="http://toolshed.g2.bx.psu.edu" /> - </repositories>