annotate protein_prophet.xml @ 8:d19a95abf2e4

Update
author Ira Cooke <iracooke@gmail.com>
date Sun, 09 Jun 2013 08:19:01 -0500
parents 3f0cb90824f1
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
7
3f0cb90824f1 Uploaded
iracooke
parents: 6
diff changeset
1 <tool id="proteomics_search_protein_prophet_1" name="Protein Prophet" version="1.0.1">
2
25261529840c Uploaded
iracooke
parents:
diff changeset
2 <requirements>
8
Ira Cooke <iracooke@gmail.com>
parents: 7
diff changeset
3 <requirement type="package" version="1.2.2">galaxy_protk</requirement>
2
25261529840c Uploaded
iracooke
parents:
diff changeset
4 <requirement type="package" version="4.6.1">trans_proteomic_pipeline</requirement>
25261529840c Uploaded
iracooke
parents:
diff changeset
5 </requirements>
5
97f1c89cd831 Uploaded
iracooke
parents: 2
diff changeset
6
2
25261529840c Uploaded
iracooke
parents:
diff changeset
7 <description>Calculate Protein Prophet statistics on search results</description>
25261529840c Uploaded
iracooke
parents:
diff changeset
8
25261529840c Uploaded
iracooke
parents:
diff changeset
9
25261529840c Uploaded
iracooke
parents:
diff changeset
10 <!-- Note .. the input file is assumed to be the first argument -->
8
Ira Cooke <iracooke@gmail.com>
parents: 7
diff changeset
11 <command interpreter="bash">
Ira Cooke <iracooke@gmail.com>
parents: 7
diff changeset
12 protein_prophet_wrapper.sh
Ira Cooke <iracooke@gmail.com>
parents: 7
diff changeset
13
Ira Cooke <iracooke@gmail.com>
parents: 7
diff changeset
14 --galaxy $input_file
Ira Cooke <iracooke@gmail.com>
parents: 7
diff changeset
15
Ira Cooke <iracooke@gmail.com>
parents: 7
diff changeset
16 -r
Ira Cooke <iracooke@gmail.com>
parents: 7
diff changeset
17
Ira Cooke <iracooke@gmail.com>
parents: 7
diff changeset
18 $iproph
Ira Cooke <iracooke@gmail.com>
parents: 7
diff changeset
19 $nooccam
Ira Cooke <iracooke@gmail.com>
parents: 7
diff changeset
20 $groupwts
Ira Cooke <iracooke@gmail.com>
parents: 7
diff changeset
21 $normprotlen
Ira Cooke <iracooke@gmail.com>
parents: 7
diff changeset
22 $logprobs
Ira Cooke <iracooke@gmail.com>
parents: 7
diff changeset
23 $confem
Ira Cooke <iracooke@gmail.com>
parents: 7
diff changeset
24 $allpeps
Ira Cooke <iracooke@gmail.com>
parents: 7
diff changeset
25 $unmapped
Ira Cooke <iracooke@gmail.com>
parents: 7
diff changeset
26 $instances
Ira Cooke <iracooke@gmail.com>
parents: 7
diff changeset
27 $delude
Ira Cooke <iracooke@gmail.com>
parents: 7
diff changeset
28
Ira Cooke <iracooke@gmail.com>
parents: 7
diff changeset
29 --minprob=$minprob
Ira Cooke <iracooke@gmail.com>
parents: 7
diff changeset
30 --minindep=$minindep
Ira Cooke <iracooke@gmail.com>
parents: 7
diff changeset
31 </command>
2
25261529840c Uploaded
iracooke
parents:
diff changeset
32 <inputs>
25261529840c Uploaded
iracooke
parents:
diff changeset
33
25261529840c Uploaded
iracooke
parents:
diff changeset
34 <param name="input_file" type="data" format="peptideprophet_pepxml,interprophet_pepxml" multiple="false" label="Peptide Prophet Results" help="These files will typically be outputs from peptide prophet or interprophet"/>
25261529840c Uploaded
iracooke
parents:
diff changeset
35
25261529840c Uploaded
iracooke
parents:
diff changeset
36
25261529840c Uploaded
iracooke
parents:
diff changeset
37 <param name="iproph" selected="true" type="boolean" label="Inputs are from iProphet" truevalue="--iprophet-input" falsevalue=""/>
25261529840c Uploaded
iracooke
parents:
diff changeset
38 <param name="nooccam" type="boolean" label="Don't apply Occam's razor" help="When selected no attempt will be made to derive the simplest protein list explaining observed peptides" truevalue="--no-occam" falsevalue=""/>
25261529840c Uploaded
iracooke
parents:
diff changeset
39 <param name="groupwts" type="boolean" label="Use group weights" help="Check peptide's total weight (rather than actual weight) in the Protein Group against the threshold" truevalue="--group-wts" falsevalue=""/>
25261529840c Uploaded
iracooke
parents:
diff changeset
40 <param name="normprotlen" type="boolean" label="Normalize NSP using Protein Length" truevalue="--norm-protlen" falsevalue=""/>
25261529840c Uploaded
iracooke
parents:
diff changeset
41 <param name="logprobs" type="boolean" label="Use the log of probability in the confidence calculations" truevalue="--log-prob" falsevalue=""/>
25261529840c Uploaded
iracooke
parents:
diff changeset
42 <param name="confem" type="boolean" label="Use the EM to compute probability given the confidence" truevalue="--confem" falsevalue=""/>
25261529840c Uploaded
iracooke
parents:
diff changeset
43 <param name="allpeps" type="boolean" label="Consider all possible peptides in the database in the confidence model" truevalue="--allpeps" falsevalue=""/>
25261529840c Uploaded
iracooke
parents:
diff changeset
44 <param name="unmapped" type="boolean" label="Report results for unmapped proteins" truevalue="--unmapped" falsevalue=""/>
25261529840c Uploaded
iracooke
parents:
diff changeset
45 <param name="instances" type="boolean" label="Use Expected Number of Ion Instances to adjust the peptide probabilities prior to NSP adjustment" truevalue="--instances" falsevalue=""/>
25261529840c Uploaded
iracooke
parents:
diff changeset
46 <param name="delude" type="boolean" label="Do NOT use peptide degeneracy information when assessing proteins" truevalue="--delude" falsevalue=""/>
25261529840c Uploaded
iracooke
parents:
diff changeset
47
25261529840c Uploaded
iracooke
parents:
diff changeset
48 <param name="minprob" type="text" label="Minimum peptide prophet probability for peptides to be considered" value="0.05"/>
25261529840c Uploaded
iracooke
parents:
diff changeset
49 <param name="minindep" type="text" label="Minimum percentage of independent peptides required for a protein" value="0"/>
25261529840c Uploaded
iracooke
parents:
diff changeset
50
25261529840c Uploaded
iracooke
parents:
diff changeset
51 </inputs>
25261529840c Uploaded
iracooke
parents:
diff changeset
52 <outputs>
25261529840c Uploaded
iracooke
parents:
diff changeset
53 <data format="protxml" name="output" metadata_source="input_file" label="protein_prophet.${input_file.display_name}.protXML" from_work_dir="protein_prophet_results.prot.xml"/>
25261529840c Uploaded
iracooke
parents:
diff changeset
54 </outputs>
25261529840c Uploaded
iracooke
parents:
diff changeset
55
25261529840c Uploaded
iracooke
parents:
diff changeset
56
25261529840c Uploaded
iracooke
parents:
diff changeset
57 <!--NOPLOT: do not generate plot png file
25261529840c Uploaded
iracooke
parents:
diff changeset
58 NOOCCAM: non-conservative maximum protein list
25261529840c Uploaded
iracooke
parents:
diff changeset
59 GROUPWTS: check peptide's total weight in the Protein Group against the threshold (default: check peptide's actual weight against threshold)
25261529840c Uploaded
iracooke
parents:
diff changeset
60 NORMPROTLEN: Normalize NSP using Protein Length
25261529840c Uploaded
iracooke
parents:
diff changeset
61 LOGPROBS: Use the log of the probabilities in the Confidence calculations
25261529840c Uploaded
iracooke
parents:
diff changeset
62 CONFEM: Use the EM to compute probability given the confidence
25261529840c Uploaded
iracooke
parents:
diff changeset
63 ALLPEPS: Consider all possible peptides in the database in the confidence model
25261529840c Uploaded
iracooke
parents:
diff changeset
64 UNMAPPED: Report results for UNMAPPED proteins
25261529840c Uploaded
iracooke
parents:
diff changeset
65 INSTANCES: Use Expected Number of Ion Instances to adjust the peptide probabilities prior to NSP adjustment
25261529840c Uploaded
iracooke
parents:
diff changeset
66 DELUDE: do NOT use peptide degeneracy information when assessing proteins
25261529840c Uploaded
iracooke
parents:
diff changeset
67
25261529840c Uploaded
iracooke
parents:
diff changeset
68 MINPROB: peptideProphet probability threshold (default=0.05)
25261529840c Uploaded
iracooke
parents:
diff changeset
69 MININDEP: minimum percentage of independent peptides required for a protein (default=0)
25261529840c Uploaded
iracooke
parents:
diff changeset
70
25261529840c Uploaded
iracooke
parents:
diff changeset
71
25261529840c Uploaded
iracooke
parents:
diff changeset
72 -->
25261529840c Uploaded
iracooke
parents:
diff changeset
73
25261529840c Uploaded
iracooke
parents:
diff changeset
74 <help>
25261529840c Uploaded
iracooke
parents:
diff changeset
75
25261529840c Uploaded
iracooke
parents:
diff changeset
76 **What it does**
25261529840c Uploaded
iracooke
parents:
diff changeset
77
25261529840c Uploaded
iracooke
parents:
diff changeset
78 Given a set of peptide assignments from MS/MS spectra in the form of a pepXML file, this tool estimates probabilities at the protein level. As output, the tool produces a protXML file, which contains proteins along with the estimated probabilities that those proteins were present. Probabilities are estimated using a statistical model based on the number of peptides corresponding to that protein and the confidence that each of those peptides was assigned correctly. It takes account of the fact that peptides may correspond to more than one protein.
25261529840c Uploaded
iracooke
parents:
diff changeset
79
25261529840c Uploaded
iracooke
parents:
diff changeset
80 ----
25261529840c Uploaded
iracooke
parents:
diff changeset
81
25261529840c Uploaded
iracooke
parents:
diff changeset
82 **Citation**
25261529840c Uploaded
iracooke
parents:
diff changeset
83
25261529840c Uploaded
iracooke
parents:
diff changeset
84 If you use this tool please read and cite the paper describing the statistical model implemented by Protein Prophet
25261529840c Uploaded
iracooke
parents:
diff changeset
85
25261529840c Uploaded
iracooke
parents:
diff changeset
86 Nesvizhskii A., et al. “A Statistical Model for Identifying Proteins by Tandem Mass Spectrometry” *Anal. Chem.* 75, 4646-4658 (2003).
25261529840c Uploaded
iracooke
parents:
diff changeset
87
25261529840c Uploaded
iracooke
parents:
diff changeset
88
25261529840c Uploaded
iracooke
parents:
diff changeset
89 </help>
25261529840c Uploaded
iracooke
parents:
diff changeset
90
25261529840c Uploaded
iracooke
parents:
diff changeset
91 </tool>