Mercurial > repos > pieterlukasse > prims_proteomics
annotate quantifere.xml @ 23:b1f68fcb7d99
fix
author | pieter.lukasse@wur.nl |
---|---|
date | Mon, 26 Jan 2015 07:06:22 +0100 |
parents | d31c6978d9d0 |
children | 5215b5cfdc53 |
rev | line source |
---|---|
17
40ec8770780d
* Added support for pepxml (and more specifically for
pieter.lukasse@wur.nl
parents:
6
diff
changeset
|
1 <tool name="Quantifere" id="quantifere1" version="1.0.3"> |
0 | 2 <description>Protein Inference by Peptide Quantification patterns</description> |
3 <!-- | |
4 For remote debugging start your listener on port 8000 and use the following as command interpreter: | |
5 java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000 | |
6 ////////////////////////// | |
7 --> | |
8 <command interpreter="java -jar "> | |
9 Quantifere.jar | |
10 -annotatedQuantificationFilesList $annotatedQuantificationFilesList | |
11 -identificationFilesList $identificationFilesList | |
12 -statisticalMeasuresConfigFile $statisticalMeasuresConfigFile | |
13 -quantificationDataToUse $quantificationDataToUse | |
14 -minCorrel $minCorrel | |
15 -minProtCoverage $minProtCoverage | |
16 -minAboveAverageHits $minAboveAverageHits | |
17 -minNrIdsForInferencePeptide $minNrIdsForInferencePeptide | |
18 -refineModel $refineModel | |
19 -functionalAnnotationCSV $functionalAnnotationCSV | |
20 -outputCSV $outputCSV | |
21 -outputInferenceLogCSV $outputInferenceLogCSV | |
22 -outputSummaryAnnotationCSV $outputSummaryAnnotationCSV | |
23 -outReport $htmlReportFile | |
24 -outReportPicturesPath $htmlReportFile.files_path | |
25 #if $is2D_LC_MS.fractions == True | |
26 -namingConventionCodesForFractions $is2D_LC_MS.namingConventionCodesForFractions | |
27 #end if | |
28 </command> | |
29 | |
30 <inputs> | |
31 | |
32 <repeat name="annotatedQuantificationFiles" title="Peptide (filtered) quantification files (APML)" | |
33 help="The APML contents as aligned, annotated and scored feature lists, | |
34 as produced by MsFilt tool. Select one or more files. For 2D-LC-MS we expect one file per fraction."> | |
35 <param name="annotatedQuantificationFile" size="50" type="data" format="apml" label="File (APML format)" /> | |
36 </repeat> | |
37 | |
38 <repeat name="identificationFiles" title="Peptide (filtered) identification files (MS/MS identifications)" | |
39 help="Full set of MS/MS peptide identification files, including peptides that could not be quantified. | |
40 This set of identifications is ideally filtered on some quality and | |
41 statistical measures (e.g. as is done by MsFilt). Tip: to base the inference only on the | |
42 selected peptide quantification files, you | |
43 can select the same quantification files here as well. Select one or more files."> | |
44 <param name="identificationFile" size="50" type="data" format="apml,mzid" label="File (APML or MZIDENTML format)" /> | |
45 </repeat> | |
46 | |
47 <conditional name="is2D_LC_MS"> | |
48 <param name="fractions" type="boolean" truevalue="Yes" falsevalue="No" checked="false" | |
49 label="Data is from 2D LC-MS" | |
50 help="Data acquisition was done in multiple fractions."/> | |
51 <when value="Yes"> | |
52 <param name="namingConventionCodesForFractions" type="text" size="100" value="" | |
53 label="Part of run/file name that identifies the 2D LC-MS fraction" | |
54 help="Add the CSV list of codes that occur in the file names | |
55 and that stand for a fraction code. E.g. '_F1,_F2,_F3,etc.' In this | |
56 way different peptide identifications from the same sample but measured | |
57 in different fractions can be merged together. Otherwise each (fraction) file | |
58 is seen as a separate sample."/> <!-- could do regular expressions as well but this would be hard for biologists, e.g. _F\d\b --> | |
19 | 59 <!-- on help above: the given codes are removed from source name...separate features are clustered, not peptides, peptides |
60 are quantified based on summing features (raw), or summing patterns : TODO document the quantification columns present in the output CSV --> | |
0 | 61 </when> |
6 | 62 <when value="No"> |
63 </when> | |
0 | 64 </conditional> |
65 | |
17
40ec8770780d
* Added support for pepxml (and more specifically for
pieter.lukasse@wur.nl
parents:
6
diff
changeset
|
66 <param name="statisticalMeasuresConfig" type="text" area="true" size="8x70" label="Statistical measures configuration" |
0 | 67 help="Here you may specify the statistical measures that are found in the ms/ms results (e.g. p or e-values). |
68 The format is: SM alias => SM name,type,mode[min/max]. Leaving this configuration out while these are present in the | |
69 dataset will have the effect that they will be wrongly used as a regular scoring scheme, having effect on for example | |
70 the filter criteria below like 'Minimum number of peptide matches with a score above average' ." | |
71 value="smXTD => MS:1001330,XSLASH!Tandem:expect,min | |
72 
pvCSVEX => p_value,CSV_EXPORT,min | |
73 
smAUTO_LIKELIHOOD => AUTOMOD_LOGLIKELIHOOD,PLGS/Auto-mod,max | |
74 
smLIKELIHOOD => LOGLIKELIHOOD,PLGS/Databank-search,max | |
17
40ec8770780d
* Added support for pepxml (and more specifically for
pieter.lukasse@wur.nl
parents:
6
diff
changeset
|
75 
smPercoProb => Percolator: probability,Percolator probability,max |
40ec8770780d
* Added support for pepxml (and more specifically for
pieter.lukasse@wur.nl
parents:
6
diff
changeset
|
76 
smPercoPEP => Percolator: PEP,Percolator PEP,min |
40ec8770780d
* Added support for pepxml (and more specifically for
pieter.lukasse@wur.nl
parents:
6
diff
changeset
|
77 
smPercoQval => Percolator: q-Value,Percolator q-Value,max |
0 | 78 "/> |
79 <!-- keep value attribute above aligned like this to avoid white spaces in the value --> | |
80 <param name="quantificationDataToUse" type="select" | |
81 label="Quantification data to use" | |
82 help="Quantification data to use for the pattern clustering and inference steps. NB: check if the chosen data is also | |
83 present in your file, or choose 'auto' to let Quantifere check which quantification type is present in most peptides."> | |
84 <option value="auto" selected="true">auto</option> | |
85 <option value="getIntensity">(TODO)raw intensities</option> | |
86 <option value="getApexIntensity">(TODO)apex intensities</option> | |
87 <option value="getNormalizedIntensity">(TODO)normalized intensities</option> | |
88 </param> | |
89 <!-- TODO let minCorrel default value vary according to quantification type chosen above --> | |
90 <param name="minCorrel" type="float" size="10" value="0.85" label="Minimum correlation in a cluster" help="Features will be grouped by their protein annotation and | |
91 sample intensity values correlation. Set here the minimum correlation expected between grouped members. This is used to guide the clustering algorithm."/> | |
92 | |
93 <!-- simple extra heuristics to remove some "noise" protein hits --> | |
19 | 94 <param name="minProtCoverage" type="float" size="10" value="0.0" label="Minimum protein coverage (%)" help="Set this to e.g. 5.0 if you have protein coverage |
95 information in your data. This will remove proteins that have a too small portion of their sequence covered by peptide matches."/> | |
96 <!-- TODO : ADD warning to report if this is left 0 and no coverage is found ...or maybe validate the other way around--> | |
0 | 97 |
98 <param name="minAboveAverageHits" type="integer" size="10" value="1" label="Minimum number of different peptide matches with a score above average" | |
99 help="This will remove proteins that do not have enough reasonable peptides hits."/> | |
100 | |
101 <param name="minNrIdsForInferencePeptide" type="integer" size="10" value="1" label="Minimum number of peptide identifications for inference peptides" | |
102 help="Minimum number of peptide identifications a peptide needs to be used as inference peptide for secondary proteins."/> | |
19 | 103 <!-- currently, when one feature clusters with foreign peptide, then it is not inference peptide anymore...quite strict, could be less strict |
104 by letting user indicate for example: 90% of features should be inference features...then it is an inference pep. See QuantifereTool.inferSecondaryProteins() --> | |
0 | 105 |
106 <param name="functionalAnnotationCSV" type="data" format="csv,txt,tsv" optional="true" | |
107 label="(Functional)annotation mapping file (csv or tsv format)" | |
108 help="Optional file that maps protein accessions to a network, pathway or other higher level annotations. In this file a header line is expected with these 2 columns (names and lower case is important): accession,annotation"/> | |
109 | |
110 <param name="refineModel" type="boolean" checked="true" label="Refine matches model" | |
111 help="This will let the algorithm search for a reduced set of secondary protein matches that still explains the variation in the peptide quantification patterns"/> | |
112 | |
113 | |
114 <param name="summaryReport" type="boolean" checked="true" label="Generate summary report"/> | |
115 | |
116 </inputs> | |
117 <configfiles> | |
118 <configfile name="annotatedQuantificationFilesList">## start comment | |
119 ## iterate over the selected files and store their names in the config file | |
120 #for $i, $s in enumerate( $annotatedQuantificationFiles ) | |
121 ${s.annotatedQuantificationFile} | |
122 #end for | |
123 ## end comment</configfile> | |
124 | |
125 <configfile name="identificationFilesList">## start comment | |
126 ## iterate over the selected files and store their names in the config file | |
127 #for $i, $s in enumerate( $identificationFiles ) | |
128 ${s.identificationFile} | |
129 ## also print out the datatype in the next line, based on previously configured datatype | |
130 #if isinstance( $s.identificationFile.datatype, $__app__.datatypes_registry.get_datatype_by_extension('apml').__class__): | |
131 apml | |
132 #else: | |
133 mzid | |
134 #end if | |
135 #end for | |
136 ## end comment</configfile> | |
137 <configfile name="statisticalMeasuresConfigFile">## start comment | |
138 ${statisticalMeasuresConfig} | |
139 </configfile> | |
140 </configfiles> | |
141 <outputs> | |
142 <data name="outputCSV" format="csv" label="${tool.name} on ${on_string}: Proteins list (CSV)" /> | |
143 <data name="outputInferenceLogCSV" format="csv" label="${tool.name} on ${on_string}: Inference log (CSV)"/> | |
144 <data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - HTML report"> | |
145 <!-- If the expression is false, the file is not created --> | |
146 <filter>( summaryReport == True )</filter> | |
147 </data> | |
148 <data name="outputSummaryAnnotationCSV" format="csv" label="${tool.name} on ${on_string} - Functional annotation summary (CSV)"> | |
149 <!-- If the expression is false, the file is not created --> | |
150 <filter>( functionalAnnotationCSV != None )</filter> | |
151 </data> | |
152 </outputs> | |
153 <tests> | |
154 </tests> | |
155 <help> | |
156 | |
157 .. class:: infomark | |
158 | |
159 This tool takes Peptide Quantification patterns and uses this to do Protein Inference of both Primary Protein | |
160 identifications as well as Secondary Protein identifications. This last class of protein identifications | |
161 can not be done by traditional protein inference methods that look only at peptide identifications and | |
162 their quality parameters. | |
163 | |
164 | |
165 ----- | |
166 | |
167 **List of definitions** | |
168 | |
169 Primary Protein identification: protein identification belonging to the minimum set of proteins needed | |
170 to account for the observed peptides. | |
171 | |
172 Secondary Protein identification: extra protein identifications that do not belong to the minimum set | |
173 of proteins mentioned above. | |
174 | |
175 raw intensities : is the intensity value resulting from the integration of the feature peak area | |
176 | |
177 apex intensities: is the intensity value as on the highest point of the feature peak | |
178 | |
179 normalized intensities : is the intensity normalized by some means | |
180 | |
181 ----- | |
182 | |
183 **Minimum correlation in a cluster** | |
184 | |
185 TODO - add doc. | |
186 | |
187 ----- | |
188 | |
189 **Output details** | |
190 | |
191 *Proteins list (CSV)* | |
192 | |
193 This is the list of primary and secondary proteins and their calculated inference score. Proteins | |
194 with exactly the same peptide hits are also grouped together and labeled as primary_group and secondary_group | |
195 instead of simply primary and secondary. | |
196 | |
197 | |
198 *Inference log (CSV)* | |
199 | |
200 This CSV table shows all data, both inferred and ruled out proteins. This can be used by the user to | |
201 troubleshoot the inference process and understand why certain proteins might have been ruled out. | |
202 The CSV is provided in such a format that the data can easily be explored in a Cytoscape network. | |
203 | |
204 The figure below shows an example of the data being explored in Cytoscape using also the | |
205 `Cytoscape chartplugin`_ to visualize the quantification data when selecting the peptide nodes. | |
206 | |
207 .. image:: $PATH_TO_IMAGES/quantifere_cyto_out.png | |
208 | |
4 | 209 . |
0 | 210 |
211 .. _Cytoscape chartplugin: http://apps.cytoscape.org/apps/chartplugin | |
212 | |
213 | |
19 | 214 **References** |
215 | |
216 If you use this Galaxy tool in work leading to a scientific publication please | |
217 cite the following papers: | |
218 | |
219 Pieter N. J. Lukasse and Antoine H. P. America (2014). | |
220 Protein Inference Using Peptide Quantification Patterns. | |
221 http://dx.doi.org/10.1021/pr401072g | |
222 | |
0 | 223 |
224 </help> | |
19 | 225 <citations> |
226 <citation type="doi">10.1021/pr401072g</citation> <!-- example | |
227 see also https://wiki.galaxyproject.org/Admin/Tools/ToolConfigSyntax#A.3Ccitations.3E_tag_set | |
228 --> | |
229 </citations> | |
0 | 230 </tool> |