Mercurial > repos > pieterlukasse > prims_proteomics
comparison quantifere.xml @ 0:d50f079096ee
Push to main toolshed
author | pieter.lukasse@wur.nl |
---|---|
date | Wed, 08 Jan 2014 11:39:16 +0100 |
parents | |
children | 73c7c6589202 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d50f079096ee |
---|---|
1 <tool name="Quantifere" id="quantifere1" version="1.0.2"> | |
2 <description>Protein Inference by Peptide Quantification patterns</description> | |
3 <!-- | |
4 For remote debugging, start your listener on port 8000 and use the following as command interpreter: | |
5 java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000 | |
6 NB: the "fractions" checkbox is tested below via str() against its truevalue "Yes"; comparing the parameter wrapper to Python True is unreliable. | |
7 --> | |
8 <command interpreter="java -jar "> | |
9 Quantifere.jar | |
10 -annotatedQuantificationFilesList $annotatedQuantificationFilesList | |
11 -identificationFilesList $identificationFilesList | |
12 -statisticalMeasuresConfigFile $statisticalMeasuresConfigFile | |
13 -quantificationDataToUse $quantificationDataToUse | |
14 -minCorrel $minCorrel | |
15 -minProtCoverage $minProtCoverage | |
16 -minAboveAverageHits $minAboveAverageHits | |
17 -minNrIdsForInferencePeptide $minNrIdsForInferencePeptide | |
18 -refineModel $refineModel | |
19 -functionalAnnotationCSV $functionalAnnotationCSV | |
20 -outputCSV $outputCSV | |
21 -outputInferenceLogCSV $outputInferenceLogCSV | |
22 -outputSummaryAnnotationCSV $outputSummaryAnnotationCSV | |
23 -outReport $htmlReportFile | |
24 -outReportPicturesPath $htmlReportFile.files_path | |
25 #if str($is2D_LC_MS.fractions) == "Yes" | |
26 -namingConventionCodesForFractions '$is2D_LC_MS.namingConventionCodesForFractions' | |
27 #end if | |
28 </command> | |
29 | |
30 <inputs> | |
31 <!-- Repeatable list of APML quantification files; the annotatedQuantificationFilesList configfile iterates over this repeat --> | |
32 <repeat name="annotatedQuantificationFiles" title="Peptide (filtered) quantification files (APML)" | |
33 help="The APML contents as aligned, annotated and scored feature lists, | |
34 as produced by MsFilt tool. Select one or more files. For 2D-LC-MS we expect one file per fraction."> | |
35 <param name="annotatedQuantificationFile" size="50" type="data" format="apml" label="File (APML format)" /> | |
36 </repeat> | |
37 <!-- Repeatable list of identification files (apml or mzid); the identificationFilesList configfile writes each file together with its type --> | |
38 <repeat name="identificationFiles" title="Peptide (filtered) identification files (MS/MS identifications)" | |
39 help="Full set of MS/MS peptide identification files, including peptides that could not be quantified. | |
40 This set of identifications is ideally filtered on some quality and | |
41 statistical measures (e.g. as is done by MsFilt). Tip: to base the inference only on the | |
42 selected peptide quantification files, you | |
43 can select the same quantification files here as well. Select one or more files."> | |
44 <param name="identificationFile" size="50" type="data" format="apml,mzid" label="File (APML or MZIDENTML format)" /> | |
45 </repeat> | |
46 | |
47 <conditional name="is2D_LC_MS"> <!-- the command section reads $is2D_LC_MS.fractions to decide whether the fraction codes are passed to Quantifere --> | |
48 <param name="fractions" type="boolean" truevalue="Yes" falsevalue="No" checked="false" | |
49 label="Data is from 2D LC-MS" help="Data acquisition was done in multiple fractions."/> | |
50 <when value="Yes"> | |
51 <param name="namingConventionCodesForFractions" type="text" size="100" value="" | |
52 label="Part of run/file name that identifies the 2D LC-MS fraction" | |
53 help="Add the CSV list of codes that occur in the file names | |
54 and that stand for a fraction code. E.g. '_F1,_F2,_F3,etc.' In this | |
55 way different peptide identifications from the same sample but measured | |
56 in different fractions can be merged together. Otherwise each (fraction) file | |
57 is seen as a separate sample."/> <!-- could do regular expressions as well but this would be hard for biologists, e.g. _F\d\b --> | |
58 </when> | |
59 <when value="No"/> <!-- explicit empty case for the unchecked (default) state, so that every value of the test parameter has a matching when --> | |
60 </conditional> | |
61 | |
62 <param name="statisticalMeasuresConfig" type="text" area="true" size="6x70" label="Statistical measures configuration" | |
63 help="Here you may specify the statistical measures that are found in the ms/ms results (e.g. p or e-values). | |
64 The format is: SM alias => SM name,type,mode[min/max]. Leaving this configuration out while these are present in the | |
65 dataset will have the effect that they will be wrongly used as a regular scoring scheme, having effect on for example | |
66 the filter criteria below like 'Minimum number of peptide matches with a score above average' ." | |
67 value="smXTD => MS:1001330,XSLASH!Tandem:expect,min | |
68 
pvCSVEX => p_value,CSV_EXPORT,min | |
69 
smAUTO_LIKELIHOOD => AUTOMOD_LOGLIKELIHOOD,PLGS/Auto-mod,max | |
70 
smLIKELIHOOD => LOGLIKELIHOOD,PLGS/Databank-search,max | |
71 "/> | |
72 <!-- keep value attribute above aligned like this to avoid white spaces in the value --> | |
73 <param name="quantificationDataToUse" type="select" | |
74 label="Quantification data to use" | |
75 help="Quantification data to use for the pattern clustering and inference steps. NB: check if the chosen data is also | |
76 present in your file, or choose 'auto' to let Quantifere check which quantification type is present in most peptides."> | |
77 <option value="auto" selected="true">auto</option> | |
78 <option value="getIntensity">(TODO)raw intensities</option> | |
79 <option value="getApexIntensity">(TODO)apex intensities</option> | |
80 <option value="getNormalizedIntensity">(TODO)normalized intensities</option> | |
81 </param> | |
82 <!-- TODO: let the minCorrel default (currently fixed at 0.85) vary according to the quantification type chosen above --> | |
83 <param name="minCorrel" type="float" size="10" value="0.85" label="Minimum correlation in a cluster" help="Features will be grouped by their protein annotation and | |
84 sample intensity values correlation. Set here the minimum correlation expected between grouped members. This is used to guide the clustering algorithm."/> | |
85 | |
86 <!-- Simple extra heuristics to remove some "noise" protein hits: minimum coverage, minimum above-average peptide matches, minimum identifications per inference peptide --> | |
87 <param name="minProtCoverage" type="float" size="10" value="5.0" label="Minimum protein coverage (%)" help="This will remove proteins that have a too small | |
88 portion of their sequence covered by peptide matches."/> | |
89 | |
90 <param name="minAboveAverageHits" type="integer" size="10" value="1" label="Minimum number of different peptide matches with a score above average" | |
91 help="This will remove proteins that do not have enough reasonable peptides hits."/> | |
92 | |
93 <param name="minNrIdsForInferencePeptide" type="integer" size="10" value="1" label="Minimum number of peptide identifications for inference peptides" | |
94 help="Minimum number of peptide identifications a peptide needs to be used as inference peptide for secondary proteins."/> | |
95 | |
96 <!-- Optional input: the outputSummaryAnnotationCSV output is filtered on whether this file was supplied --> | |
97 <param name="functionalAnnotationCSV" type="data" format="csv,txt,tsv" optional="true" | |
98 label="(Functional)annotation mapping file (csv or tsv format)" | |
99 help="Optional file that maps protein accessions to a network, pathway or other higher level annotations. In this file a header line is expected with these 2 columns (names and lower case is important): accession,annotation"/> | |
100 | |
101 <param name="refineModel" type="boolean" checked="true" label="Refine matches model" | |
102 help="This will let the algorithm search for a reduced set of secondary protein matches that still explains the variation in the peptide quantification patterns"/> | |
103 | |
104 <!-- summaryReport is not passed on the command line; it is only used by the filter of the htmlReportFile output --> | |
105 <param name="summaryReport" type="boolean" checked="true" label="Generate summary report"/> | |
106 | |
107 </inputs> | |
108 <configfiles> <!-- the command section passes $annotatedQuantificationFilesList, $identificationFilesList and $statisticalMeasuresConfigFile to Quantifere.jar --> | |
109 <configfile name="annotatedQuantificationFilesList">## start comment | |
110 ## iterate over the selected files and store their names in the config file | |
111 #for $i, $s in enumerate( $annotatedQuantificationFiles ) | |
112 ${s.annotatedQuantificationFile} | |
113 #end for | |
114 ## end comment</configfile> | |
115 <!-- identificationFilesList: for every selected identification file, one line with the file reference followed by one line with its type (apml or mzid) --> | |
116 <configfile name="identificationFilesList">## start comment | |
117 ## iterate over the selected files and store their names in the config file | |
118 #for $i, $s in enumerate( $identificationFiles ) | |
119 ${s.identificationFile} | |
120 ## also print out the datatype in the next line, based on previously configured datatype | |
121 #if isinstance( $s.identificationFile.datatype, $__app__.datatypes_registry.get_datatype_by_extension('apml').__class__): | |
122 apml | |
123 #else: | |
124 mzid | |
125 #end if | |
126 #end for | |
127 ## end comment</configfile> <!-- the next configfile holds the text entered in the statisticalMeasuresConfig parameter --> | |
128 <configfile name="statisticalMeasuresConfigFile">## start comment | |
129 ${statisticalMeasuresConfig} | |
130 </configfile> | |
131 </configfiles> | |
132 <outputs> <!-- outputCSV and outputInferenceLogCSV carry no filter; the two outputs after them are conditional --> | |
133 <data name="outputCSV" format="csv" label="${tool.name} on ${on_string}: Proteins list (CSV)" /> | |
134 <data name="outputInferenceLogCSV" format="csv" label="${tool.name} on ${on_string}: Inference log (CSV)"/> | |
135 <data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - HTML report"> | |
136 <!-- Only created when the "Generate summary report" checkbox (summaryReport) is checked; if the expression is false, the file is not created --> | |
137 <filter>( summaryReport == True )</filter> | |
138 </data> | |
139 <data name="outputSummaryAnnotationCSV" format="csv" label="${tool.name} on ${on_string} - Functional annotation summary (CSV)"> | |
140 <!-- Only created when the optional functionalAnnotationCSV mapping file was supplied; if the expression is false, the file is not created --> | |
141 <filter>( functionalAnnotationCSV != None )</filter> | |
142 </data> | |
143 </outputs> | |
144 <tests> | |
145 </tests> | |
146 <help> | |
147 | |
148 .. class:: infomark | |
149 | |
150 This tool takes Peptide Quantification patterns and uses them to perform Protein Inference of both Primary Protein | |
151 identifications and Secondary Protein identifications. This last class of protein identifications | |
152 cannot be made by traditional protein inference methods that look only at peptide identifications and | |
153 their quality parameters. | |
154 | |
155 | |
156 ----- | |
157 | |
158 **List of definitions** | |
159 | |
160 Primary Protein identification: protein identification belonging to the minimum set of proteins needed | |
161 to account for the observed peptides. | |
162 | |
163 Secondary Protein identification: extra protein identifications that do not belong to the minimum set | |
164 of proteins mentioned above. | |
165 | |
166 raw intensities : is the intensity value resulting from the integration of the feature peak area | |
167 | |
168 apex intensities: is the intensity value as on the highest point of the feature peak | |
169 | |
170 normalized intensities : is the intensity normalized by some means | |
171 | |
172 ----- | |
173 | |
174 **Minimum correlation in a cluster** | |
175 | |
176 TODO - add doc. | |
177 | |
178 ----- | |
179 | |
180 **Output details** | |
181 | |
182 *Proteins list (CSV)* | |
183 | |
184 This is the list of primary and secondary proteins and their calculated inference score. Proteins | |
185 with exactly the same peptide hits are also grouped together and labeled as primary_group and secondary_group | |
186 instead of simply primary and secondary. | |
187 | |
188 | |
189 *Inference log (CSV)* | |
190 | |
191 This CSV table shows all data, both inferred and ruled out proteins. This can be used by the user to | |
192 troubleshoot the inference process and understand why certain proteins might have been ruled out. | |
193 The CSV is provided in such a format that the data can easily be explored in a Cytoscape network. | |
194 | |
195 The figure below shows an example of the data being explored in Cytoscape using also the | |
196 `Cytoscape chartplugin`_ to visualize the quantification data when selecting the peptide nodes. | |
197 | |
198 .. image:: $PATH_TO_IMAGES/quantifere_cyto_out.png | |
199 | |
200 | |
201 .. _Cytoscape chartplugin: http://apps.cytoscape.org/apps/chartplugin | |
202 | |
203 | |
204 | |
205 </help> | |
206 </tool> |