0
|
1 <tool name="Quantifere" id="quantifere1" version="1.0.2">
|
|
2 <description>Protein Inference by Peptide Quantification patterns</description>
|
|
3 <!--
|
|
4 For remote debugging, start your listener on port 8000 and use the following as command interpreter:
|
|
5 java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000
|
|
6 //////////////////////////
|
|
7 -->
|
|
8 <command interpreter="java -jar ">
|
|
9 Quantifere.jar
|
|
10 -annotatedQuantificationFilesList $annotatedQuantificationFilesList
|
|
11 -identificationFilesList $identificationFilesList
|
|
12 -statisticalMeasuresConfigFile $statisticalMeasuresConfigFile
|
|
13 -quantificationDataToUse $quantificationDataToUse
|
|
14 -minCorrel $minCorrel
|
|
15 -minProtCoverage $minProtCoverage
|
|
16 -minAboveAverageHits $minAboveAverageHits
|
|
17 -minNrIdsForInferencePeptide $minNrIdsForInferencePeptide
|
|
18 -refineModel $refineModel
|
|
19 -functionalAnnotationCSV $functionalAnnotationCSV
|
|
20 -outputCSV $outputCSV
|
|
21 -outputInferenceLogCSV $outputInferenceLogCSV
|
|
22 -outputSummaryAnnotationCSV $outputSummaryAnnotationCSV
|
|
23 -outReport $htmlReportFile
|
|
24 -outReportPicturesPath $htmlReportFile.files_path
|
|
## The 'fractions' boolean declares truevalue="Yes", so compare its rendered string form rather than the Python literal True.
25 #if str($is2D_LC_MS.fractions) == "Yes"
|
|
26 -namingConventionCodesForFractions $is2D_LC_MS.namingConventionCodesForFractions
|
|
27 #end if
|
|
28 </command>
|
|
29
|
|
30 <inputs>
|
|
31
|
|
32 <repeat name="annotatedQuantificationFiles" title="Peptide (filtered) quantification files (APML)"
|
|
33 help="The APML contents as aligned, annotated and scored feature lists,
|
|
34 as produced by MsFilt tool. Select one or more files. For 2D-LC-MS we expect one file per fraction.">
|
|
35 <param name="annotatedQuantificationFile" size="50" type="data" format="apml" label="File (APML format)" />
|
|
36 </repeat>
|
|
37
|
|
38 <repeat name="identificationFiles" title="Peptide (filtered) identification files (MS/MS identifications)"
|
|
39 help="Full set of MS/MS peptide identification files, including peptides that could not be quantified.
|
|
40 This set of identifications is ideally filtered on some quality and
|
|
41 statistical measures (e.g. as is done by MsFilt). Tip: to base the inference only on the
|
|
42 selected peptide quantification files, you
|
|
43 can select the same quantification files here as well. Select one or more files.">
|
|
44 <param name="identificationFile" size="50" type="data" format="apml,mzid" label="File (APML or MZIDENTML format)" />
|
|
45 </repeat>
|
|
46
|
|
47 <conditional name="is2D_LC_MS">
|
|
48 <param name="fractions" type="boolean" truevalue="Yes" falsevalue="No" checked="false"
|
|
49 label="Data is from 2D LC-MS"
|
|
50 help="Data acquisition was done in multiple fractions."/>
|
|
51 <when value="Yes">
|
|
52 <param name="namingConventionCodesForFractions" type="text" size="100" value=""
|
|
53 label="Part of run/file name that identifies the 2D LC-MS fraction"
|
|
54 help="Add the CSV list of codes that occur in the file names
|
|
55 and that stand for a fraction code. E.g. '_F1,_F2,_F3,etc.' In this
|
|
56 way different peptide identifications from the same sample but measured
|
|
57 in different fractions can be merged together. Otherwise each (fraction) file
|
|
58 is seen as a separate sample."/> <!-- could do regular expressions as well but this would be hard for biologists, e.g. _F\d\b -->
|
|
59 </when>
|
6
|
60 <when value="No">
|
|
61 </when>
|
0
|
62 </conditional>
|
|
63
|
|
64 <param name="statisticalMeasuresConfig" type="text" area="true" size="6x70" label="Statistical measures configuration"
|
|
65 help="Here you may specify the statistical measures that are found in the ms/ms results (e.g. p or e-values).
|
|
66 The format is: SM alias => SM name,type,mode[min/max]. Leaving this configuration out while these are present in the
|
|
67 dataset will have the effect that they will be wrongly used as a regular scoring scheme, having effect on for example
|
|
68 the filter criteria below like 'Minimum number of peptide matches with a score above average' ."
|
|
69 value="smXTD => MS:1001330,XSLASH!Tandem:expect,min
|
|
70 
pvCSVEX => p_value,CSV_EXPORT,min
|
|
71 
smAUTO_LIKELIHOOD => AUTOMOD_LOGLIKELIHOOD,PLGS/Auto-mod,max
|
|
72 
smLIKELIHOOD => LOGLIKELIHOOD,PLGS/Databank-search,max
|
|
73 "/>
|
|
74 <!-- keep value attribute above aligned like this to avoid white spaces in the value -->
|
|
75 <param name="quantificationDataToUse" type="select"
|
|
76 label="Quantification data to use"
|
|
77 help="Quantification data to use for the pattern clustering and inference steps. NB: check if the chosen data is also
|
|
78 present in your file, or choose 'auto' to let Quantifere check which quantification type is present in most peptides.">
|
|
79 <option value="auto" selected="true">auto</option>
|
|
80 <option value="getIntensity">(TODO)raw intensities</option>
|
|
81 <option value="getApexIntensity">(TODO)apex intensities</option>
|
|
82 <option value="getNormalizedIntensity">(TODO)normalized intensities</option>
|
|
83 </param>
|
|
84 <!-- TODO let minCorrel default value vary according to quantification type chosen above -->
|
|
85 <param name="minCorrel" type="float" size="10" value="0.85" label="Minimum correlation in a cluster" help="Features will be grouped by their protein annotation and
|
|
86 sample intensity values correlation. Set here the minimum correlation expected between grouped members. This is used to guide the clustering algorithm."/>
|
|
87
|
|
88 <!-- simple extra heuristics to remove some "noise" protein hits -->
|
|
89 <param name="minProtCoverage" type="float" size="10" value="5.0" label="Minimum protein coverage (%)" help="This will remove proteins that have a too small
|
|
90 portion of their sequence covered by peptide matches."/>
|
|
91
|
|
92 <param name="minAboveAverageHits" type="integer" size="10" value="1" label="Minimum number of different peptide matches with a score above average"
|
|
93 help="This will remove proteins that do not have enough reasonable peptide hits."/>
|
|
94
|
|
95 <param name="minNrIdsForInferencePeptide" type="integer" size="10" value="1" label="Minimum number of peptide identifications for inference peptides"
|
|
96 help="Minimum number of peptide identifications a peptide needs to be used as inference peptide for secondary proteins."/>
|
|
97
|
|
98
|
|
99 <param name="functionalAnnotationCSV" type="data" format="csv,txt,tsv" optional="true"
|
|
100 label="(Functional)annotation mapping file (csv or tsv format)"
|
|
101 help="Optional file that maps protein accessions to a network, pathway or other higher level annotations. In this file a header line is expected with these 2 columns (names and lower case is important): accession,annotation"/>
|
|
102
|
|
103 <param name="refineModel" type="boolean" checked="true" label="Refine matches model"
|
|
104 help="This will let the algorithm search for a reduced set of secondary protein matches that still explains the variation in the peptide quantification patterns"/>
|
|
105
|
|
106
|
|
107 <param name="summaryReport" type="boolean" checked="true" label="Generate summary report"/>
|
|
108
|
|
109 </inputs>
|
|
110 <configfiles>
|
|
111 <configfile name="annotatedQuantificationFilesList">## start comment
|
|
112 ## iterate over the selected files and store their names in the config file
|
|
113 #for $i, $s in enumerate( $annotatedQuantificationFiles )
|
|
114 ${s.annotatedQuantificationFile}
|
|
115 #end for
|
|
116 ## end comment</configfile>
|
|
117
|
|
118 <configfile name="identificationFilesList">## start comment
|
|
119 ## iterate over the selected files and store their names in the config file
|
|
120 #for $i, $s in enumerate( $identificationFiles )
|
|
121 ${s.identificationFile}
|
|
122 ## also print out the datatype in the next line, based on previously configured datatype
|
|
123 #if isinstance( $s.identificationFile.datatype, $__app__.datatypes_registry.get_datatype_by_extension('apml').__class__):
|
|
124 apml
|
|
125 #else:
|
|
126 mzid
|
|
127 #end if
|
|
128 #end for
|
|
129 ## end comment</configfile>
|
|
130 <configfile name="statisticalMeasuresConfigFile">## start comment
|
|
131 ${statisticalMeasuresConfig}
|
|
132 </configfile>
|
|
133 </configfiles>
|
|
134 <outputs>
|
|
135 <data name="outputCSV" format="csv" label="${tool.name} on ${on_string}: Proteins list (CSV)" />
|
|
136 <data name="outputInferenceLogCSV" format="csv" label="${tool.name} on ${on_string}: Inference log (CSV)"/>
|
|
137 <data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - HTML report">
|
|
138 <!-- If the expression is false, the file is not created -->
|
|
139 <filter>( summaryReport == True )</filter>
|
|
140 </data>
|
|
141 <data name="outputSummaryAnnotationCSV" format="csv" label="${tool.name} on ${on_string} - Functional annotation summary (CSV)">
|
|
142 <!-- If the expression is false, the file is not created -->
|
|
143 <filter>( functionalAnnotationCSV != None )</filter>
|
|
144 </data>
|
|
145 </outputs>
|
|
146 <tests>
|
|
147 </tests>
|
|
148 <help>
|
|
149
|
|
150 .. class:: infomark
|
|
151
|
|
152 This tool takes Peptide Quantification patterns and uses them to do Protein Inference of both Primary Protein
|
|
153 identifications as well as Secondary Protein identifications. This last class of protein identifications
|
|
154 can not be done by traditional protein inference methods that look only at peptide identifications and
|
|
155 their quality parameters.
|
|
156
|
|
157
|
|
158 -----
|
|
159
|
|
160 **List of definitions**
|
|
161
|
|
162 Primary Protein identification: protein identification belonging to the minimum set of proteins needed
|
|
163 to account for the observed peptides.
|
|
164
|
|
165 Secondary Protein identification: extra protein identifications that do not belong to the minimum set
|
|
166 of proteins mentioned above.
|
|
167
|
|
168 raw intensities : is the intensity value resulting from the integration of the feature peak area
|
|
169
|
|
170 apex intensities: is the intensity value as on the highest point of the feature peak
|
|
171
|
|
172 normalized intensities : is the intensity normalized by some means
|
|
173
|
|
174 -----
|
|
175
|
|
176 **Minimum correlation in a cluster**
|
|
177
|
|
178 TODO - add doc.
|
|
179
|
|
180 -----
|
|
181
|
|
182 **Output details**
|
|
183
|
|
184 *Proteins list (CSV)*
|
|
185
|
|
186 This is the list of primary and secondary proteins and their calculated inference score. Proteins
|
|
187 with exactly the same peptide hits are also grouped together and labeled as primary_group and secondary_group
|
|
188 instead of simply primary and secondary.
|
|
189
|
|
190
|
|
191 *Inference log (CSV)*
|
|
192
|
|
193 This CSV table shows all data, both inferred and ruled out proteins. This can be used by the user to
|
|
194 troubleshoot the inference process and understand why certain proteins might have been ruled out.
|
|
195 The CSV is provided in such a format that the data can easily be explored in a Cytoscape network.
|
|
196
|
|
197 The figure below shows an example of the data being explored in Cytoscape using also the
|
|
198 `Cytoscape chartplugin`_ to visualize the quantification data when selecting the peptide nodes.
|
|
199
|
|
200 .. image:: $PATH_TO_IMAGES/quantifere_cyto_out.png
|
|
201
|
4
|
202 .
|
0
|
203
|
|
204 .. _Cytoscape chartplugin: http://apps.cytoscape.org/apps/chartplugin
|
|
205
|
|
206
|
|
207
|
|
208 </help>
|
|
209 </tool>
|