comparison quantifere.xml @ 0:d50f079096ee

Push to main toolshed
author pieter.lukasse@wur.nl
date Wed, 08 Jan 2014 11:39:16 +0100
parents
children 73c7c6589202
comparison
equal deleted inserted replaced
-1:000000000000 0:d50f079096ee
1 <tool name="Quantifere" id="quantifere1" version="1.0.2">
2 <description>Protein Inference by Peptide Quantification patterns</description>
3 <!--
4 For remote debugging, start your listener on port 8000 and use the following as command interpreter:
5 java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000
6 //////////////////////////
7 -->
8 <command interpreter="java -jar ">
9 Quantifere.jar
10 -annotatedQuantificationFilesList $annotatedQuantificationFilesList
11 -identificationFilesList $identificationFilesList
12 -statisticalMeasuresConfigFile $statisticalMeasuresConfigFile
13 -quantificationDataToUse $quantificationDataToUse
14 -minCorrel $minCorrel
15 -minProtCoverage $minProtCoverage
16 -minAboveAverageHits $minAboveAverageHits
17 -minNrIdsForInferencePeptide $minNrIdsForInferencePeptide
18 -refineModel $refineModel
19 -functionalAnnotationCSV $functionalAnnotationCSV
20 -outputCSV $outputCSV
21 -outputInferenceLogCSV $outputInferenceLogCSV
22 -outputSummaryAnnotationCSV $outputSummaryAnnotationCSV
23 -outReport $htmlReportFile
24 -outReportPicturesPath $htmlReportFile.files_path
25 #if $is2D_LC_MS.fractions == True
26 -namingConventionCodesForFractions $is2D_LC_MS.namingConventionCodesForFractions
27 #end if
28 </command> <!-- Runs Quantifere.jar with "java -jar" as interpreter. Every option is always passed except -namingConventionCodesForFractions, which is only added inside the #if on the 2D LC-MS toggle. The annotatedQuantificationFilesList, identificationFilesList and statisticalMeasuresConfigFile arguments refer to the configfile entries declared in this tool. NOTE(review): the #if compares a boolean parameter whose truevalue is "Yes" against True; confirm this evaluates as intended on the target Galaxy version. -->
29
30 <inputs>
31
32 <repeat name="annotatedQuantificationFiles" title="Peptide (filtered) quantification files (APML)"
33 help="The APML contents as aligned, annotated and scored feature lists,
34 as produced by MsFilt tool. Select one or more files. For 2D-LC-MS we expect one file per fraction.">
35 <param name="annotatedQuantificationFile" size="50" type="data" format="apml" label="File (APML format)" />
36 </repeat> <!-- Each selected file is emitted on its own line into the annotatedQuantificationFilesList configfile. -->
37
38 <repeat name="identificationFiles" title="Peptide (filtered) identification files (MS/MS identifications)"
39 help="Full set of MS/MS peptide identification files, including peptides that could not be quantified.
40 This set of identifications is ideally filtered on some quality and
41 statistical measures (e.g. as is done by MsFilt). Tip: to base the inference only on the
42 selected peptide quantification files, you
43 can select the same quantification files here as well. Select one or more files.">
44 <param name="identificationFile" size="50" type="data" format="apml,mzid" label="File (APML or MZIDENTML format)" />
45 </repeat> <!-- For each selected file the identificationFilesList configfile receives the file reference followed by a line reading "apml" or "mzid". -->
46
47 <conditional name="is2D_LC_MS">
48 <param name="fractions" type="boolean" truevalue="Yes" falsevalue="No" checked="false"
49 label="Data is from 2D LC-MS"
50 help="Data acquisition was done in multiple fractions."/>
51 <when value="Yes">
52 <param name="namingConventionCodesForFractions" type="text" size="100" value=""
53 label="Part of run/file name that identifies the 2D LC-MS fraction"
54 help="Add the CSV list of codes that occur in the file names
55 and that stand for a fraction code. E.g. '_F1,_F2,_F3,etc.' In this
56 way different peptide identifications from the same sample but measured
57 in different fractions can be merged together. Otherwise each (fraction) file
58 is seen as a separate sample."/> <!-- could do regular expressions as well but this would be hard for biologists, e.g. _F\d\b -->
59 </when>
60 </conditional> <!-- Toggle for 2D LC-MS data. Only a "Yes" branch is declared; its namingConventionCodesForFractions value is referenced on the command line solely inside the #if on this toggle. -->
61
62 <param name="statisticalMeasuresConfig" type="text" area="true" size="6x70" label="Statistical measures configuration"
63 help="Here you may specify the statistical measures that are found in the ms/ms results (e.g. p or e-values).
64 The format is: SM alias => SM name,type,mode[min/max]. Leaving this configuration out while these are present in the
65 dataset will have the effect that they will be wrongly used as a regular scoring scheme, having effect on for example
66 the filter criteria below like 'Minimum number of peptide matches with a score above average' ."
67 value="smXTD =&gt; MS:1001330,XSLASH!Tandem:expect,min
68 &#xd;&#xa;pvCSVEX =&gt; p_value,CSV_EXPORT,min
69 &#xd;&#xa;smAUTO_LIKELIHOOD =&gt; AUTOMOD_LOGLIKELIHOOD,PLGS/Auto-mod,max
70 &#xd;&#xa;smLIKELIHOOD =&gt; LOGLIKELIHOOD,PLGS/Databank-search,max
71 "/> <!-- The entered text is emitted into the statisticalMeasuresConfigFile configfile via ${statisticalMeasuresConfig}; the default value lists four measures separated by CR/LF character references. NOTE(review): Galaxy may sanitize special characters in text parameters; confirm Quantifere receives the intended format. -->
72 <!-- keep value attribute above aligned like this to avoid white spaces in the value -->
73 <param name="quantificationDataToUse" type="select"
74 label="Quantification data to use"
75 help="Quantification data to use for the pattern clustering and inference steps. NB: check if the chosen data is also
76 present in your file, or choose 'auto' to let Quantifere check which quantification type is present in most peptides.">
77 <option value="auto" selected="true">auto</option>
78 <option value="getIntensity">(TODO)raw intensities</option>
79 <option value="getApexIntensity">(TODO)apex intensities</option>
80 <option value="getNormalizedIntensity">(TODO)normalized intensities</option>
81 </param> <!-- The selected option value is passed to Quantifere.jar as -quantificationDataToUse; "auto" is preselected and the other three labels still carry a (TODO) marker. -->
82 <!-- TODO let minCorrel default value vary according to quantification type chosen above -->
83 <param name="minCorrel" type="float" size="10" value="0.85" label="Minimum correlation in a cluster" help="Features will be grouped by their protein annotation and
84 sample intensity values correlation. Set here the minimum correlation expected between grouped members. This is used to guide the clustering algorithm."/> <!-- Float, default 0.85; passed to Quantifere.jar as -minCorrel. -->
85
86 <!-- simple extra heuristics to remove some "noise" protein hits -->
87 <param name="minProtCoverage" type="float" size="10" value="5.0" label="Minimum protein coverage (%)" help="This will remove proteins that have a too small
88 portion of their sequence covered by peptide matches."/> <!-- Float percentage, default 5.0; passed to Quantifere.jar as -minProtCoverage. -->
89
90 <param name="minAboveAverageHits" type="integer" size="10" value="1" label="Minimum number of different peptide matches with a score above average"
91 help="This will remove proteins that do not have enough reasonable peptide hits."/> <!-- Integer, default 1; passed to Quantifere.jar as -minAboveAverageHits. -->
92
93 <param name="minNrIdsForInferencePeptide" type="integer" size="10" value="1" label="Minimum number of peptide identifications for inference peptides"
94 help="Minimum number of peptide identifications a peptide needs to be used as inference peptide for secondary proteins."/> <!-- Integer, default 1; passed to Quantifere.jar as -minNrIdsForInferencePeptide. -->
95
96
97 <param name="functionalAnnotationCSV" type="data" format="csv,txt,tsv" optional="true"
98 label="(Functional)annotation mapping file (csv or tsv format)"
99 help="Optional file that maps protein accessions to a network, pathway or other higher level annotations. In this file a header line is expected with these 2 columns (names and lower case is important): accession,annotation"/> <!-- Optional dataset passed as -functionalAnnotationCSV; the outputSummaryAnnotationCSV output is filtered on this parameter not being None. -->
100
101 <param name="refineModel" type="boolean" checked="true" label="Refine matches model"
102 help="This will let the algorithm search for a reduced set of secondary protein matches that still explains the variation in the peptide quantification patterns"/> <!-- Boolean, checked by default; passed to Quantifere.jar as -refineModel. -->
103
104
105 <param name="summaryReport" type="boolean" checked="true" label="Generate summary report"/> <!-- Not referenced in the command section; it only drives the filter on the htmlReportFile output. -->
106
107 </inputs>
108 <configfiles> <!-- Three generated files referenced by name in the command section: the list of quantification files, the list of identification files (each entry followed by a line naming its type, apml or mzid), and the statistical measures text entered by the user. -->
109 <configfile name="annotatedQuantificationFilesList">## start comment
110 ## iterate over the selected files and store their names in the config file
111 #for $i, $s in enumerate( $annotatedQuantificationFiles )
112 ${s.annotatedQuantificationFile}
113 #end for
114 ## end comment</configfile>
115
116 <configfile name="identificationFilesList">## start comment
117 ## iterate over the selected files and store their names in the config file
118 #for $i, $s in enumerate( $identificationFiles )
119 ${s.identificationFile}
120 ## also print out the datatype in the next line, based on previously configured datatype
121 #if isinstance( $s.identificationFile.datatype, $__app__.datatypes_registry.get_datatype_by_extension('apml').__class__):
122 apml
123 #else:
124 mzid
125 #end if
126 #end for
127 ## end comment</configfile>
128 <configfile name="statisticalMeasuresConfigFile">## start comment
129 ${statisticalMeasuresConfig}
130 </configfile>
131 </configfiles>
132 <outputs> <!-- Four declared outputs: outputCSV and outputInferenceLogCSV carry no filter, while htmlReportFile and outputSummaryAnnotationCSV are each guarded by a filter expression on an input parameter. -->
133 <data name="outputCSV" format="csv" label="${tool.name} on ${on_string}: Proteins list (CSV)" />
134 <data name="outputInferenceLogCSV" format="csv" label="${tool.name} on ${on_string}: Inference log (CSV)"/>
135 <data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - HTML report">
136 <!-- If the expression is false, the file is not created -->
137 <filter>( summaryReport == True )</filter>
138 </data>
139 <data name="outputSummaryAnnotationCSV" format="csv" label="${tool.name} on ${on_string} - Functional annotation summary (CSV)">
140 <!-- If the expression is false, the file is not created -->
141 <filter>( functionalAnnotationCSV != None )</filter>
142 </data>
143 </outputs>
144 <tests>
145 </tests>
146 <help>
147
148 .. class:: infomark
149
150 This tool takes Peptide Quantification patterns and uses them to do Protein Inference of both Primary Protein
151 identifications as well as Secondary Protein identifications. This last class of protein identifications
152 cannot be made by traditional protein inference methods that look only at peptide identifications and
153 their quality parameters.
154
155
156 -----
157
158 **List of definitions**
159
160 Primary Protein identification: protein identification belonging to the minimum set of proteins needed
161 to account for the observed peptides.
162
163 Secondary Protein identification: extra protein identifications that do not belong to the minimum set
164 of proteins mentioned above.
165
166 raw intensities: the intensity value resulting from the integration of the feature peak area
167
168 apex intensities: the intensity value at the highest point of the feature peak
169
170 normalized intensities: the intensity normalized by some means
171
172 -----
173
174 **Minimum correlation in a cluster**
175
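176 Features are grouped by their protein annotation and by the correlation of their sample intensity values. This setting is the minimum correlation expected between the members of a group and is used to guide the clustering algorithm.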
177
178 -----
179
180 **Output details**
181
182 *Proteins list (CSV)*
183
184 This is the list of primary and secondary proteins and their calculated inference score. Proteins
185 with exactly the same peptide hits are also grouped together and labeled as primary_group and secondary_group
186 instead of simply primary and secondary.
187
188
189 *Inference log (CSV)*
190
191 This CSV table shows all data, both inferred and ruled out proteins. This can be used by the user to
192 troubleshoot the inference process and understand why certain proteins might have been ruled out.
193 The CSV is provided in such a format that the data can easily be explored in a Cytoscape network.
194
195 The figure below shows an example of the data being explored in Cytoscape using also the
196 `Cytoscape chartplugin`_ to visualize the quantification data when selecting the peptide nodes.
197
198 .. image:: $PATH_TO_IMAGES/quantifere_cyto_out.png
199
200
201 .. _Cytoscape chartplugin: http://apps.cytoscape.org/apps/chartplugin
202
203
204
205 </help>
206 </tool>