10
|
1 <tool name="MsFilt" id="msfilt" version="1.0.3">
|
0
|
2 <description>Filters annotations based on MS/MS peptide identification and annotation quality measures</description>
|
|
3 <!--
|
|
4 For remote debugging start your listener on port 8000 and use the following as command interpreter:
|
|
5 java -jar -Xdebug -Xrunjdwp:transport=dt_socket,address=D0100564.wurnet.nl:8000
|
|
6 //////////////////////////
|
|
7 -->
|
|
<!-- Command line: runs MsFilt.jar through "java -jar" and passes every input parameter, config file and output dataset as a "-name value" pair. The two FP criteria expressions are double-quoted because their values contain spaces. NOTE(review): apmlFile is declared optional, yet $apmlFile.metadata.base_name is dereferenced unconditionally below; confirm this still renders when no APML file is selected. -->
8 <command interpreter="java -jar ">
|
|
9 MsFilt.jar
|
|
10 -apmlFile $apmlFile
|
|
11 -datasetCode $apmlFile.metadata.base_name
|
|
12 -rankingMetadataFile $rankingMetadataFile
|
|
13 -statisticalMeasuresConfigFile $statisticalMeasuresConfigFile
|
|
14 -annotationSourceConfigFile $annotationSourceConfigFile
|
|
15 -outApml $outputApml
|
|
16 -outNewIdsApml $outNewIdsApml
|
|
17 -outFullCSV $outputCSV
|
|
18 -outRankingTable $outRankingTable
|
|
19 -outProteinCoverageCSV $outProteinCoverageCSV
|
|
20 -fpCriteriaExpression "$fpCriteriaExpression"
|
|
21 -filterOutFPAnnotations $filterOutFPAnnotations
|
|
22 -fpCriteriaExpressionForIds "$fpCriteriaExpressionForIds"
|
|
23 -filterOutFPIds $filterOutFPIds
|
|
24 -filterOutUnannotatedAlignments $filterOutUnannotatedAlignments
|
|
25 -addRawRankingInfo $addRawRankingInfo
|
|
26 -addScaledIntensityInfo $addScaledIntensityInfo
|
|
27 -addRawIntensityInfo $addRawIntensityInfo
|
|
28 -outReport $htmlReportFile
|
|
29 -outReportPicturesPath $htmlReportFile.files_path
|
|
30 </command>
|
|
31
|
|
32 <inputs>
|
|
33
|
|
34 <param name="apmlFile" type="data" format="apml" optional="true"
|
|
35 label="(Optional) Peptide quantification file (APML)"
|
|
36 help="The APML contents as aligned and annotated feature lists. E.g. produced by
|
|
37 SEDMAT or Quantiline tools." />
|
|
38
|
|
39 <repeat name="annotationSourceFiles" title="(Optional) Peptide identification files" help="Full set of MS/MS peptide identification files, including peptides that could not be quantified.">
|
|
40 <param name="identificationsFile" type="data" format="apml,mzidentml,prims.fileset.zip" label="Identifications file (APML or MZIDENTML or MZIDENTML fileSet)" />
|
|
41 </repeat>
|
|
42
|
|
43 <!--
|
|
44 <param name="maxNrRankings" type="integer" size="10" value="0" label="Maximum nr. of items to leave in the final ranking (set=0 for no limit) " />
|
|
45 -->
|
|
46 <!-- TODO add info somewhere that deltaRt is 'corrected deltaRt' -->
|
|
<!-- Default quality-measure weights, one "alias => name,weight" entry per line. The line breaks between entries are written as &#10; character references because an XML parser replaces literal newlines inside an attribute value with spaces, which would merge all entries into a single line. -->
47 <param name="rankingWeightConfig" type="text" area="true" size="13x70" label="Quality Measures (qm's) and ranking weights configuration"
|
|
48 help="Here you may specify a weight for each of the Quality Measures (QMs). These are used for the final QM score and possibly for ranking (e.g. in case of label-free data
|
|
49 processed by SEDMAT). The format is: QM alias => QM name,weight. "
|
|
50 value="qmDRT => delta rt (standard score),1
|
|
51 &#10;qmDMA => delta mass annotation (standard score),1
|
|
52 &#10;qmDMP => delta mass psm (standard score),1
|
|
53 &#10;qmBSCR => best peptide score (standard score),1
|
|
54 &#10;qmALCV => alignment coverage (fraction),1
|
|
55 &#10;qmSTCV => score type coverage (fraction),1
|
|
56 &#10;qmPACV => peptide's best proteinAnnotCoverage (standard score),1
|
|
57 &#10;qmPICV => peptide's best proteinIdentifCoverage (standard score),1
|
|
58 &#10;qmANS => annotation sources (count),1
|
|
59 &#10;qmCSEV => charge states evidence (count),0.2
|
|
60 &#10;qmBCSP=> best correlation with source or product peptide (correl),1
|
|
61 &#10;qmBCCS => best correlation with other charge state (correl),1
|
|
62 &#10;qmBCOS => best correlation with other sibling peptide (correl),1
|
|
63 "/>
|
|
64
|
|
<!-- Default statistical-measure definitions, one "alias => name,type,mode" entry per line. The line breaks between entries are written as &#10; character references because an XML parser replaces literal newlines inside an attribute value with spaces, which would merge all entries into a single line. -->
65 <param name="statisticalMeasuresConfig" type="text" area="true" size="6x70" label="Statistical measures configuration"
|
|
66 help="Here you may specify the statistical measures that are found in the ms/ms results (e.g. p or e-values).
|
|
67 The format is: SM alias => SM name,type,mode[min/max]. "
|
|
68 value="smXTD => MS:1001330,XSLASH!Tandem:expect,min
|
|
69 &#10;pvCSVEX => p_value,CSV_EXPORT,min
|
|
70 &#10;smAUTO_LIKELIHOOD => AUTOMOD_LOGLIKELIHOOD,PLGS/Auto-mod,max
|
|
71 &#10;smLIKELIHOOD => LOGLIKELIHOOD,PLGS/Databank-search,max
|
|
72 "/>
|
|
73
|
|
74 <param name="filterOutUnannotatedAlignments" type="boolean" checked="true"
|
|
75 label="Filter out unannotated alignments"
|
|
76 help="This helps decrease the output file size (features with no annotation are then not reported anymore)"/>
|
|
77
|
|
78 <param name="filterOutFPAnnotations" type="boolean" checked="true"
|
|
79 label="Filter out False Positive (FP) annotations" />
|
|
80
|
|
<!-- Free-text logical expression that marks an annotation as False Positive. Every less-than sign inside the help and value attributes is written as &lt; because a literal one is not allowed in an XML attribute value; the parsed text is unchanged. -->
81 <param name="fpCriteriaExpression" type="text" size="120" label="False Positive (FP) criteria for annotations"
|
|
82 help="Criteria (in standard score measures) for classifying an annotation as False Positive (FP).
|
|
83 You can build logical rules using the QM aliases above, the keywords 'and', 'or' and parenthesis.
|
|
84 Comparisons can be made with '==,&lt;,>,&lt;=,>='"
|
|
85 value="qmDRT &lt;0 or qmDMA &lt;-0.5 or (qmDMP &lt;-0.5 and qmBSCR&lt;-0.5) or (!isNaN(smXTD) and smXTD >0.01)"/>
|
|
86
|
|
87
|
|
88 <param name="filterOutFPIds" type="boolean" checked="true"
|
|
89 label="Filter out False Positive (FP) peptide identifications" />
|
|
90
|
|
<!-- Free-text logical expression that marks a peptide identification as False Positive. Every less-than sign inside the value attribute is written as &lt; because a literal one is not allowed in an XML attribute value; the parsed text is unchanged. -->
91 <param name="fpCriteriaExpressionForIds" type="text" size="120"
|
|
92 label="False Positive (FP) criteria for identifications"
|
|
93 help="Criteria (in standard score measures) for classifying a peptide identification as False Positive (FP).
|
|
94 Here you can use a subset of the quality measures (qmDMP, qmBSCR, qmSTCV, qmPICV, qmCSEV) and all statistical measures."
|
|
95 value="(qmDMP &lt;-0.5 and qmBSCR&lt;-0.5) or (!isNaN(smXTD) and smXTD >0.01)"/>
|
|
96
|
|
97
|
|
98 <param name="addRawRankingInfo" type="boolean" checked="false"
|
|
99 label="Include the raw scores/values of the ranking attributes in the CSV output"
|
|
100 help="This will result in one extra column per ranking attribute, each column holding the original data for this attribute (before normalization)."/>
|
|
101
|
|
102 <param name="addScaledIntensityInfo" type="boolean" checked="false"
|
|
103 label="Include computed scaled intensity values in the CSV output"
|
|
104 help="The autoscaled and 'z-score'scaled (aka 'standard-score'scaled) intensity values are then added to the full CSV output file"/>
|
|
105
|
|
106 <param name="addRawIntensityInfo" type="boolean" checked="false"
|
|
107 label="Include the raw intensity values in the CSV output"
|
|
108 help="The original intensity values (as found in the input file) are then added to the full CSV output file"/>
|
|
109
|
|
110
|
|
111 </inputs>
|
|
<!-- Templates for the three config files referenced in the command as $rankingMetadataFile, $statisticalMeasuresConfigFile and $annotationSourceConfigFile. The first two contain the text of the rankingWeightConfig and statisticalMeasuresConfig parameters. The third lists, for each entry of the annotationSourceFiles repeat, the identifications file followed on the next line by its type: "apml" when the dataset's datatype is an instance of the registered apml datatype class, "mzid" otherwise. -->
112 <configfiles>
|
|
113 <configfile name="rankingMetadataFile">${rankingWeightConfig}</configfile>
|
|
114 <configfile name="statisticalMeasuresConfigFile">${statisticalMeasuresConfig}</configfile>
|
|
115 <configfile name="annotationSourceConfigFile">## start comment
|
|
116 ## iterate over the selected files and store their names in the config file
|
|
117 #for $i, $s in enumerate( $annotationSourceFiles )
|
10
|
118 ${s.identificationsFile}
|
0
|
119 ## also print out the datatype in the next line, based on previously configured datatype
|
|
120 #if isinstance( $s.identificationsFile.datatype, $__app__.datatypes_registry.get_datatype_by_extension('apml').__class__):
|
|
121 apml
|
|
122 #else:
|
|
123 mzid
|
|
124 #end if
|
|
125 #end for
|
|
126 ## end comment</configfile>
|
|
127 </configfiles>
|
|
<!-- Output datasets. Each filter decides whether the dataset is created: outputApml, outputCSV and outRankingTable require an APML input file; outNewIdsApml requires filterOutFPIds to be checked; outProteinCoverageCSV requires at least one entry in the annotationSourceFiles repeat; htmlReportFile has no filter. -->
128 <outputs>
|
|
129 <data name="outputApml" format="apml" label="${apmlFile.metadata.base_name} - ${tool.name} on ${on_string}: quantifications (filtered APML)" metadata_source="apmlFile">
|
|
130 <!-- If the expression is false, the file is not created -->
|
|
131 <filter>( apmlFile != None )</filter>
|
|
132 </data>
|
|
133 <data name="outNewIdsApml" format="apml" label="${tool.name} on ${on_string}: identifications (filtered APML)" >
|
|
134 <filter>( filterOutFPIds == True )</filter>
|
|
135 </data>
|
|
136 <data name="outputCSV" format="csv" label="${apmlFile.metadata.base_name} - ${tool.name} on ${on_string}: Full CSV" metadata_source="apmlFile">
|
|
137 <filter>( apmlFile != None )</filter>
|
|
138 </data>
|
|
139 <data name="outRankingTable" format="csv" label="${apmlFile.metadata.base_name} - ${tool.name} on ${on_string}: Ranking table (CSV)" metadata_source="apmlFile">
|
|
140 <filter>( apmlFile != None )</filter>
|
|
141 </data>
|
|
142 <data name="outProteinCoverageCSV" format="csv" label="${tool.name} on ${on_string}: Protein coverage details (CSV)">
|
|
143 <!-- If the expression is false, the file is not created -->
|
|
144 <filter>( len(list(enumerate(annotationSourceFiles))) > 0 )</filter>
|
|
145 </data>
|
|
146 <data name="htmlReportFile" format="html" label="${tool.name} on ${on_string} - HTML report"/>
|
|
147 </outputs>
|
|
148 <tests>
|
|
149 </tests>
|
|
150 <help>
|
|
151
|
|
152 .. class:: infomark
|
|
153
|
|
154 This tool takes in peptide quantification results (e.g. either by SEDMAT for label-free data or by Quantiline for labeled data)
|
|
155 and calculates a number of quality measures that can help in assessing the correctness of the quantification assignment and of the MS/MS peptide
|
|
156 identification itself. The user can use any combination of quality measures (qm's) and statistical measures (sm's) to filter out
|
|
157 low scoring entries.
|
|
158
|
|
159 .. class:: infomark
|
|
160
|
|
161 In the label-free data processed by SEDMAT it is possible that a feature quantification gets assigned to different peptides. This means
|
|
162 we have an ambiguous assignment. In such a case
|
|
163 this tool also does a ranking of the different assignments according to their quality measures so that the best scoring assignment
|
|
164 gets ranked as first.
|
|
165
|
|
166 -----
|
|
167
|
|
168 **List of abbreviations**
|
|
169
|
|
170 QM: Quality Measure
|
|
171
|
|
172 SM: Statistical Measure (e.g. p-value, e-value from MS/MS identification)
|
|
173
|
|
174 PSM: "Peptide to Spectrum Match" (aka peptide identification)
|
|
175
|
|
176 FP: False Positive
|
|
177
|
|
178 -----
|
|
179
|
|
180 **Filtering options details**
|
|
181
|
|
182 The FP criteria will be applied to an annotation even if the corresponding quality measures involved
|
|
183 in the expression can NOT ALL be determined. QMs that cannot be determined get the value 0 (zero), which is
|
|
184 equal to giving it the average value.
|
|
185
|
|
186 The output report shows some plots that visualize the filtering done. This can help in fine-tuning the right filtering
|
|
187 criteria.
|
|
188
|
|
189 -----
|
|
190
|
|
191 **Output details**
|
|
192
|
|
193 *APML output*
|
|
194
|
|
195 This tool returns the given APML alignment file further annotated at the alignment level with the best ranking
|
|
196 peptides of each respective alignment. This APML can be used in subsequent Galaxy tools like the proteomics tools
|
|
197 from NBIC.
|
|
198
|
|
199 The APML output can also be used for the Protein Inference step (see Quantifere tool).
|
|
200
|
|
201 *CSV output*
|
|
202
|
|
203 It also returns a CSV format output with the full quality measures and scoring and ranking details. The user could use
|
|
204 this to manually determine new weights for some of the quality measures by techniques such as
|
|
205 linear regression. In other words, this CSV can then be used to fine-tune the weights in a next run.
|
|
206
|
|
207 Many of the quality measures (QMs) are normalized to their Standard Score (aka z-score).
|
|
208 `See Standard Score for more details...`__
|
|
209
|
|
210 Next to giving insight into how the ranking was established, a more complete version of this CSV file is also
|
|
211 generated for tools that cannot or won't process the APML output format.
|
|
212
|
|
213 Below is a brief overview of the CSV and an illustration of the ranking done in case of ambiguous peptide-to-feature assignments
|
|
214 (explained above, can happen in case of label-free data processing by SEDMAT).
|
|
215
|
|
216
|
|
217 .. image:: $PATH_TO_IMAGES/msfilt_csv_out.png
|
|
218
|
|
219
|
|
220
|
|
221 .. __: javascript:window.open('http://en.wikipedia.org/wiki/Standard_score','popUpWindow','height=700,width=800,left=10,top=10,resizable=yes,scrollbars=yes,toolbar=yes,menubar=no,location=no,directories=no,status=yes')
|
|
222
|
|
223
|
|
224
|
|
225
|
|
226 </help>
|
|
227 </tool>
|