Mercurial > repos > computational-metabolomics > mspurity_spectralmatching
comparison spectralMatching.xml @ 0:5ff9d40c7a42 draft
"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit cb903cd93f9378cfb5eeb68512a54178dcea7bbc-dirty"
author | computational-metabolomics |
---|---|
date | Wed, 27 Nov 2019 12:31:31 -0500 |
parents | |
children | aee10d29e82c |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5ff9d40c7a42 |
---|---|
1 <tool id="mspurity_spectralmatching" name="msPurity.spectralMatching" version="@TOOL_VERSION@+galaxy@GALAXY_TOOL_VERSION@"> | |
2 <description> | |
3 Perform spectral matching to MS/MS spectral libraries | |
4 </description> | |
5 <macros> | |
6 <import>macros.xml</import> | |
7 </macros> | |
8 <expand macro="requirements"/> | |
9 <command detect_errors="exit_code"><![CDATA[ | |
10 Rscript '$__tool_directory__/spectralMatching.R' | |
11 --outDir=. | |
12 --cores=\${GALAXY_SLOTS:-4} | |
13 ## Database inputs: pass the defaultDb flag when 'msPurityData' is selected, otherwise the dataset path (single-quoted so the shell keeps it as one argument) | |
14 #if $Query.q_dbPth_con.q_dbPth_select == 'msPurityData' | |
15 --q_defaultDb | |
16 #else if $Query.q_dbPth_con.q_dbPth_select == 'sqlite' | |
17 --q_dbPth='$Query.q_dbPth_con.q_dbPth' | |
18 #end if | |
19 | |
20 #if $Library.l_dbPth_con.l_dbPth_select == 'msPurityData' | |
21 --l_defaultDb | |
22 #else if $Library.l_dbPth_con.l_dbPth_select == 'userdb_sqlite' | |
23 --l_dbPth='$Library.l_dbPth_con.l_dbPth' | |
24 #end if | |
25 ## NOTE(review): the query branch tests for 'sqlite' but the library branch tests for 'userdb_sqlite', and the tests in this file set l_dbPth_select to 'userdb'; confirm the option values declared in macros.xml | |
26 --l_dbType=$Library.l_dbPth_con.l_dbPth_select | |
27 --q_dbType=$Query.q_dbPth_con.q_dbPth_select | |
28 | |
29 | |
30 --q_ppmPrec=$Query.q_filters.q_ppmPrec | |
31 --l_ppmPrec=$Library.l_filters.l_ppmPrec | |
32 | |
33 --q_ppmProd=$Query.q_filters.q_ppmProd | |
34 --l_ppmProd=$Library.l_filters.l_ppmProd | |
35 | |
36 ## Optional filters: each argument below is emitted only when its enabling boolean is set; free-text values are single-quoted so whitespace cannot split an argument | |
37 #if $Query.q_filters.q_raThres_cond.q_raThres_bool | |
38 --q_raThres=$Query.q_filters.q_raThres_cond.q_raThres | |
39 #end if | |
40 | |
41 #if $Library.l_filters.l_raThres_cond.l_raThres_bool | |
42 --l_raThres=$Library.l_filters.l_raThres_cond.l_raThres | |
43 #end if | |
44 | |
45 #if $Query.q_filters.q_polarity_cond.q_polarity_bool | |
46 --q_polarity=$Query.q_filters.q_polarity_cond.q_polarity | |
47 #end if | |
48 | |
49 #if $Library.l_filters.l_polarity_cond.l_polarity_bool | |
50 --l_polarity=$Library.l_filters.l_polarity_cond.l_polarity | |
51 #end if | |
52 | |
53 #if $Query.q_filters.q_purity_cond.q_purity_bool | |
54 --q_purity=$Query.q_filters.q_purity_cond.q_purity | |
55 #end if | |
56 | |
57 #if $Library.l_filters.l_purity_cond.l_purity_bool | |
58 --l_purity=$Library.l_filters.l_purity_cond.l_purity | |
59 #end if | |
60 | |
61 #if $Query.q_filters.q_xcmsGroups_cond.q_xcmsGroups_bool | |
62 --q_xcmsGroups='$Query.q_filters.q_xcmsGroups_cond.q_xcmsGroups' | |
63 #end if | |
64 | |
65 #if $Library.l_filters.l_xcmsGroups_cond.l_xcmsGroups_bool | |
66 --l_xcmsGroups='$Library.l_filters.l_xcmsGroups_cond.l_xcmsGroups' | |
67 #end if | |
68 | |
69 #if $Query.q_filters.q_pids_cond.q_pids_bool | |
70 --q_pids='$Query.q_filters.q_pids_cond.q_pids' | |
71 #end if | |
72 | |
73 #if $Library.l_filters.l_pids_cond.l_pids_bool | |
74 --l_pids='$Library.l_filters.l_pids_cond.l_pids' | |
75 #end if | |
76 | |
77 #if $Query.q_filters.q_rtrange_cond.q_rtrange_bool | |
78 --q_rtrangeMin=$Query.q_filters.q_rtrange_cond.q_rtrangeMin | |
79 --q_rtrangeMax=$Query.q_filters.q_rtrange_cond.q_rtrangeMax | |
80 #end if | |
81 | |
82 #if $Library.l_filters.l_rtrange_cond.l_rtrange_bool | |
83 --l_rtrangeMin=$Library.l_filters.l_rtrange_cond.l_rtrangeMin | |
84 --l_rtrangeMax=$Library.l_filters.l_rtrange_cond.l_rtrangeMax | |
85 #end if | |
86 | |
87 #if $Query.q_filters.q_accessions_cond.q_accessions_bool | |
88 --q_accessions='$Query.q_filters.q_accessions_cond.q_accessions' | |
89 #end if | |
90 | |
91 #if $Library.l_filters.l_accessions_cond.l_accessions_bool | |
92 --l_accessions='$Library.l_filters.l_accessions_cond.l_accessions' | |
93 #end if | |
94 | |
95 | |
96 #if $Query.q_filters.q_sources_cond.q_sources_bool | |
97 --q_sources=$Query.q_filters.q_sources_cond.q_sources | |
98 --q_sourcesUser='$Query.q_filters.q_sources_cond.q_sourcesUser' | |
99 #end if | |
100 | |
101 #if $Library.l_filters.l_sources_cond.l_sources_bool | |
102 --l_sources=$Library.l_filters.l_sources_cond.l_sources | |
103 --l_sourcesUser='$Library.l_filters.l_sources_cond.l_sourcesUser' | |
104 #end if | |
105 | |
106 #if $Query.q_filters.q_instrumentTypes_cond.q_instrumentTypes_bool | |
107 --q_instrumentTypes='$Query.q_filters.q_instrumentTypes_cond.q_instrumentTypes' | |
108 --q_instrumentTypesUser='$Query.q_filters.q_instrumentTypes_cond.q_instrumentTypesUser' | |
109 #end if | |
110 | |
111 #if $Library.l_filters.l_instrumentTypes_cond.l_instrumentTypes_bool | |
112 --l_instrumentTypes='$Library.l_filters.l_instrumentTypes_cond.l_instrumentTypes' | |
113 --l_instrumentTypesUser='$Library.l_filters.l_instrumentTypes_cond.l_instrumentTypesUser' | |
114 #end if | |
115 | |
116 #if $Query.q_filters.q_instruments_cond.q_instruments_bool | |
117 --q_instruments='$Query.q_filters.q_instruments_cond.q_instruments' | |
118 #end if | |
119 | |
120 #if $Library.l_filters.l_instruments_cond.l_instruments_bool | |
121 --l_instruments='$Library.l_filters.l_instruments_cond.l_instruments' | |
122 #end if | |
123 | |
124 #if $Query.q_filters.q_spectraTypes_cond.q_spectraTypes_bool | |
125 --q_spectraTypes=$Query.q_filters.q_spectraTypes_cond.q_spectraTypes | |
126 #end if | |
127 | |
128 #if $Library.l_filters.l_spectraTypes_cond.l_spectraTypes_bool | |
129 --l_spectraTypes=$Library.l_filters.l_spectraTypes_cond.l_spectraTypes | |
130 #end if | |
131 | |
132 #if $Query.q_filters.q_spectraFilter | |
133 --q_spectraFilter | |
134 #end if | |
135 | |
136 #if $Library.l_filters.l_spectraFilter | |
137 --l_spectraFilter | |
138 #end if | |
139 | |
140 #if $General.rttol_cond.rttol_bool | |
141 --rttol=$General.rttol_cond.rttol | |
142 #end if | |
143 | |
144 --raW=$General.raW | |
145 --mzW=$General.mzW | |
146 ## copyDb is nested below, so it can only be passed together with updateDb | |
147 #if $General.updateDb_cond.updateDb | |
148 --updateDb | |
149 #if $General.updateDb_cond.copyDb | |
150 --copyDb | |
151 #end if | |
152 #end if | |
153 | |
154 #if $General.usePrecursors | |
155 --usePrecursors | |
156 #end if | |
157 | |
158 ]]></command> | |
159 <inputs> | |
160 <section name="Query" title="Query spectra input and filters" expanded="True"> <!-- Query side: database input (sm_input macro, short prefix "q") followed by the filters macro; both macros are presumably defined in the imported macros.xml --> | |
161 <expand macro="sm_input" ql="Query" ql_shrt = "q" user="True" mspuritydatalib="False" msp="False" | |
162 help="Query SQLite database - in the standard XCMS msPurity workflow - the output | |
163 of msPurity.createDatabase should be used here. However any SQLite database | |
164 following the schema of https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-database-vignette.html can be used as input"/> | |
165 <expand macro="filters" ql="Query" ql_shrt="q"/> | |
166 </section> | |
167 <section name="Library" title="Library spectra input and filters" expanded="True"> <!-- Library side: database input (sm_input macro, short prefix "l") followed by the filters macro; both macros are presumably defined in the imported macros.xml --> | |
168 <expand macro="sm_input" ql="Library" ql_shrt = "l" user="False" mspuritydatalib="True" msp="False" | |
169 help="Library SQLite database - in the standard XCMS msPurity workflow - a default | |
170 database of MassBank, HMDB, LipidBlast and GNPS is used. However any SQLite | |
171 database following the schema of https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-database-vignette.html can be used as input"/> | |
172 <expand macro="filters" ql="Library" ql_shrt="l"/> | |
173 </section> | |
174 <section name="General" title="General arguments" expanded="False"> <!-- Options the command template passes without a q_ or l_ prefix: rttol, usePrecursors, raW, mzW, updateDb and copyDb --> | |
175 <conditional name="rttol_cond"> <!-- rttol is only exposed, and only passed to the script, when rttol_bool is checked --> | |
176 <param name="rttol_bool" type="boolean" label="Filter on retention time match?" | |
177 help="" /> | |
178 <when value="true"> | |
179 <param name="rttol" type="float" value="30" min="0" | |
180 label="Retention time tolerance (seconds)" | |
181 help="Retention time tolerance in seconds to match precursors"/> | |
182 </when> | |
183 <when value="false"/> | |
184 </conditional> | |
185 <param name="usePrecursors" type="boolean" checked="true" label="Filter on matching precursors?" | |
186 help="If True, spectra will be filtered by similarity of precursors based on | |
187 the library and query ppm defined tolerance" /> | |
188 <param name="raW" label="Weighting for relative abundance" type="float" value="0.5" | |
189 help="Relative abundance weight for spectra (default to 0.5 as determined by | |
190 massbank for ESI data)"/> <!-- NOTE(review): raW declares no min attribute while mzW below declares min="0"; confirm whether negative weights are meant to be accepted here --> | |
191 <param name="mzW" label="Weighting for mz" type="float" min="0" value="2" | |
192 help="mz weight for spectra (default to 2 as determined by massbank for ESI data)"/> | |
193 <conditional name="updateDb_cond"> <!-- copyDb is only offered when updateDb is true, matching the nested check in the command template --> | |
194 <param name="updateDb" type="boolean" checked="true" | |
195 label="Update database with results?" help="" /> | |
196 <when value="true"> | |
197 <param name="copyDb" type="boolean" checked="true" | |
198 label="Make a copy of the database?" | |
199 help="A copy will be made of the input SQLite target database and the | |
200 results will be added to this copy. When False, the input SQLite | |
201 database will be updated with the matching results. Use False if | |
202 you want to reduce storage space being used."/> | |
203 </when> | |
204 <when value="false"/> | |
205 </conditional> | |
206 </section> | |
207 </inputs> | |
208 | |
209 <outputs> <!-- Each dataset is collected from the job working directory through its from_work_dir file name --> | |
210 <data name="sqlite_results" format="sqlite" label="${tool.name} on ${on_string}: SQLite results" | |
211 from_work_dir="db_with_spectral_matching.sqlite" > | |
212 <filter>create_new_database is True</filter> <!-- NOTE(review): no input named create_new_database is declared in this file; confirm it exists in macros.xml, otherwise this filter cannot evaluate as intended (General.updateDb_cond looks like the relevant switch, to be confirmed) --> | |
213 </data> | |
214 <data name="matches" format="tsv" label="${tool.name} on ${on_string}: matches" | |
215 from_work_dir="matched_results.tsv" > | |
216 <filter>spectra_type_q == "scans"</filter> <!-- NOTE(review): no input named spectra_type_q is declared in this file; confirm it exists in macros.xml or replace it with the intended parameter path --> | |
217 </data> | |
218 <data name="xcms_matches" format="tsv" label="${tool.name} on ${on_string}: XCMS matches" | |
219 from_work_dir="xcms_matched_results.tsv" /> <!-- No filter: this output is always declared --> | |
220 </outputs> | |
221 <tests> | |
222 <test> <!-- Matches query XCMS group 14 against library accession PR100037 using two user supplied SQLite databases --> | |
223 <param name="q_dbPth" value="createDatabase_output.sqlite" /> | |
224 <param name="l_dbPth_select" value="userdb" /> <!-- NOTE(review): the command template only checks for 'msPurityData' and 'userdb_sqlite'; with 'userdb' neither branch would pass a library database argument. Confirm the option values in macros.xml --> | |
225 <param name="l_dbPth" value="PR100037.sqlite" /> | |
226 <param name="q_xcmsGroups_bool" value="true" /> | |
227 <param name="l_accessions_bool" value="true" /> | |
228 <param name="q_xcmsGroups" value="14" /> | |
229 <param name="l_accessions" value="PR100037" /> | |
230 <output name="xcms_matches" file="spectralMatching_matched_results.tsv" /> <!-- NOTE(review): xcms_matches is collected from xcms_matched_results.tsv, yet it is compared with the file named matched_results; the two expected files look swapped, verify against the test data --> | |
231 <output name="matches" file="spectralMatching_xcms_matched_results.tsv" /> | |
232 <output name="sqlite_results" value="spectralMatching_db_with_spectral_matching.sqlite" ftype="sqlite" compare="sim_size"/> <!-- Compared with sim_size rather than by content --> | |
233 </test> | |
234 <test> <!-- Same inputs as the preceding test, with l_instrumentTypes_bool additionally enabled --> | |
235 <param name="l_instrumentTypes_bool" value="true" /> | |
236 <param name="q_dbPth" value="createDatabase_output.sqlite" /> | |
237 <param name="l_dbPth_select" value="userdb" /> <!-- NOTE(review): the command template only checks for 'msPurityData' and 'userdb_sqlite'; confirm that 'userdb' is a valid option value in macros.xml --> | |
238 <param name="l_dbPth" value="PR100037.sqlite" /> | |
239 <param name="q_xcmsGroups_bool" value="true" /> | |
240 <param name="l_accessions_bool" value="true" /> | |
241 <param name="q_xcmsGroups" value="14" /> | |
242 <param name="l_accessions" value="PR100037" /> | |
243 <output name="xcms_matches" file="spectralMatching_matched_results_instrumentTypes.tsv" /> <!-- NOTE(review): expected file names for xcms_matches and matches look swapped relative to the from_work_dir names in outputs; verify against the test data --> | |
244 <output name="matches" file="spectralMatching_xcms_matched_results_instrumentTypes.tsv" /> | |
245 <output name="sqlite_results" value="spectralMatching_db_with_spectral_matching_instrumentTypes.sqlite" ftype="sqlite" compare="sim_size"/> <!-- Compared with sim_size rather than by content --> | |
246 </test> | |
247 </tests> | |
248 | |
249 <help><![CDATA[ | |
250 | |
251 ============================================================= | |
252 Spectral matching | |
253 ============================================================= | |
254 ----------- | |
255 General | |
256 ----------- | |
257 | |
258 | |
259 Perform spectral matching to spectral libraries for an LC-MS/MS dataset. | |
260 | |
261 The spectral matching is performed from a **Query** SQLite spectral-database against a **Library** SQLite spectral-database. | |
262 | |
263 The SQLite schema of the spectral database is described here: spectral_database_schema_ | |
264 | |
265 | |
266 The query spectral-database in most cases should be the "unknown" spectra database generated by the msPurity | |
267 function createDatabase as part of a msPurity-XCMS data processing workflow. | |
268 | |
269 The library spectral-database in most cases should contain the "known" spectra from either public or user generated resources. | |
270 The library SQLite database by default contains data from MoNA including Massbank, HMDB, LipidBlast and GNPS. | |
271 A larger_database_ can be downloaded and used from the msp2db github repository. | |
272 | |
273 To create a user generated library SQLite database the following tool can be used to generate a SQLite database | |
274 from a collection of MSP files: msp2db_. | |
275 | |
276 It should be noted though, that as long as the schema of the spectral-database is as described here, then any database can be used | |
277 for either the library or query - even allowing for the same database to be used. | |
278 | |
279 The spectral matching functionality has four main components, spectral filtering, spectral alignment, spectral matching, | |
280 and summarising the results. | |
281 | |
282 Spectral filtering is simply filtering both the library and query spectra to be searched against (e.g. choosing | |
283 the library source, instrument, retention time, precursor PPM tolerance etc). | |
284 | |
285 The spectral alignment stage involves aligning the query peaks to the library peaks. The approach used is similar | |
286 to the modified pMatch algorithm described in Zhou et al 2015. | |
287 | |
288 The spectral matching of the aligned spectra is performed against a combined intensity and m/z weighted vector - created for both | |
289 the query and library spectra (wq and wl). See below: | |
290 | |
291 .. math:: | |
292 | |
293 w=intensity^x \cdot mz^y | |
294 | |
295 | |
296 Where x and y represent weight factors and can be adjusted with the parameters raW and mzW. | |
297 Defaults to x=0.5 and y=2 as per MassBank for ESI based mass spectrometry data. | |
298 | |
299 The aligned weighted vectors are then matched using dot product cosine, reverse dot product cosine and the composite dot product. | |
300 See below for dot product cosine equation. | |
301 | |
302 .. math:: | |
303 | |
304 dpc = \frac{ w_q \cdot w_l }{ \sqrt{\sum{w_{q}^{2}}} \cdot \sqrt{\sum{w_{l}^{2}}} } | |
305 | |
306 | |
307 Full details of the matching approaches are described in the msPurity_spectral_matching_vignette_ | |
308 | |
309 -------------------------------------------- | |
310 Example LC-MS/MS processing workflow | |
311 -------------------------------------------- | |
312 | |
313 * Purity assessments | |
314 + (mzML files) -> purityA -> (pa) | |
315 * XCMS processing | |
316 + (mzML files) -> xcms.xcmsSet -> xcms.merge -> xcms.group -> xcms.retcor -> xcms.group -> (xset) | |
317 * Fragmentation processing | |
318 + (xset, pa) -> frag4feature -> filterFragSpectra -> averageAllFragSpectra -> createDatabase -> **spectralMatching** -> (sqlite spectral database) | |
319 | |
320 ----------- | |
321 Output | |
322 ----------- | |
323 | |
324 **Database** | |
325 | |
326 The updated query database (this will have been updated with the annotation results if the updateDb argument is used) | |
327 | |
328 | |
329 **xcmsMatchedResults** | |
330 | |
331 If the query spectra had XCMS based chromatographic peak tables (e.g. c_peak_groups, c_peaks) in the sqlite database - it will | |
332 be possible to summarise the matches for each XCMS grouped feature. The dataframe contains the following columns | |
333 | |
334 * lpid - id in database of library spectra | |
335 * qpid - id in database of query spectra | |
336 * dpc - dot product cosine of the match | |
337 * rdpc - reverse dot product cosine of the match | |
338 * cdpc - composite dot product cosine of the match | |
339 * mcount - number of matching peaks | |
340 * allcount - total number of peaks across both query and library spectra | |
341 * mpercent - percentage of matching peaks across both query and library spectra | |
342 * library_rt - retention time of library spectra | |
343 * query_rt - retention time of query spectra | |
344 * rtdiff - difference between library and query retention time | |
345 * library_precursor_mz - library precursor mz | |
346 * query_precursor_mz - query precursor mz | |
347 * library_precursor_ion_purity - library precursor ion purity | |
348 * query_precursor_ion_purity - query precursor ion purity | |
349 * library_accession - library accession value (unique string or number given to either MoNA or MassBank data entries) | |
350 * library_precursor_type - library precursor type (i.e. adduct) | |
351 * library_entry_name - Name given to the library spectra | |
352 * inchikey - inchikey of the matched library spectra | |
353 * library_source_name - source of the spectra (e.g. massbank, gnps) | |
354 * library_compound_name - name of compound spectra was obtained from | |
355 | |
356 **matchedResults** | |
357 | |
358 All matched results from the query spectra to the library spectra. Contains the same as above | |
359 without the XCMS details. This table is useful to observe spectral matching results | |
360 for all MS/MS spectra irrespective of whether they are linked to XCMS MS1 features. | |
361 | |
362 | |
363 .. _spectral_database_schema: https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-datatabase-schema.html | |
364 .. _larger_database: https://github.com/computational-metabolomics/msp2db/releases | |
365 .. _msp2db: https://github.com/computational-metabolomics/msp2db/releases | |
366 .. _msPurity_spectral_matching_vignette: https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-lcmsms-data-processing-and-spectral-matching-vignette.html | |
367 | |
368 ]]></help> | |
369 | |
370 <expand macro="citations"> </expand> | |
371 </tool> |