Mercurial > repos > computational-metabolomics > mspurity_spectralmatching
view spectralMatching.xml @ 9:c33b92eeb1fb draft
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 20a48a1862267264f98b7c514287f9a5cba1143f
author | computational-metabolomics |
---|---|
date | Thu, 13 Jun 2024 11:37:41 +0000 |
parents | fecfe8c80e25 |
children |
line wrap: on
line source
<tool id="mspurity_spectralmatching" name="msPurity.spectralMatching" version="@TOOL_VERSION@+galaxy@GALAXY_TOOL_VERSION@">
    <!--
        Galaxy wrapper that runs spectralMatching.R (shipped in the tool directory) through Rscript.
        The requirements, the per-database input/filter parameters, the version tokens and the
        citations are all expanded from macros.xml.
    -->
    <description>Perform spectral matching to MS/MS spectral libraries</description>

    <macros>
        <import>macros.xml</import>
    </macros>

    <expand macro="requirements"/>

    <command detect_errors="exit_code"><![CDATA[
        ## Values supplied by the macro-defined parameters are single-quoted so that they reach
        ## the R script as one argument each. The three float parameters declared in this file
        ## (rttol, raW, mzW) are left bare.
        Rscript '$__tool_directory__/spectralMatching.R'
            --outDir=.
            --cores=\${GALAXY_SLOTS:-4}

            ## Query and library database selection
            #if $Query.q_dbPth_con.q_dbPth_select == 'msPurityData'
                --q_defaultDb
            #else if $Query.q_dbPth_con.q_dbPth_select == 'sqlite'
                --q_dbPth='$Query.q_dbPth_con.q_dbPth'
            #end if

            #if $Library.l_dbPth_con.l_dbPth_select == 'msPurityData'
                --l_defaultDb
            #else if $Library.l_dbPth_con.l_dbPth_select == 'sqlite'
                --l_dbPth='$Library.l_dbPth_con.l_dbPth'
            #end if

            --l_dbType='$Library.l_dbPth_con.l_dbPth_select'
            --q_dbType='$Query.q_dbPth_con.q_dbPth_select'

            ## Filters that are always passed
            --q_ppmPrec='$Query.q_filters.q_ppmPrec'
            --l_ppmPrec='$Library.l_filters.l_ppmPrec'

            --q_ppmProd='$Query.q_filters.q_ppmProd'
            --l_ppmProd='$Library.l_filters.l_ppmProd'

            ## Optional filters, each passed only when its switch is enabled
            #if $Query.q_filters.q_raThres_cond.q_raThres_bool
                --q_raThres='$Query.q_filters.q_raThres_cond.q_raThres'
            #end if
            #if $Library.l_filters.l_raThres_cond.l_raThres_bool
                --l_raThres='$Library.l_filters.l_raThres_cond.l_raThres'
            #end if

            #if $Query.q_filters.q_polarity_cond.q_polarity_bool
                --q_polarity='$Query.q_filters.q_polarity_cond.q_polarity'
            #end if
            #if $Library.l_filters.l_polarity_cond.l_polarity_bool
                --l_polarity='$Library.l_filters.l_polarity_cond.l_polarity'
            #end if

            #if $Query.q_filters.q_purity_cond.q_purity_bool
                --q_purity='$Query.q_filters.q_purity_cond.q_purity'
            #end if
            #if $Library.l_filters.l_purity_cond.l_purity_bool
                --l_purity='$Library.l_filters.l_purity_cond.l_purity'
            #end if

            #if $Query.q_filters.q_xcmsGroups_cond.q_xcmsGroups_bool
                --q_xcmsGroups='$Query.q_filters.q_xcmsGroups_cond.q_xcmsGroups'
            #end if
            #if $Library.l_filters.l_xcmsGroups_cond.l_xcmsGroups_bool
                --l_xcmsGroups='$Library.l_filters.l_xcmsGroups_cond.l_xcmsGroups'
            #end if

            #if $Query.q_filters.q_pids_cond.q_pids_bool
                --q_pids='$Query.q_filters.q_pids_cond.q_pids'
            #end if
            #if $Library.l_filters.l_pids_cond.l_pids_bool
                --l_pids='$Library.l_filters.l_pids_cond.l_pids'
            #end if

            #if $Query.q_filters.q_rtrange_cond.q_rtrange_bool
                --q_rtrangeMin='$Query.q_filters.q_rtrange_cond.q_rtrangeMin'
                --q_rtrangeMax='$Query.q_filters.q_rtrange_cond.q_rtrangeMax'
            #end if
            #if $Library.l_filters.l_rtrange_cond.l_rtrange_bool
                --l_rtrangeMin='$Library.l_filters.l_rtrange_cond.l_rtrangeMin'
                --l_rtrangeMax='$Library.l_filters.l_rtrange_cond.l_rtrangeMax'
            #end if

            #if $Query.q_filters.q_accessions_cond.q_accessions_bool
                --q_accessions='$Query.q_filters.q_accessions_cond.q_accessions'
            #end if
            #if $Library.l_filters.l_accessions_cond.l_accessions_bool
                --l_accessions='$Library.l_filters.l_accessions_cond.l_accessions'
            #end if

            #if $Query.q_filters.q_sources_cond.q_sources_bool
                --q_sources='$Query.q_filters.q_sources_cond.q_sources'
                --q_sourcesUser='$Query.q_filters.q_sources_cond.q_sourcesUser'
            #end if
            #if $Library.l_filters.l_sources_cond.l_sources_bool
                --l_sources='$Library.l_filters.l_sources_cond.l_sources'
                --l_sourcesUser='$Library.l_filters.l_sources_cond.l_sourcesUser'
            #end if

            #if $Query.q_filters.q_instrumentTypes_cond.q_instrumentTypes_bool
                --q_instrumentTypes='$Query.q_filters.q_instrumentTypes_cond.q_instrumentTypes'
                --q_instrumentTypesUser='$Query.q_filters.q_instrumentTypes_cond.q_instrumentTypesUser'
            #end if
            #if $Library.l_filters.l_instrumentTypes_cond.l_instrumentTypes_bool
                --l_instrumentTypes='$Library.l_filters.l_instrumentTypes_cond.l_instrumentTypes'
                --l_instrumentTypesUser='$Library.l_filters.l_instrumentTypes_cond.l_instrumentTypesUser'
            #end if

            #if $Query.q_filters.q_instruments_cond.q_instruments_bool
                --q_instruments='$Query.q_filters.q_instruments_cond.q_instruments'
            #end if
            #if $Library.l_filters.l_instruments_cond.l_instruments_bool
                --l_instruments='$Library.l_filters.l_instruments_cond.l_instruments'
            #end if

            #if $Query.q_filters.q_spectraTypes_cond.q_spectraTypes_bool
                --q_spectraTypes='$Query.q_filters.q_spectraTypes_cond.q_spectraTypes'
            #end if
            #if $Library.l_filters.l_spectraTypes_cond.l_spectraTypes_bool
                --l_spectraTypes='$Library.l_filters.l_spectraTypes_cond.l_spectraTypes'
            #end if

            #if $Query.q_filters.q_spectraFilter
                --q_spectraFilter
            #end if
            #if $Library.l_filters.l_spectraFilter
                --l_spectraFilter
            #end if

            ## General arguments
            #if $General.rttol_cond.rttol_bool
                --rttol=$General.rttol_cond.rttol
            #end if

            --raW=$General.raW
            --mzW=$General.mzW

            ## The copyDb flag is only offered, and only passed, when updateDb is enabled
            #if $General.updateDb_cond.updateDb
                --updateDb
                #if $General.updateDb_cond.copyDb
                    --copyDb
                #end if
            #end if

            #if $General.usePrecursors
                --usePrecursors
            #end if
    ]]></command>

    <inputs>
        <section name="Query" title="Query spectra input and filters" expanded="True">
            <expand macro="sm_input" ql="Query" ql_shrt="q" user="True" mspuritydatalib="False" msp="False"
                    help="Query SQLite database - in the standard XCMS msPurity workflow - the output of msPurity.createDatabase should be used here. However any SQLite database following the schema of https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-database-vignette.html can be used as input"/>
            <expand macro="filters" ql="Query" ql_shrt="q"/>
        </section>

        <section name="Library" title="Library spectra input and filters" expanded="True">
            <expand macro="sm_input" ql="Library" ql_shrt="l" user="False" mspuritydatalib="True" msp="False"
                    help="Library SQLite database - in the standard XCMS msPurity workflow - a default database of MassBank, HMDB, LipidBlast and GNPS is used. However any SQLite database following the schema of https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-database-vignette.html can be used as input"/>
            <expand macro="filters" ql="Library" ql_shrt="l"/>
        </section>

        <section name="General" title="General arguments" expanded="False">
            <conditional name="rttol_cond">
                <param name="rttol_bool" type="boolean" label="Filter on retention time match?" help="" />
                <when value="true">
                    <param name="rttol" type="float" value="30" min="0"
                           label="Retention time tolerance (seconds)"
                           help="Retention time tolerance in seconds to match precursors"/>
                </when>
                <when value="false"/>
            </conditional>

            <param name="usePrecursors" type="boolean" checked="true" label="Filter on matching precursors?"
                   help="If True, spectra will be filtered by similarity of precursors based on the library and query ppm defined tolerance" />

            <param name="raW" label="Weighting for relative abundance" type="float" value="0.5"
                   help="Relative abundance weight for spectra (default to 0.5 as determined by massbank for ESI data)"/>

            <param name="mzW" label="Weighting for mz" type="float" min="0" value="2"
                   help="mz weight for spectra (default to 2 as determined by massbank for ESI data)"/>

            <conditional name="updateDb_cond">
                <param name="updateDb" type="boolean" checked="true" label="Update database with results?" help="" />
                <when value="true">
                    <param name="copyDb" type="boolean" checked="true" label="Make a copy of the database?"
                           help="A copy will be made of the input SQLite target database and the results will be added to this copy. When False, the input SQLite database will be updated with the matching results. Use False if you want to reduce storage space being used."/>
                </when>
                <when value="false"/>
            </conditional>
        </section>
    </inputs>

    <outputs>
        <!--
            NOTE(review): the two filter expressions below use the names create_new_database and
            spectra_type_q, neither of which is declared as a parameter in the inputs of this file.
            They are kept unchanged here; confirm what they were meant to test (for example the
            updateDb switch or the query spectra type) before relying on them.
        -->
        <data name="sqlite_results" format="sqlite" label="${tool.name} on ${on_string}: SQLite results"
              from_work_dir="db_with_spectral_matching.sqlite" >
            <filter>create_new_database is True</filter>
        </data>
        <data name="matches" format="tsv" label="${tool.name} on ${on_string}: matches"
              from_work_dir="matched_results.tsv" >
            <filter>spectra_type_q == "scans"</filter>
        </data>
        <data name="xcms_matches" format="tsv" label="${tool.name} on ${on_string}: XCMS matches"
              from_work_dir="xcms_matched_results.tsv" />
    </outputs>

    <tests>
        <!--
            NOTE(review): in both tests xcms_matches is compared with a spectralMatching_matched_results
            file and matches with a spectralMatching_xcms_matched_results file. The names look swapped;
            confirm against the test-data files before changing them.
        -->
        <!-- User supplied library database, filtered on one XCMS group and one accession -->
        <test>
            <param name="q_dbPth" value="createDatabase_output.sqlite" />
            <param name="q_spectraTypes_bool" value="true" />
            <param name="q_spectraTypes" value="inter,av_all" />
            <param name="l_dbPth_select" value="userdb" />
            <param name="l_dbPth" value="PR100037.sqlite" />
            <param name="q_xcmsGroups_bool" value="true" />
            <param name="l_accessions_bool" value="true" />
            <param name="q_xcmsGroups" value="14" />
            <param name="l_accessions" value="PR100037" />
            <output name="xcms_matches" file="spectralMatching_matched_results.tsv" />
            <output name="matches" file="spectralMatching_xcms_matched_results.tsv" />
            <output name="sqlite_results" value="spectralMatching_db_with_spectral_matching.sqlite" ftype="sqlite" compare="sim_size"/>
        </test>
        <!-- Same as above with the library instrument type filter switched on -->
        <test>
            <param name="l_instrumentTypes_bool" value="true" />
            <param name="q_dbPth" value="createDatabase_output.sqlite" />
            <param name="q_spectraTypes_bool" value="true" />
            <param name="q_spectraTypes" value="inter,av_all" />
            <param name="l_dbPth_select" value="userdb" />
            <param name="l_dbPth" value="PR100037.sqlite" />
            <param name="q_xcmsGroups_bool" value="true" />
            <param name="l_accessions_bool" value="true" />
            <param name="q_xcmsGroups" value="14" />
            <param name="l_accessions" value="PR100037" />
            <output name="xcms_matches" file="spectralMatching_matched_results_instrumentTypes.tsv" />
            <output name="matches" file="spectralMatching_xcms_matched_results_instrumentTypes.tsv" />
            <output name="sqlite_results" value="spectralMatching_db_with_spectral_matching_instrumentTypes.sqlite" ftype="sqlite" compare="sim_size"/>
        </test>
    </tests>

    <help><![CDATA[
=============================================================
Spectral matching
=============================================================

-----------
General
-----------

Perform spectral matching to spectral libraries for an LC-MS/MS dataset.

The spectral matching is performed from a **Query** SQLite spectral-database against a **Library** SQLite
spectral-database.

The SQLite schema of the spectral database is described here: spectral_database_schema_

The query spectral-database in most cases should be the "unknown" spectra database generated by the msPurity
function createDatabase as part of a msPurity-XCMS data processing workflow.

The library spectral-database in most cases should contain the "known" spectra from either public or user generated
resources. The library SQLite database by default contains data from MoNA including Massbank, HMDB, LipidBlast and
GNPS. A larger_database_ can be downloaded and used from the msp2db github repository.

To create a user generated library SQLite database the following tool can be used to generate a SQLite database
from a collection of MSP files: msp2db_. It should be noted though, that as long as the schema of the
spectral-database is as described here, then any database can be used for either the library or query - even
allowing for the same database to be used.

The spectral matching functionality has four main components: spectral filtering, spectral alignment, spectral
matching, and summarising the results.

Spectral filtering is simply filtering both the library and query spectra to be searched against (e.g. choosing the
library source, instrument, retention time, precursor PPM tolerance etc).

The spectral alignment stage involves aligning the query peaks to the library peaks. The approach used is similar
to the modified pMatch algorithm described in Zhou et al 2015.

The spectral matching of the aligned spectra is performed against a combined intensity and m/z weighted vector -
created for both the query and library spectra (wq and wl). See below:

.. math::

    w=intensity^x \cdot mz^y

Where x and y represent weight factors and can be adjusted with the parameters raW and mzW. Defaults to x=0.5 and
y=2 as per MassBank for ESI based mass spectrometry data.

The aligned weighted vectors are then matched using dot product cosine, reverse dot product cosine and the
composite dot product. See below for the dot product cosine equation.

.. math::

    dpc = \frac{ w_q \cdot w_l }{ \sqrt{\sum{w_{q}^2}} \cdot \sqrt{\sum{w_{l}^2}} }

Full details of the matching approaches are described in the msPurity_spectral_matching_vignette_

--------------------------------------------
Example LC-MS/MS processing workflow
--------------------------------------------

* Purity assessments

  + (mzML files) -> purityA -> (pa)

* XCMS processing

  + (mzML files) -> xcms.xcmsSet -> xcms.merge -> xcms.group -> xcms.retcor -> xcms.group -> (xset)

* Fragmentation processing

  + (xset, pa) -> frag4feature -> filterFragSpectra -> averageAllFragSpectra -> createDatabase -> **spectralMatching** -> (sqlite spectral database)

-----------
Output
-----------

**Database**

The updated query database (this will have been updated with the annotation results if the updateDb argument is
used).

**xcmsMatchedResults**

If the query spectra had XCMS based chromatographic peak tables (e.g. c_peak_groups, c_peaks) in the sqlite
database - it will be possible to summarise the matches for each XCMS grouped feature. The dataframe contains the
following columns:

* lpid - id in database of library spectra
* qpid - id in database of query spectra
* dpc - dot product cosine of the match
* rdpc - reverse dot product cosine of the match
* cdpc - composite dot product cosine of the match
* mcount - number of matching peaks
* allcount - total number of peaks across both query and library spectra
* mpercent - percentage of matching peaks across both query and library spectra
* library_rt - retention time of library spectra
* query_rt - retention time of query spectra
* rtdiff - difference between library and query retention time
* library_precursor_mz - library precursor mz
* query_precursor_mz - query precursor mz
* library_precursor_ion_purity - library precursor ion purity
* query_precursor_ion_purity - query precursor ion purity
* library_accession - library accession value (unique string or number given to either MoNA or Massbank data entries)
* library_precursor_type - library precursor type (i.e. adduct)
* library_entry_name - Name given to the library spectra
* inchikey - inchikey of the matched library spectra
* library_source_name - source of the spectra (e.g. massbank, gnps)
* library_compound_name - name of compound spectra was obtained from

**matchedResults**

All matched results from the query spectra to the library spectra. Contains the same as above without the XCMS
details. This table is useful to observe spectral matching results for all MS/MS spectra irrespective of if they
are linked to XCMS MS1 features.

.. _spectral_database_schema: https://www.bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-database-vignette.html
.. _larger_database: https://github.com/computational-metabolomics/msp2db/releases
.. _msp2db: https://github.com/computational-metabolomics/msp2db/releases
.. _msPurity_spectral_matching_vignette: https://www.bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-lcmsms-data-processing-and-spectral-matching-vignette.html
    ]]></help>

    <expand macro="citations"/>
</tool>