changeset 13:ca5a8db023e1 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 98223db312c30b0e121a1422a9534a3db3fbf0c0
author recetox
date Thu, 14 Dec 2023 13:45:58 +0000
parents a4661ff81aa4
children da15e8ea3b28
files matchms_filtering.xml matchms_filtering_wrapper.py test-data/filtering/derive_precursor_mz.msp test-data/filtering/derive_precursor_mz_out.msp
diffstat 4 files changed, 133 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/matchms_filtering.xml	Mon Dec 04 19:12:43 2023 +0000
+++ b/matchms_filtering.xml	Thu Dec 14 13:45:58 2023 +0000
@@ -1,4 +1,4 @@
-<tool id="matchms_filtering" name="matchms filtering" version="@TOOL_VERSION@+galaxy0" profile="21.09">
+<tool id="matchms_filtering" name="matchms filtering" version="@TOOL_VERSION@+galaxy1" profile="21.09">
     <description>filter and normalize mass spectrometry data</description>
 
     <macros>
@@ -55,6 +55,10 @@
             #if $require_inchi_is_true == "TRUE"
                 -require_inchi \
             #end if
+            #if $derive_precursor_mz_from_parent_mass.is_true == "TRUE"
+                -derive_precursor_mz_from_parent_mass \
+                --estimate_from_adduct "${derive_precursor_mz_from_parent_mass.estimate_from_adduct}" \
+            #end if
             #if $reduce_to_top_n_peaks.is_true == "TRUE"
                 -reduce_to_top_n_peaks \
                 --n_max "$reduce_to_top_n_peaks.n_max" \
@@ -104,6 +108,18 @@
         <param name="require_inchi_is_true" label="Require INCHI" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false"
                 help="Remove spectra that does not contain INCHI." />
 
+        <conditional name="derive_precursor_mz_from_parent_mass">
+            <param name="is_true" label="Derive precursor_mz from parent_mass" type="select"
+                   help="Derives the precursor_mz from the parent mass and adduct or charge.">
+                <option value="FALSE" selected="true">FALSE</option>
+                <option value="TRUE">TRUE</option>
+            </param>
+            <when value="TRUE">
+                <param label="Estimate from adduct" name="estimate_from_adduct" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" />
+            </when>
+            <when value="FALSE"></when>
+        </conditional>
+
         <conditional name="reduce_to_top_n_peaks">
             <param name="is_true" label="Reduce to top n peaks" type="select"
                    help="Lowest intensity peaks will be removed when it has more peaks than desired.">
@@ -174,6 +190,14 @@
             </section>
             <output name="output" file="filtering/reduce_to_top_n_peaks.msp" ftype="msp"/>
         </test>
+        <test>
+            <param name="spectra" value="filtering/derive_precursor_mz.msp" ftype="msp"/>
+            <section name="derive_precursor_mz_from_parent_mass">
+                <param name="is_true" value="TRUE"/>
+                <param name="estimate_from_adduct" value="TRUE"/>
+            </section>
+            <output name="output" file="filtering/derive_precursor_mz_out.msp" ftype="msp"/>
+        </test>
     </tests>
 
     <help>
--- a/matchms_filtering_wrapper.py	Mon Dec 04 19:12:43 2023 +0000
+++ b/matchms_filtering_wrapper.py	Thu Dec 14 13:45:58 2023 +0000
@@ -6,6 +6,7 @@
     add_retention_index, add_retention_time, clean_compound_name
 from matchms.filtering import default_filters, normalize_intensities, reduce_to_number_of_peaks, select_by_mz, \
     select_by_relative_intensity
+from matchms.filtering.filter_utils.derive_precursor_mz_and_parent_mass import derive_precursor_mz_from_parent_mass
 from matchms.importing import load_from_mgf, load_from_msp
 
 
@@ -39,6 +40,9 @@
                         help="Remove spectra that does not contain SMILES.")
     parser.add_argument("-require_inchi", action='store_true',
                         help="Remove spectra that does not contain INCHI.")
+    parser.add_argument("-derive_precursor_mz_from_parent_mass", action='store_true',
+                        help="Derives the precursor_mz from the parent mass and adduct or charge.")
+    parser.add_argument("--estimate_from_adduct", type=str, help="estimate from adduct.")
     parser.add_argument("-reduce_to_top_n_peaks", action='store_true',
                         help="reduce to top n peaks filter.")
     parser.add_argument("--n_max", type=int, help="Maximum number of peaks. Remove peaks if more peaks are found.")
@@ -51,6 +55,7 @@
             or args.mz_range
             or args.require_smiles
             or args.require_inchi
+            or args.derive_precursor_mz_from_parent_mass
             or args.reduce_to_top_n_peaks):
         raise ValueError('No filter selected.')
 
@@ -84,6 +89,11 @@
         if args.reduce_to_top_n_peaks:
             spectrum = reduce_to_number_of_peaks(spectrum_in=spectrum, n_max=args.n_max)
 
+        if args.derive_precursor_mz_from_parent_mass and spectrum is not None:  # dropped by an earlier filter
+            spectrum.set("parent_mass", float(spectrum.get('parent_mass')))
+            from_adduct = str(args.estimate_from_adduct).upper() == "TRUE"  # argparse yields text; "FALSE" is truthy
+            spectrum.set("precursor_mz", derive_precursor_mz_from_parent_mass(spectrum, from_adduct))
+
         if args.require_smiles and spectrum is not None:
             spectrum = require_key(spectrum, "smiles")
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtering/derive_precursor_mz.msp	Thu Dec 14 13:45:58 2023 +0000
@@ -0,0 +1,48 @@
+SCANNUMBER: -1
+IONMODE: positive
+SPECTRUMTYPE: Centroid
+FORMULA: C20H12
+INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N
+SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2
+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
+IONIZATION: EI+
+LICENSE: CC BY-NC
+COMPOUND_NAME: Perylene
+RETENTION_TIME: None
+RETENTION_INDEX: 2886.9
+ADDUCT: [M]+
+COLLISION_ENERGY: 70eV
+INSTRUMENT_TYPE: GC-EI-Orbitrap
+CHARGE: 1
+PARENT_MASS: 251.08595400000002
+NUM PEAKS: 3
+250.07765   0.3282529462971431
+252.09323   1.0
+253.09656   0.20573802940517583
+
+SCANNUMBER: -1
+IONMODE: positive
+SPECTRUMTYPE: Centroid
+FORMULA: C14H10
+INCHIKEY: YNPNZTXNASCQKK-UHFFFAOYSA-N
+SMILES: C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2
+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
+IONIZATION: EI+
+LICENSE: CC BY-NC
+COMPOUND_NAME: Phenanthrene
+RETENTION_TIME: None
+RETENTION_INDEX: 1832.9
+ADDUCT: [M]+
+COLLISION_ENERGY: 70eV
+INSTRUMENT_TYPE: GC-EI-Orbitrap
+CHARGE: 1
+PARENT_MASS: 177.070224
+NUM PEAKS: 5
+152.0619    0.1657993569424221
+176.062     0.24558560966311757
+177.06982   0.12764433529926775
+178.0775    1.0
+179.08078   0.16394988149600653
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtering/derive_precursor_mz_out.msp	Thu Dec 14 13:45:58 2023 +0000
@@ -0,0 +1,50 @@
+SCANNUMBER: -1
+IONMODE: positive
+SPECTRUMTYPE: Centroid
+FORMULA: C20H12
+INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N
+SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2
+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
+IONIZATION: EI+
+LICENSE: CC BY-NC
+COMPOUND_NAME: Perylene
+RETENTION_TIME: None
+RETENTION_INDEX: 2886.9
+ADDUCT: [M]+
+COLLISION_ENERGY: 70eV
+INSTRUMENT_TYPE: GC-EI-Orbitrap
+CHARGE: 1
+PARENT_MASS: 251.08595400000002
+PRECURSOR_MZ: 251.08540542009078
+NUM PEAKS: 3
+250.07765   0.3282529462971431
+252.09323   1.0
+253.09656   0.20573802940517583
+
+SCANNUMBER: -1
+IONMODE: positive
+SPECTRUMTYPE: Centroid
+FORMULA: C14H10
+INCHIKEY: YNPNZTXNASCQKK-UHFFFAOYSA-N
+SMILES: C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2
+AUTHORS: Price et al., RECETOX, Masaryk University (CZ)
+INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS
+IONIZATION: EI+
+LICENSE: CC BY-NC
+COMPOUND_NAME: Phenanthrene
+RETENTION_TIME: None
+RETENTION_INDEX: 1832.9
+ADDUCT: [M]+
+COLLISION_ENERGY: 70eV
+INSTRUMENT_TYPE: GC-EI-Orbitrap
+CHARGE: 1
+PARENT_MASS: 177.070224
+PRECURSOR_MZ: 177.06967542009076
+NUM PEAKS: 5
+152.0619    0.1657993569424221
+176.062     0.24558560966311757
+177.06982   0.12764433529926775
+178.0775    1.0
+179.08078   0.16394988149600653
+