# HG changeset patch # User recetox # Date 1702561558 0 # Node ID ca5a8db023e1b05912b888788c7214748b45dd64 # Parent a4661ff81aa44d40f6b476fd80a6ed6db8fd71bb planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 98223db312c30b0e121a1422a9534a3db3fbf0c0 diff -r a4661ff81aa4 -r ca5a8db023e1 matchms_filtering.xml --- a/matchms_filtering.xml Mon Dec 04 19:12:43 2023 +0000 +++ b/matchms_filtering.xml Thu Dec 14 13:45:58 2023 +0000 @@ -1,4 +1,4 @@ - + filter and normalize mass spectrometry data @@ -55,6 +55,10 @@ #if $require_inchi_is_true == "TRUE" -require_inchi \ #end if + #if $derive_precursor_mz_from_parent_mass.is_true == "TRUE" + -derive_precursor_mz_from_parent_mass \ + --estimate_from_adduct "${derive_precursor_mz_from_parent_mass.estimate_from_adduct}" \ + #end if #if $reduce_to_top_n_peaks.is_true == "TRUE" -reduce_to_top_n_peaks \ --n_max "$reduce_to_top_n_peaks.n_max" \ @@ -104,6 +108,18 @@ + + + + + + + + + + + @@ -174,6 +190,14 @@ + + +
+ + +
+ +
diff -r a4661ff81aa4 -r ca5a8db023e1 matchms_filtering_wrapper.py --- a/matchms_filtering_wrapper.py Mon Dec 04 19:12:43 2023 +0000 +++ b/matchms_filtering_wrapper.py Thu Dec 14 13:45:58 2023 +0000 @@ -6,6 +6,7 @@ add_retention_index, add_retention_time, clean_compound_name from matchms.filtering import default_filters, normalize_intensities, reduce_to_number_of_peaks, select_by_mz, \ select_by_relative_intensity +from matchms.filtering.filter_utils.derive_precursor_mz_and_parent_mass import derive_precursor_mz_from_parent_mass from matchms.importing import load_from_mgf, load_from_msp @@ -39,6 +40,9 @@ help="Remove spectra that does not contain SMILES.") parser.add_argument("-require_inchi", action='store_true', help="Remove spectra that does not contain INCHI.") + parser.add_argument("-derive_precursor_mz_from_parent_mass", action='store_true', + help="Derives the precursor_mz from the parent mass and adduct or charge.") + parser.add_argument("--estimate_from_adduct", type=str, help="estimate from adduct.") parser.add_argument("-reduce_to_top_n_peaks", action='store_true', help="reduce to top n peaks filter.") parser.add_argument("--n_max", type=int, help="Maximum number of peaks. Remove peaks if more peaks are found.") @@ -51,6 +55,7 @@ or args.mz_range or args.require_smiles or args.require_inchi + or args.derive_precursor_mz_from_parent_mass or args.reduce_to_top_n_peaks): raise ValueError('No filter selected.') @@ -84,6 +89,11 @@ if args.reduce_to_top_n_peaks: spectrum = reduce_to_number_of_peaks(spectrum_in=spectrum, n_max=args.n_max) + if args.derive_precursor_mz_from_parent_mass: + spectrum.set("parent_mass", float(spectrum.get('parent_mass'))) + precursor_mz = derive_precursor_mz_from_parent_mass(spectrum, args.estimate_from_adduct) + spectrum.set("precursor_mz", precursor_mz) + if args.require_smiles and spectrum is not None: spectrum = require_key(spectrum, "smiles") diff -r a4661ff81aa4 -r ca5a8db023e1 test-data/filtering/derive_precursor_mz.msp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filtering/derive_precursor_mz.msp Thu Dec 14 13:45:58 2023 +0000 @@ -0,0 +1,48 @@ +SCANNUMBER: -1 +IONMODE: positive +SPECTRUMTYPE: Centroid +FORMULA: C20H12 +INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N +SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2 +AUTHORS: Price et al., RECETOX, Masaryk University (CZ) +INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS +IONIZATION: EI+ +LICENSE: CC BY-NC +COMPOUND_NAME: Perylene +RETENTION_TIME: None +RETENTION_INDEX: 2886.9 +ADDUCT: [M]+ +COLLISION_ENERGY: 70eV +INSTRUMENT_TYPE: GC-EI-Orbitrap +CHARGE: 1 +PARENT_MASS: 251.08595400000002 +NUM PEAKS: 3 +250.07765 0.3282529462971431 +252.09323 1.0 +253.09656 0.20573802940517583 + +SCANNUMBER: -1 +IONMODE: positive +SPECTRUMTYPE: Centroid +FORMULA: C14H10 +INCHIKEY: YNPNZTXNASCQKK-UHFFFAOYSA-N +SMILES: C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2 +AUTHORS: Price et al., RECETOX, Masaryk University (CZ) +INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS +IONIZATION: EI+ +LICENSE: CC BY-NC +COMPOUND_NAME: Phenanthrene +RETENTION_TIME: None +RETENTION_INDEX: 1832.9 +ADDUCT: [M]+ +COLLISION_ENERGY: 70eV +INSTRUMENT_TYPE: GC-EI-Orbitrap +CHARGE: 1 +PARENT_MASS: 177.070224 +NUM PEAKS: 5 +152.0619 0.1657993569424221 +176.062 0.24558560966311757 +177.06982 0.12764433529926775 +178.0775 1.0 +179.08078 0.16394988149600653 + diff -r a4661ff81aa4 -r ca5a8db023e1 test-data/filtering/derive_precursor_mz_out.msp --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filtering/derive_precursor_mz_out.msp Thu Dec 14 13:45:58 2023 +0000 @@ -0,0 +1,50 @@ +SCANNUMBER: -1 +IONMODE: positive +SPECTRUMTYPE: Centroid +FORMULA: C20H12 +INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N +SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2 +AUTHORS: Price et al., RECETOX, Masaryk University (CZ) +INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS +IONIZATION: EI+ +LICENSE: CC BY-NC +COMPOUND_NAME: Perylene +RETENTION_TIME: None +RETENTION_INDEX: 2886.9 +ADDUCT: [M]+ +COLLISION_ENERGY: 70eV +INSTRUMENT_TYPE: GC-EI-Orbitrap +CHARGE: 1 +PARENT_MASS: 251.08595400000002 +PRECURSOR_MZ: 251.08540542009078 +NUM PEAKS: 3 +250.07765 0.3282529462971431 +252.09323 1.0 +253.09656 0.20573802940517583 + +SCANNUMBER: -1 +IONMODE: positive +SPECTRUMTYPE: Centroid +FORMULA: C14H10 +INCHIKEY: YNPNZTXNASCQKK-UHFFFAOYSA-N +SMILES: C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2 +AUTHORS: Price et al., RECETOX, Masaryk University (CZ) +INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS +IONIZATION: EI+ +LICENSE: CC BY-NC +COMPOUND_NAME: Phenanthrene +RETENTION_TIME: None +RETENTION_INDEX: 1832.9 +ADDUCT: [M]+ +COLLISION_ENERGY: 70eV +INSTRUMENT_TYPE: GC-EI-Orbitrap +CHARGE: 1 +PARENT_MASS: 177.070224 +PRECURSOR_MZ: 177.06967542009076 +NUM PEAKS: 5 +152.0619 0.1657993569424221 +176.062 0.24558560966311757 +177.06982 0.12764433529926775 +178.0775 1.0 +179.08078 0.16394988149600653 +