Mercurial > repos > recetox > matchms_filtering
changeset 13:ca5a8db023e1 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 98223db312c30b0e121a1422a9534a3db3fbf0c0
author | recetox |
---|---|
date | Thu, 14 Dec 2023 13:45:58 +0000 |
parents | a4661ff81aa4 |
children | da15e8ea3b28 |
files | matchms_filtering.xml matchms_filtering_wrapper.py test-data/filtering/derive_precursor_mz.msp test-data/filtering/derive_precursor_mz_out.msp |
diffstat | 4 files changed, 133 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/matchms_filtering.xml Mon Dec 04 19:12:43 2023 +0000 +++ b/matchms_filtering.xml Thu Dec 14 13:45:58 2023 +0000 @@ -1,4 +1,4 @@ -<tool id="matchms_filtering" name="matchms filtering" version="@TOOL_VERSION@+galaxy0" profile="21.09"> +<tool id="matchms_filtering" name="matchms filtering" version="@TOOL_VERSION@+galaxy1" profile="21.09"> <description>filter and normalize mass spectrometry data</description> <macros> @@ -55,6 +55,10 @@ #if $require_inchi_is_true == "TRUE" -require_inchi \ #end if + #if $derive_precursor_mz_from_parent_mass.is_true == "TRUE" + -derive_precursor_mz_from_parent_mass \ + --estimate_from_adduct "${derive_precursor_mz_from_parent_mass.estimate_from_adduct}" \ + #end if #if $reduce_to_top_n_peaks.is_true == "TRUE" -reduce_to_top_n_peaks \ --n_max "$reduce_to_top_n_peaks.n_max" \ @@ -104,6 +108,18 @@ <param name="require_inchi_is_true" label="Require INCHI" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" help="Remove spectra that does not contain INCHI." /> + <conditional name="derive_precursor_mz_from_parent_mass"> + <param name="is_true" label="Derive precursor_mz from parent_mass" type="select" + help="Derives the precursor_mz from the parent mass and adduct or charge."> + <option value="FALSE" selected="true">FALSE</option> + <option value="TRUE">TRUE</option> + </param> + <when value="TRUE"> + <param label="Estimate from adduct" name="estimate_from_adduct" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" /> + </when> + <when value="FALSE"></when> + </conditional> + <conditional name="reduce_to_top_n_peaks"> <param name="is_true" label="Reduce to top n peaks" type="select" help="Lowest intensity peaks will be removed when it has more peaks than desired."> @@ -174,6 +190,14 @@ </section> <output name="output" file="filtering/reduce_to_top_n_peaks.msp" ftype="msp"/> </test> + <test> + <param name="spectra" value="filtering/derive_precursor_mz.msp" ftype="msp"/> + <section name="derive_precursor_mz_from_parent_mass"> + <param name="is_true" value="TRUE"/> + <param name="estimate_from_adduct" value="TRUE"/> + </section> + <output name="output" file="filtering/derive_precursor_mz_out.msp" ftype="msp"/> + </test> </tests> <help>
--- a/matchms_filtering_wrapper.py Mon Dec 04 19:12:43 2023 +0000 +++ b/matchms_filtering_wrapper.py Thu Dec 14 13:45:58 2023 +0000 @@ -6,6 +6,7 @@ add_retention_index, add_retention_time, clean_compound_name from matchms.filtering import default_filters, normalize_intensities, reduce_to_number_of_peaks, select_by_mz, \ select_by_relative_intensity +from matchms.filtering.filter_utils.derive_precursor_mz_and_parent_mass import derive_precursor_mz_from_parent_mass from matchms.importing import load_from_mgf, load_from_msp @@ -39,6 +40,9 @@ help="Remove spectra that does not contain SMILES.") parser.add_argument("-require_inchi", action='store_true', help="Remove spectra that does not contain INCHI.") + parser.add_argument("-derive_precursor_mz_from_parent_mass", action='store_true', + help="Derives the precursor_mz from the parent mass and adduct or charge.") + parser.add_argument("--estimate_from_adduct", type=str, help="estimate from adduct.") parser.add_argument("-reduce_to_top_n_peaks", action='store_true', help="reduce to top n peaks filter.") parser.add_argument("--n_max", type=int, help="Maximum number of peaks. Remove peaks if more peaks are found.") @@ -51,6 +55,7 @@ or args.mz_range or args.require_smiles or args.require_inchi + or args.derive_precursor_mz_from_parent_mass or args.reduce_to_top_n_peaks): raise ValueError('No filter selected.') @@ -84,6 +89,11 @@ if args.reduce_to_top_n_peaks: spectrum = reduce_to_number_of_peaks(spectrum_in=spectrum, n_max=args.n_max) + if args.derive_precursor_mz_from_parent_mass: + spectrum.set("parent_mass", float(spectrum.get('parent_mass'))) + precursor_mz = derive_precursor_mz_from_parent_mass(spectrum, args.estimate_from_adduct) + spectrum.set("precursor_mz", precursor_mz) + if args.require_smiles and spectrum is not None: spectrum = require_key(spectrum, "smiles")
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filtering/derive_precursor_mz.msp Thu Dec 14 13:45:58 2023 +0000 @@ -0,0 +1,48 @@ +SCANNUMBER: -1 +IONMODE: positive +SPECTRUMTYPE: Centroid +FORMULA: C20H12 +INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N +SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2 +AUTHORS: Price et al., RECETOX, Masaryk University (CZ) +INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS +IONIZATION: EI+ +LICENSE: CC BY-NC +COMPOUND_NAME: Perylene +RETENTION_TIME: None +RETENTION_INDEX: 2886.9 +ADDUCT: [M]+ +COLLISION_ENERGY: 70eV +INSTRUMENT_TYPE: GC-EI-Orbitrap +CHARGE: 1 +PARENT_MASS: 251.08595400000002 +NUM PEAKS: 3 +250.07765 0.3282529462971431 +252.09323 1.0 +253.09656 0.20573802940517583 + +SCANNUMBER: -1 +IONMODE: positive +SPECTRUMTYPE: Centroid +FORMULA: C14H10 +INCHIKEY: YNPNZTXNASCQKK-UHFFFAOYSA-N +SMILES: C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2 +AUTHORS: Price et al., RECETOX, Masaryk University (CZ) +INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS +IONIZATION: EI+ +LICENSE: CC BY-NC +COMPOUND_NAME: Phenanthrene +RETENTION_TIME: None +RETENTION_INDEX: 1832.9 +ADDUCT: [M]+ +COLLISION_ENERGY: 70eV +INSTRUMENT_TYPE: GC-EI-Orbitrap +CHARGE: 1 +PARENT_MASS: 177.070224 +NUM PEAKS: 5 +152.0619 0.1657993569424221 +176.062 0.24558560966311757 +177.06982 0.12764433529926775 +178.0775 1.0 +179.08078 0.16394988149600653 +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/filtering/derive_precursor_mz_out.msp Thu Dec 14 13:45:58 2023 +0000 @@ -0,0 +1,50 @@ +SCANNUMBER: -1 +IONMODE: positive +SPECTRUMTYPE: Centroid +FORMULA: C20H12 +INCHIKEY: CSHWQDPOILHKBI-UHFFFAOYSA-N +SMILES: C1=CC2=C3C(=C1)C1=CC=CC4=C1C(=CC=C4)C3=CC=C2 +AUTHORS: Price et al., RECETOX, Masaryk University (CZ) +INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS +IONIZATION: EI+ +LICENSE: CC BY-NC +COMPOUND_NAME: Perylene +RETENTION_TIME: None +RETENTION_INDEX: 2886.9 +ADDUCT: [M]+ +COLLISION_ENERGY: 70eV +INSTRUMENT_TYPE: GC-EI-Orbitrap +CHARGE: 1 +PARENT_MASS: 251.08595400000002 +PRECURSOR_MZ: 251.08540542009078 +NUM PEAKS: 3 +250.07765 0.3282529462971431 +252.09323 1.0 +253.09656 0.20573802940517583 + +SCANNUMBER: -1 +IONMODE: positive +SPECTRUMTYPE: Centroid +FORMULA: C14H10 +INCHIKEY: YNPNZTXNASCQKK-UHFFFAOYSA-N +SMILES: C1=CC2=C(C=C1)C1=C(C=CC=C1)C=C2 +AUTHORS: Price et al., RECETOX, Masaryk University (CZ) +INSTRUMENT: Q Exactive GC Orbitrap GC-MS/MS +IONIZATION: EI+ +LICENSE: CC BY-NC +COMPOUND_NAME: Phenanthrene +RETENTION_TIME: None +RETENTION_INDEX: 1832.9 +ADDUCT: [M]+ +COLLISION_ENERGY: 70eV +INSTRUMENT_TYPE: GC-EI-Orbitrap +CHARGE: 1 +PARENT_MASS: 177.070224 +PRECURSOR_MZ: 177.06967542009076 +NUM PEAKS: 5 +152.0619 0.1657993569424221 +176.062 0.24558560966311757 +177.06982 0.12764433529926775 +178.0775 1.0 +179.08078 0.16394988149600653 +