changeset 0:3d275fbdf741 draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
author recetox
date Tue, 22 Mar 2022 16:08:03 +0000
parents
children b8e9a7c7add2
files macros.xml matchms_filtering.xml matchms_filtering_wrapper.py test-data/filtering/clean_metadata.msp test-data/filtering/default_filters.msp test-data/filtering/input.msp test-data/filtering/mz_range.msp test-data/filtering/normalise_intensities.msp test-data/filtering/relative_intensity.msp
diffstat 9 files changed, 1264 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Tue Mar 22 16:08:03 2022 +0000
@@ -0,0 +1,70 @@
+<macros>
+    <token name="@TOOL_VERSION@">0.14.0</token> <!-- matchms version; reused by matchms_filtering.xml for the tool version prefix and the matchms requirement -->
+
+    <xml name="creator"> <!-- Shared creator metadata; tools include it with <expand macro="creator"/> -->
+        <creator>
+            <person
+                familyName="Hecht"
+                givenName="Helge"
+                identifier="0000-0001-6744-996X"
+                url="https://github.com/hechth" />
+            <organization
+                name="RECETOX MUNI"
+                email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
+                url="https://www.recetox.muni.cz/" />
+        </creator>
+    </xml>
+
+    <token name="@HELP_matchms@">
+        <![CDATA[
+            Documentation
+                For documentation on the tool see https://github.com/matchms/matchms/blob/master/README.rst
+                and https://matchms.readthedocs.io/en/latest/.
+
+            Upstream Tools
+                +-----------+---------------+--------+-----------+
+                | Name      | Output File   | Format | Parameter |
+                +===========+===============+========+===========+
+                | RAMClustR | Mass spectra  | msp    | references|
+                +-----------+---------------+--------+-----------+
+                | RAMClustR | Mass spectra  | msp    | queries   |
+                +-----------+---------------+--------+-----------+
+
+            Downstream Tools
+                The outputs are two tsv datasets: one containing the similarity scores and the other the number of matched peaks.
+        ]]>
+    </token>
+
+    <token name="@HELP_formatter@">
+        <![CDATA[
+            Usage
+                This tool creates user friendly tables from the data matrices produced by matchms.
+                The tool can be operated in two modes, based on (i) thresholds or (ii) top k matches.
+
+            Input Table Format
+                The tool expects two data matrices with the format as depicted below.
+                The tool assumes the reference compound labels as row labels and the query labels as column labels (as naturally outputted by matchms).
+
+                +----------+------+------+-----+
+                |          | C001 | C002 | ... |
+                +==========+======+======+=====+
+                | Perylene | 0.1  | 0.0  | ... |
+                +----------+------+------+-----+
+                | Glycine  | 0.5  | 0.34 | ... |
+                +----------+------+------+-----+
+                |   ...    | ...  | ...  | ... |
+                +----------+------+------+-----+
+
+            Output Table Format
+                +----------+-----------+---------+--------+
+                | query    | reference | matches | scores |
+                +==========+===========+=========+========+
+                | C001     | Glycine   |      6  | 0.5    |
+                +----------+-----------+---------+--------+
+                | C002     | Glycine   |     3   | 0.34   |
+                +----------+-----------+---------+--------+
+                |   ...    | ...       | ...     | ...    |
+                +----------+-----------+---------+--------+
+        ]]>
+    </token>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/matchms_filtering.xml	Tue Mar 22 16:08:03 2022 +0000
@@ -0,0 +1,154 @@
+<tool id="matchms_filtering" name="matchMS filtering" version="@TOOL_VERSION@+galaxy1">
+    <description>filter and normalize mass spectrometry data</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="creator"/>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">matchms</requirement>
+        <requirement type="package" version="1.1.4">pandas</requirement>
+    </requirements>
+
+    <environment_variables> <!-- Points MPLCONFIGDIR at the job temporary directory; presumably for matplotlib config/cache files (TODO confirm) -->
+        <environment_variable name="MPLCONFIGDIR">\$_GALAXY_JOB_TMP_DIR</environment_variable>
+    </environment_variables>
+
+    <command detect_errors="exit_code"><![CDATA[
+        sh ${matchms_python_cli}
+    ]]> </command>
+
+    <configfiles> <!-- Shell script run by the command above; each Cheetah #if block appends the flag(s) of one enabled filter -->
+        <configfile name="matchms_python_cli">
+            python3 ${__tool_directory__}/matchms_filtering_wrapper.py \
+            --spectra "$spectra" \
+            --spectra_format "$spectra.ext" \
+            #if $normalise_intensities.is_true
+                -normalise_intensities \
+            #end if
+            #if $default_filters.is_true
+                -default_filters \
+            #end if
+            #if $clean_metadata.is_true
+                -clean_metadata \
+            #end if
+            #if $relative_intensity.is_true
+                -relative_intensity \
+                --from_intensity "$relative_intensity.from_intensity" \
+                --to_intensity "$relative_intensity.to_intensity" \
+            #end if
+            #if $mz_range.is_true
+                -mz_range \
+                --from_mz "$mz_range.from_mz" \
+                --to_mz "$mz_range.to_mz" \
+            #end if
+            --output "$output"
+        </configfile>
+    </configfiles>
+
+    <inputs> <!-- One conditional per filter; the boolean "is_true" test parameter switches the filter on -->
+        <param label="Spectra file" name="spectra" type="data" format="msp,mgf" help="Mass spectra file to be filtered." />
+
+        <conditional name="normalise_intensities">
+            <param name="is_true" label="Normalize intensities" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false"
+                   help="Normalize intensities of peaks (and losses) to unit height."/>
+        </conditional>
+
+        <conditional name="default_filters">
+            <param name="is_true" label="Apply default filters" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false"
+                   help="Collection of filters that are considered default and that do not require any (factory) arguments."/>
+        </conditional>
+
+        <conditional name="clean_metadata">
+            <param name="is_true" label="Clean metadata" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false"
+                   help="Apply all adding and cleaning filters if possible, so that the spectra have canonical metadata." />
+        </conditional>
+
+        <conditional name="relative_intensity">
+            <param name="is_true" label="Filter relative intensity" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false"
+                   help="Keep only peaks within set relative intensity range (keep if maximum >= intensity >= minimum)." />
+            <when value="TRUE"> <!-- NOTE(review): both bounds are optional, yet the configfile always passes them to the wrapper; confirm an empty value is handled (same for m/z below) -->
+                <param label="Minimum intensity" name="from_intensity" optional="true" type="float" help="Lower bound for intensity filter." />
+                <param label="Maximum intensity" name="to_intensity" optional="true" type="float" help="Upper bound for intensity filter." />
+            </when>
+        </conditional>
+
+        <conditional name="mz_range">
+            <param name="is_true" label="Filter m/z range" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false"
+                   help="Keep only peaks between set m/z range (keep if maximum >= m/z >= minimum)." />
+            <when value="TRUE">
+                <param label="Minimum m/z" name="from_mz" optional="true" type="float" help="Lower bound for m/z filter." />
+                <param label="Maximum m/z" name="to_mz" optional="true" type="float" help="Upper bound for m/z filter." />
+            </when>
+        </conditional>
+
+    </inputs>
+
+    <outputs>
+        <data label="${tool.name} on ${on_string}" name="output" format_source="spectra" />
+    </outputs>
+
+    <tests> <!-- One test per filter: each enables a single conditional and compares the output with the expected file under test-data/filtering -->
+        <test>
+            <param name="spectra" value="filtering/input.msp" ftype="msp"/>
+            <conditional name="normalise_intensities">
+                <param name="is_true" value="TRUE"/>
+            </conditional>
+            <output name="output" file="filtering/normalise_intensities.msp" ftype="msp"/>
+        </test>
+        <test>
+            <param name="spectra" value="filtering/input.msp" ftype="msp"/>
+            <conditional name="default_filters">
+                <param name="is_true" value="TRUE"/>
+            </conditional>
+            <output name="output" file="filtering/default_filters.msp" ftype="msp"/>
+        </test>
+        <test>
+            <param name="spectra" value="filtering/input.msp" ftype="msp"/>
+            <conditional name="clean_metadata">
+                <param name="is_true" value="TRUE"/>
+            </conditional>
+            <output name="output" file="filtering/clean_metadata.msp" ftype="msp"/>
+        </test>
+        <test>
+            <param name="spectra" value="filtering/input.msp" ftype="msp"/>
+            <conditional name="relative_intensity">
+                <param name="is_true" value="TRUE"/>
+                <param name="from_intensity" value="0.3"/>
+                <param name="to_intensity" value="0.9"/>
+            </conditional>
+            <output name="output" file="filtering/relative_intensity.msp" ftype="msp"/>
+        </test>
+        <test>
+            <param name="spectra" value="filtering/input.msp" ftype="msp"/>
+            <conditional name="mz_range">
+                <param name="is_true" value="TRUE"/>
+                <param name="from_mz" value="300"/>
+                <param name="to_mz" value="800"/>
+            </conditional>
+            <output name="output" file="filtering/mz_range.msp" ftype="msp"/>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+    Documentation
+        For documentation on the tool see https://github.com/matchms/matchms/blob/master/README.rst and https://matchms.readthedocs.io/en/latest/.
+
+    Upstream Tools
+        +-----------+---------------+--------+-----------+
+        | Name      | Output File   | Format | Parameter |
+        +===========+===============+========+===========+
+        | RAMClustR | Mass spectra  | msp    | references|
+        +-----------+---------------+--------+-----------+
+        | RAMClustR | Mass spectra  | msp    | queries   |
+        +-----------+---------------+--------+-----------+
+
+    Downstream Tools
+        The output is an msp file after applying the specified filters.
+    ]]></help>
+
+
+    <citations>
+        <citation type="doi">10.5281/zenodo.4589154</citation>
+        <citation type="doi">10.21105/joss.02411</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/matchms_filtering_wrapper.py	Tue Mar 22 16:08:03 2022 +0000
@@ -0,0 +1,77 @@
+import argparse
+import sys
+
+from matchms.exporting import save_as_mgf, save_as_msp
+from matchms.filtering import add_compound_name, add_fingerprint, add_losses, add_parent_mass, add_precursor_mz,\
+    add_retention_index, add_retention_time, clean_compound_name
+from matchms.filtering import default_filters, normalize_intensities, select_by_mz, select_by_relative_intensity
+from matchms.importing import load_from_mgf, load_from_msp
+
+
+def main(argv):
+    parser = argparse.ArgumentParser(description="Compute MSP similarity scores")
+    parser.add_argument("--spectra", type=str, required=True, help="Mass spectra file to be filtered.")
+    parser.add_argument("--spectra_format", type=str, required=True, help="Format of spectra file.")
+    parser.add_argument("--output", type=str, required=True, help="Filtered mass spectra file.")
+    parser.add_argument("-normalise_intensities", action='store_true',
+                        help="Normalize intensities of peaks (and losses) to unit height.")
+    parser.add_argument("-default_filters", action='store_true',
+                        help="Collection of filters that are considered default and that do no require any (factory) arguments.")
+    parser.add_argument("-clean_metadata", action='store_true',
+                        help="Apply all adding and cleaning filters if possible, so that the spectra have canonical metadata.")
+    parser.add_argument("-relative_intensity", action='store_true',
+                        help="Keep only peaks within set relative intensity range (keep if to_intensity >= intensity >= from_intensity).")
+    parser.add_argument("--from_intensity", type=float, help="Lower bound for intensity filter")
+    parser.add_argument("--to_intensity", type=float, help="Upper bound for intensity filter")
+    parser.add_argument("-mz_range", action='store_true',
+                        help="Keep only peaks between set m/z range (keep if to_mz >= m/z >= from_mz).")
+    parser.add_argument("--from_mz", type=float, help="Lower bound for m/z  filter")
+    parser.add_argument("--to_mz", type=float, help="Upper bound for m/z  filter")
+    args = parser.parse_args()
+
+    if not (args.normalise_intensities
+            or args.default_filters
+            or args.clean_metadata
+            or args.relative_intensity
+            or args.mz_range):
+        raise ValueError('No filter selected.')
+
+    if args.spectra_format == 'msp':
+        spectra = list(load_from_msp(args.spectra))
+    elif args.queries_format == 'mgf':
+        spectra = list(load_from_mgf(args.spectra))
+    else:
+        raise ValueError(f'File format {args.spectra_format} not supported for mass spectra file.')
+
+    filtered_spectra = []
+    for spectrum in spectra:
+        if args.normalise_intensities:
+            spectrum = normalize_intensities(spectrum)
+
+        if args.default_filters:
+            spectrum = default_filters(spectrum)
+
+        if args.clean_metadata:
+            filters = [add_compound_name, add_precursor_mz, add_fingerprint, add_losses, add_parent_mass,
+                       add_retention_index, add_retention_time, clean_compound_name]
+            for metadata_filter in filters:
+                spectrum = metadata_filter(spectrum)
+
+        if args.relative_intensity:
+            spectrum = select_by_relative_intensity(spectrum, args.from_intensity, args.to_intensity)
+
+        if args.mz_range:
+            spectrum = select_by_mz(spectrum, args.from_mz, args.to_mz)
+
+        filtered_spectra.append(spectrum)
+
+    if args.spectra_format == 'msp':
+        save_as_msp(filtered_spectra, args.output)
+    else:
+        save_as_mgf(filtered_spectra, args.output)
+
+    return 0
+
+
+if __name__ == "__main__":
+    main(argv=sys.argv[1:])
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtering/clean_metadata.msp	Tue Mar 22 16:08:03 2022 +0000
@@ -0,0 +1,197 @@
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C001
+RETENTION_TIME: 38.74
+RETENTION_INDEX: None
+NUM PEAKS: 57
+138.9121    10186226.0
+148.9337    1008656.0
+175.0641    26780143.0
+186.1095    2675456.0
+196.8658    21390430.0
+198.8647    21688594.0
+200.8848    7742528.0
+206.9034    26130980.0
+216.9205    32607700.0
+234.0134    2550129.0
+254.8252    23747536.0
+256.8215    31377637.0
+258.8237    15532799.0
+266.8652    9805546.0
+268.8537    3090354.0
+306.9914    3169316.0
+312.7841    10051801.0
+316.7777    10734168.0
+322.8157    6317648.0
+324.9549    8619910.0
+334.849     4178412.0
+342.8093    3285552.0
+349.9455    2050695.0
+350.9875    6150799.0
+351.941     1965882.0
+366.8281    3253770.0
+370.7418    9765463.0
+372.7383    19374863.0
+382.8218    12815572.0
+384.8177    8311500.0
+392.7685    10913351.0
+413.2664    3965867.0
+426.7772    5431633.0
+428.7834    8554675.0
+434.7287    9943329.0
+436.8161    3705247.0
+440.7322    10603010.0
+442.7401    8271752.0
+450.7016    8762673.0
+460.7076    4528973.0
+462.7862    2123666.0
+484.7242    4273989.0
+486.7743    4886062.0
+488.6825    12267966.0
+492.744     7662344.0
+494.8953    7188793.0
+498.8794    6811405.0
+500.8484    6520691.0
+502.7832    3567833.0
+510.763     4989757.0
+518.7415    4243468.0
+546.6093    7177067.0
+550.6949    6104789.0
+566.5977    5171811.0
+612.6927    2005587.0
+676.6436    1982714.0
+800.4451    2792137.0
+
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C002
+RETENTION_TIME: 520.25
+RETENTION_INDEX: 1234.5
+NUM PEAKS: 35
+131.1733    1971789.0
+267.2688    6103973.0
+279.0196    1946255.0
+289.6491    46498377.0
+301.1565    15185412.0
+309.1649    18045974.0
+310.1623    295359836.0
+311.1658    13124727.0
+312.0296    38757284.0
+330.6757    12666597.0
+525.375     1073323842.0
+526.3783    181668883.0
+527.3812    23642795.0
+551.3321    111616808.0
+552.3348    28340614.0
+553.3314    2609936.0
+562.3269    7538206.0
+578.2905    7578406.0
+619.3008    4742103.0
+624.296     11790213.0
+813.5403    25060147.0
+814.5336    5865975.0
+955.1171    2322927.0
+1047.7378   150394804.0
+1048.7399   90978863.0
+1049.7432   29946438.0
+1050.7453   6807767.0
+1069.7158   5074652.0
+1074.1979   3402288.0
+1075.1968   33352763.0
+1076.2004   10417953.0
+1101.6535   2023916.0
+1206.3127   3738816.0
+1216.8041   4439324.0
+1217.807    3565334.0
+
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C003
+RETENTION_TIME: 483.67
+RETENTION_INDEX: None
+NUM PEAKS: 26
+265.2529    11366224.0
+266.2564    1420444.0
+279.6362    29849749.0
+280.6546    8848921.0
+288.6414    202172046.0
+378.2093    15309961.0
+379.1966    2902366.0
+522.3565    4089569222.0
+523.354     1201714423.0
+549.3267    63300808.0
+576.2749    7386007.0
+577.3074    2354251.0
+617.2778    2323470.0
+625.4543    4040374.0
+796.9808    13576738.0
+797.9841    6368973.0
+809.9883    12596682.0
+810.9916    6601055.0
+1043.7028   144351468.0
+1044.7068   83271854.0
+1045.706    27998321.0
+1046.7131   6505178.0
+1058.1594   20718345.0
+1059.1626   6608764.0
+1071.1639   15461047.0
+1072.1671   5096642.0
+
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C004
+RETENTION_TIME: 473.48
+RETENTION_INDEX: None
+NUM PEAKS: 24
+124.1405    6517662.0
+170.2437    1237313.0
+275.6336    28001849.0
+296.147     190395687.0
+482.3247    145772322.0
+483.3283    36245876.0
+496.34      12577588056.0
+497.3442    3337125302.0
+498.3462    532285213.0
+499.3493    68176083.0
+770.964     49250157.0
+771.9675    22666873.0
+783.9721    9839299.0
+784.9749    3622908.0
+949.6233    8009033.0
+950.6274    3674694.0
+991.6726    1420557258.0
+992.6749    763118028.0
+993.6787    239161906.0
+994.6801    53549573.0
+1017.6897   168186952.0
+1018.6656   120599518.0
+1019.6555   57647644.0
+1020.6591   12469103.0
+
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C005
+RETENTION_TIME: 41.72
+RETENTION_INDEX: None
+NUM PEAKS: 20
+218.1386    14009249.0
+337.0623    88672453.0
+338.0654    8770055.0
+353.0361    37061354.0
+359.0443    48435582.0
+360.0459    5025128.0
+375.018     29159485.0
+376.0216    2740193.0
+381.0261    13522755.0
+396.9999    10317665.0
+417.0027    13822994.0
+418.9966    4386311.0
+432.9764    9779399.0
+438.9851    11307111.0
+440.9796    3364168.0
+454.9592    9820452.0
+456.9603    3774845.0
+470.9263    3632486.0
+512.8989    4072570.0
+572.871     3485486.0
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtering/default_filters.msp	Tue Mar 22 16:08:03 2022 +0000
@@ -0,0 +1,199 @@
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C001
+RETENTION_TIME: 38.74
+RETENTION_INDEX: None
+CHARGE: -1
+NUM PEAKS: 57
+138.9121    10186226.0
+148.9337    1008656.0
+175.0641    26780143.0
+186.1095    2675456.0
+196.8658    21390430.0
+198.8647    21688594.0
+200.8848    7742528.0
+206.9034    26130980.0
+216.9205    32607700.0
+234.0134    2550129.0
+254.8252    23747536.0
+256.8215    31377637.0
+258.8237    15532799.0
+266.8652    9805546.0
+268.8537    3090354.0
+306.9914    3169316.0
+312.7841    10051801.0
+316.7777    10734168.0
+322.8157    6317648.0
+324.9549    8619910.0
+334.849     4178412.0
+342.8093    3285552.0
+349.9455    2050695.0
+350.9875    6150799.0
+351.941     1965882.0
+366.8281    3253770.0
+370.7418    9765463.0
+372.7383    19374863.0
+382.8218    12815572.0
+384.8177    8311500.0
+392.7685    10913351.0
+413.2664    3965867.0
+426.7772    5431633.0
+428.7834    8554675.0
+434.7287    9943329.0
+436.8161    3705247.0
+440.7322    10603010.0
+442.7401    8271752.0
+450.7016    8762673.0
+460.7076    4528973.0
+462.7862    2123666.0
+484.7242    4273989.0
+486.7743    4886062.0
+488.6825    12267966.0
+492.744     7662344.0
+494.8953    7188793.0
+498.8794    6811405.0
+500.8484    6520691.0
+502.7832    3567833.0
+510.763     4989757.0
+518.7415    4243468.0
+546.6093    7177067.0
+550.6949    6104789.0
+566.5977    5171811.0
+612.6927    2005587.0
+676.6436    1982714.0
+800.4451    2792137.0
+
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C002
+RETENTION_TIME: 520.25
+RETENTION_INDEX: 1234.5
+CHARGE: -1
+NUM PEAKS: 35
+131.1733    1971789.0
+267.2688    6103973.0
+279.0196    1946255.0
+289.6491    46498377.0
+301.1565    15185412.0
+309.1649    18045974.0
+310.1623    295359836.0
+311.1658    13124727.0
+312.0296    38757284.0
+330.6757    12666597.0
+525.375     1073323842.0
+526.3783    181668883.0
+527.3812    23642795.0
+551.3321    111616808.0
+552.3348    28340614.0
+553.3314    2609936.0
+562.3269    7538206.0
+578.2905    7578406.0
+619.3008    4742103.0
+624.296     11790213.0
+813.5403    25060147.0
+814.5336    5865975.0
+955.1171    2322927.0
+1047.7378   150394804.0
+1048.7399   90978863.0
+1049.7432   29946438.0
+1050.7453   6807767.0
+1069.7158   5074652.0
+1074.1979   3402288.0
+1075.1968   33352763.0
+1076.2004   10417953.0
+1101.6535   2023916.0
+1206.3127   3738816.0
+1216.8041   4439324.0
+1217.807    3565334.0
+
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C003
+RETENTION_TIME: 483.67
+CHARGE: -1
+NUM PEAKS: 26
+265.2529    11366224.0
+266.2564    1420444.0
+279.6362    29849749.0
+280.6546    8848921.0
+288.6414    202172046.0
+378.2093    15309961.0
+379.1966    2902366.0
+522.3565    4089569222.0
+523.354     1201714423.0
+549.3267    63300808.0
+576.2749    7386007.0
+577.3074    2354251.0
+617.2778    2323470.0
+625.4543    4040374.0
+796.9808    13576738.0
+797.9841    6368973.0
+809.9883    12596682.0
+810.9916    6601055.0
+1043.7028   144351468.0
+1044.7068   83271854.0
+1045.706    27998321.0
+1046.7131   6505178.0
+1058.1594   20718345.0
+1059.1626   6608764.0
+1071.1639   15461047.0
+1072.1671   5096642.0
+
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C004
+RETENTION_TIME: 473.48
+CHARGE: -1
+NUM PEAKS: 24
+124.1405    6517662.0
+170.2437    1237313.0
+275.6336    28001849.0
+296.147     190395687.0
+482.3247    145772322.0
+483.3283    36245876.0
+496.34      12577588056.0
+497.3442    3337125302.0
+498.3462    532285213.0
+499.3493    68176083.0
+770.964     49250157.0
+771.9675    22666873.0
+783.9721    9839299.0
+784.9749    3622908.0
+949.6233    8009033.0
+950.6274    3674694.0
+991.6726    1420557258.0
+992.6749    763118028.0
+993.6787    239161906.0
+994.6801    53549573.0
+1017.6897   168186952.0
+1018.6656   120599518.0
+1019.6555   57647644.0
+1020.6591   12469103.0
+
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C005
+RETENTION_TIME: 41.72
+CHARGE: -1
+NUM PEAKS: 20
+218.1386    14009249.0
+337.0623    88672453.0
+338.0654    8770055.0
+353.0361    37061354.0
+359.0443    48435582.0
+360.0459    5025128.0
+375.018     29159485.0
+376.0216    2740193.0
+381.0261    13522755.0
+396.9999    10317665.0
+417.0027    13822994.0
+418.9966    4386311.0
+432.9764    9779399.0
+438.9851    11307111.0
+440.9796    3364168.0
+454.9592    9820452.0
+456.9603    3774845.0
+470.9263    3632486.0
+512.8989    4072570.0
+572.871     3485486.0
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtering/input.msp	Tue Mar 22 16:08:03 2022 +0000
@@ -0,0 +1,193 @@
+NAME:C001
+IONMODE:Negative
+RETENTIONTIME:38.74
+RETENTIONINDEX: -1
+SPECTRUMTYPE:Centroid
+Num Peaks:57
+216.9205 32607700
+256.8215 31377637
+175.0641 26780143
+206.9034 26130980
+254.8252 23747536
+198.8647 21688594
+196.8658 21390430
+372.7383 19374863
+258.8237 15532799
+382.8218 12815572
+488.6825 12267966
+392.7685 10913351
+316.7777 10734168
+440.7322 10603010
+138.9121 10186226
+312.7841 10051801
+434.7287 9943329
+266.8652 9805546
+370.7418 9765463
+450.7016 8762673
+324.9549 8619910
+428.7834 8554675
+384.8177 8311500
+442.7401 8271752
+200.8848 7742528
+492.744 7662344
+494.8953 7188793
+546.6093 7177067
+498.8794 6811405
+500.8484 6520691
+322.8157 6317648
+350.9875 6150799
+550.6949 6104789
+426.7772 5431633
+566.5977 5171811
+510.763 4989757
+486.7743 4886062
+460.7076 4528973
+484.7242 4273989
+518.7415 4243468
+334.849 4178412
+413.2664 3965867
+436.8161 3705247
+502.7832 3567833
+342.8093 3285552
+366.8281 3253770
+306.9914 3169316
+268.8537 3090354
+800.4451 2792137
+186.1095 2675456
+234.0134 2550129
+462.7862 2123666
+349.9455 2050695
+612.6927 2005587
+676.6436 1982714
+351.941 1965882
+148.9337 1008656
+
+NAME:C002
+IONMODE:Negative
+RETENTIONTIME:520.25
+RETENTIONINDEX: 1234.5
+SPECTRUMTYPE:Centroid
+Num Peaks:35
+525.375 1073323842
+310.1623 295359836
+526.3783 181668883
+1047.7378 150394804
+551.3321 111616808
+1048.7399 90978863
+289.6491 46498377
+312.0296 38757284
+1075.1968 33352763
+1049.7432 29946438
+552.3348 28340614
+813.5403 25060147
+527.3812 23642795
+309.1649 18045974
+301.1565 15185412
+311.1658 13124727
+330.6757 12666597
+624.296 11790213
+1076.2004 10417953
+578.2905 7578406
+562.3269 7538206
+1050.7453 6807767
+267.2688 6103973
+814.5336 5865975
+1069.7158 5074652
+619.3008 4742103
+1216.8041 4439324
+1206.3127 3738816
+1217.807 3565334
+1074.1979 3402288
+553.3314 2609936
+955.1171 2322927
+1101.6535 2023916
+131.1733 1971789
+279.0196 1946255
+
+NAME:C003
+IONMODE:Negative
+RETENTIONTIME:483.67
+SPECTRUMTYPE:Centroid
+Num Peaks:26
+522.3565 4089569222
+523.354 1201714423
+288.6414 202172046
+1043.7028 144351468
+1044.7068 83271854
+549.3267 63300808
+279.6362 29849749
+1045.706 27998321
+1058.1594 20718345
+1071.1639 15461047
+378.2093 15309961
+796.9808 13576738
+809.9883 12596682
+265.2529 11366224
+280.6546 8848921
+576.2749 7386007
+1059.1626 6608764
+810.9916 6601055
+1046.7131 6505178
+797.9841 6368973
+1072.1671 5096642
+625.4543 4040374
+379.1966 2902366
+577.3074 2354251
+617.2778 2323470
+266.2564 1420444
+
+NAME:C004
+IONMODE:Negative
+RETENTIONTIME:473.48
+SPECTRUMTYPE:Centroid
+Num Peaks:24
+496.34 12577588056
+497.3442 3337125302
+991.6726 1420557258
+992.6749 763118028
+498.3462 532285213
+993.6787 239161906
+296.147 190395687
+1017.6897 168186952
+482.3247 145772322
+1018.6656 120599518
+499.3493 68176083
+1019.6555 57647644
+994.6801 53549573
+770.964 49250157
+483.3283 36245876
+275.6336 28001849
+771.9675 22666873
+1020.6591 12469103
+783.9721 9839299
+949.6233 8009033
+124.1405 6517662
+950.6274 3674694
+784.9749 3622908
+170.2437 1237313
+
+NAME:C005
+IONMODE:Negative
+RETENTIONTIME:41.72
+SPECTRUMTYPE:Centroid
+Num Peaks:20
+337.0623 88672453
+359.0443 48435582
+353.0361 37061354
+375.018 29159485
+218.1386 14009249
+417.0027 13822994
+381.0261 13522755
+438.9851 11307111
+396.9999 10317665
+454.9592 9820452
+432.9764 9779399
+338.0654 8770055
+360.0459 5025128
+418.9966 4386311
+512.8989 4072570
+456.9603 3774845
+470.9263 3632486
+572.871 3485486
+440.9796 3364168
+376.0216 2740193
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtering/mz_range.msp	Tue Mar 22 16:08:03 2022 +0000
@@ -0,0 +1,129 @@
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C001
+RETENTION_TIME: 38.74
+RETENTION_INDEX: None
+NUM PEAKS: 41
+306.9914    3169316.0
+312.7841    10051801.0
+316.7777    10734168.0
+322.8157    6317648.0
+324.9549    8619910.0
+334.849     4178412.0
+342.8093    3285552.0
+349.9455    2050695.0
+350.9875    6150799.0
+351.941     1965882.0
+366.8281    3253770.0
+370.7418    9765463.0
+372.7383    19374863.0
+382.8218    12815572.0
+384.8177    8311500.0
+392.7685    10913351.0
+413.2664    3965867.0
+426.7772    5431633.0
+428.7834    8554675.0
+434.7287    9943329.0
+436.8161    3705247.0
+440.7322    10603010.0
+442.7401    8271752.0
+450.7016    8762673.0
+460.7076    4528973.0
+462.7862    2123666.0
+484.7242    4273989.0
+486.7743    4886062.0
+488.6825    12267966.0
+492.744     7662344.0
+494.8953    7188793.0
+498.8794    6811405.0
+500.8484    6520691.0
+502.7832    3567833.0
+510.763     4989757.0
+518.7415    4243468.0
+546.6093    7177067.0
+550.6949    6104789.0
+566.5977    5171811.0
+612.6927    2005587.0
+676.6436    1982714.0
+
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C002
+RETENTION_TIME: 520.25
+RETENTION_INDEX: 1234.5
+NUM PEAKS: 16
+301.1565    15185412.0
+309.1649    18045974.0
+310.1623    295359836.0
+311.1658    13124727.0
+312.0296    38757284.0
+330.6757    12666597.0
+525.375     1073323842.0
+526.3783    181668883.0
+527.3812    23642795.0
+551.3321    111616808.0
+552.3348    28340614.0
+553.3314    2609936.0
+562.3269    7538206.0
+578.2905    7578406.0
+619.3008    4742103.0
+624.296     11790213.0
+
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C003
+RETENTION_TIME: 483.67
+NUM PEAKS: 11
+378.2093    15309961.0
+379.1966    2902366.0
+522.3565    4089569222.0
+523.354     1201714423.0
+549.3267    63300808.0
+576.2749    7386007.0
+577.3074    2354251.0
+617.2778    2323470.0
+625.4543    4040374.0
+796.9808    13576738.0
+797.9841    6368973.0
+
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C004
+RETENTION_TIME: 473.48
+NUM PEAKS: 10
+482.3247    145772322.0
+483.3283    36245876.0
+496.34      12577588056.0
+497.3442    3337125302.0
+498.3462    532285213.0
+499.3493    68176083.0
+770.964     49250157.0
+771.9675    22666873.0
+783.9721    9839299.0
+784.9749    3622908.0
+
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C005
+RETENTION_TIME: 41.72
+NUM PEAKS: 19
+337.0623    88672453.0
+338.0654    8770055.0
+353.0361    37061354.0
+359.0443    48435582.0
+360.0459    5025128.0
+375.018     29159485.0
+376.0216    2740193.0
+381.0261    13522755.0
+396.9999    10317665.0
+417.0027    13822994.0
+418.9966    4386311.0
+432.9764    9779399.0
+438.9851    11307111.0
+440.9796    3364168.0
+454.9592    9820452.0
+456.9603    3774845.0
+470.9263    3632486.0
+512.8989    4072570.0
+572.871     3485486.0
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtering/normalise_intensities.msp	Tue Mar 22 16:08:03 2022 +0000
@@ -0,0 +1,194 @@
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C001
+RETENTION_TIME: 38.74
+RETENTION_INDEX: None
+NUM PEAKS: 57
+138.9121    0.31238713555387226
+148.9337    0.03093306182282099
+175.0641    0.8212827951680125
+186.1095    0.08204982258791635
+196.8658    0.6559932163262051
+198.8647    0.6651371915222478
+200.8848    0.23744477531380626
+206.9034    0.8013745219687375
+216.9205    1.0
+234.0134    0.0782063439003671
+254.8252    0.7282800074828951
+256.8215    0.9622769161885076
+258.8237    0.4763537140000675
+266.8652    0.30071259242448867
+268.8537    0.09477374975849262
+306.9914    0.09719532503059093
+312.7841    0.30826464301376666
+316.7777    0.3291912033047409
+322.8157    0.1937471210787636
+324.9549    0.2643519782137348
+334.849     0.12814188059875428
+342.8093    0.10076000453880525
+349.9455    0.06288990023828728
+350.9875    0.18863026217733847
+351.941     0.06028888882073866
+366.8281    0.09978532677864431
+370.7418    0.29948334289140294
+372.7383    0.5941806076478868
+382.8218    0.3930228749651156
+384.8177    0.2548937827568335
+392.7685    0.33468631642219476
+413.2664    0.1216236349083192
+426.7772    0.1665751647616974
+428.7834    0.262351377128715
+434.7287    0.3049380667756389
+436.8161    0.11363104420121628
+440.7322    0.32516890182380237
+442.7401    0.25367480687076976
+450.7016    0.268730177228078
+460.7076    0.13889274619185038
+462.7862    0.06512774590050817
+484.7242    0.13107299809554185
+486.7743    0.14984380989766222
+488.6825    0.376229111528872
+492.744     0.23498572423077985
+494.8953    0.22046305013846423
+498.8794    0.20888946475832396
+500.8484    0.19997396320500985
+502.7832    0.10941688619559184
+510.763     0.15302388699601627
+518.7415    0.13013699218282798
+546.6093    0.2201034418250904
+550.6949    0.18721924576097057
+566.5977    0.15860704680182902
+612.6927    0.061506545999871196
+676.6436    0.06080508591528995
+800.4451    0.08562814917948829
+
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C002
+RETENTION_TIME: 520.25
+RETENTION_INDEX: 1234.5
+NUM PEAKS: 35
+131.1733    0.0018370867419900284
+267.2688    0.005686981655625982
+279.0196    0.0018132970906277511
+289.6491    0.04332185234360982
+301.1565    0.014148024487841387
+309.1649    0.01681316793110052
+310.1623    0.2751824048272655
+311.1658    0.012228114653210135
+312.0296    0.03610959011940033
+330.6757    0.011801281686240601
+525.375     1.0
+526.3783    0.16925822001818533
+527.3812    0.022027643545069038
+551.3321    0.10399173449088443
+552.3348    0.026404532249270578
+553.3314    0.00243163889393971
+562.3269    0.0070232353973927655
+578.2905    0.007060689144739971
+619.3008    0.004418147454139941
+624.296     0.010984767633625341
+813.5403    0.023348169508005768
+814.5336    0.005465242427736922
+955.1171    0.0021642368399005527
+1047.7378   0.1401206216753359
+1048.7399   0.08476366539149328
+1049.7432   0.027900654796038715
+1050.7453   0.006342696149667753
+1069.7158   0.004727978454800783
+1074.1979   0.0031698615710057058
+1075.1968   0.03107427758042852
+1076.2004   0.009706253222314986
+1101.6535   0.0018856526994021623
+1206.3127   0.003483399747305716
+1216.8041   0.004136052723591693
+1217.807    0.0033217691254826334
+
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C003
+RETENTION_TIME: 483.67
+NUM PEAKS: 26
+265.2529    0.0027793206039538215
+266.2564    0.00034733340430054716
+279.6362    0.0072989959038771346
+280.6546    0.002163778266032735
+288.6414    0.049436024927126176
+378.2093    0.0037436610481220017
+379.1966    0.0007096996877780199
+522.3565    1.0
+523.354     0.29384865685493955
+549.3267    0.015478600450010918
+576.2749    0.0018060599048590942
+577.3074    0.0005756721239330571
+617.2778    0.0005681454143142512
+625.4543    0.0009879705613649104
+796.9808    0.0033198455052339984
+797.9841    0.0015573701420036753
+809.9883    0.0030801977705220513
+810.9916    0.0016141198844341264
+1043.7028   0.035297475152995465
+1044.7068   0.020362011126266247
+1045.706    0.0068462763386867055
+1046.7131   0.0015906756058816994
+1058.1594   0.00506614361447774
+1059.1626   0.0016160049240511426
+1071.1639   0.0037806052815603864
+1072.1671   0.0012462539997079428
+
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C004
+RETENTION_TIME: 473.48
+NUM PEAKS: 24
+124.1405    0.0005181964913289414
+170.2437    9.837442556482469e-05
+275.6336    0.002226328996889195
+296.147     0.015137694616192635
+482.3247    0.011589847063758851
+483.3283    0.00288178272643532
+496.34      1.0
+497.3442    0.2653231515567137
+498.3462    0.04232013408533278
+499.3493    0.005420441717160338
+770.964     0.003915707588825487
+771.9675    0.0018021637295703144
+783.9721    0.0007822882222085712
+784.9749    0.0002880447335267696
+949.6233    0.0006367701791743273
+950.6274    0.00029216205711611197
+991.6726    0.11294353509394345
+992.6749    0.06067284320350776
+993.6787    0.01901492598860482
+994.6801    0.004257539105397458
+1017.6897   0.013371955835345416
+1018.6656   0.00958844553208827
+1019.6555   0.004583362385803359
+1020.6591   0.0009913747329363162
+
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C005
+RETENTION_TIME: 41.72
+NUM PEAKS: 20
+218.1386    0.15798873862212878
+337.0623    1.0
+338.0654    0.09890394032518758
+353.0361    0.4179579198062785
+359.0443    0.5462303157441691
+360.0459    0.0566706776455141
+375.018     0.3288449119592981
+376.0216    0.03090241565776916
+381.0261    0.15250232222627247
+396.9999    0.1163570494660839
+417.0027    0.1558882553976487
+418.9966    0.049466444781898614
+432.9764    0.11028677643551825
+438.9851    0.12751548668671656
+440.9796    0.037939268467062706
+454.9592    0.11074974998154162
+456.9603    0.042570661713847026
+470.9263    0.04096521385282981
+512.8989    0.04592824335196862
+572.871     0.03930742730214083
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/filtering/relative_intensity.msp	Tue Mar 22 16:08:03 2022 +0000
@@ -0,0 +1,51 @@
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C001
+RETENTION_TIME: 38.74
+RETENTION_INDEX: None
+NUM PEAKS: 16
+138.9121    10186226.0
+175.0641    26780143.0
+196.8658    21390430.0
+198.8647    21688594.0
+206.9034    26130980.0
+254.8252    23747536.0
+258.8237    15532799.0
+266.8652    9805546.0
+312.7841    10051801.0
+316.7777    10734168.0
+372.7383    19374863.0
+382.8218    12815572.0
+392.7685    10913351.0
+434.7287    9943329.0
+440.7322    10603010.0
+488.6825    12267966.0
+
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C002
+RETENTION_TIME: 520.25
+RETENTION_INDEX: 1234.5
+NUM PEAKS: 0
+
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C003
+RETENTION_TIME: 483.67
+NUM PEAKS: 0
+
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C004
+RETENTION_TIME: 473.48
+NUM PEAKS: 0
+
+IONMODE: negative
+SPECTRUMTYPE: Centroid
+COMPOUND_NAME: C005
+RETENTION_TIME: 41.72
+NUM PEAKS: 3
+353.0361    37061354.0
+359.0443    48435582.0
+375.018     29159485.0
+