changeset 0:c2862090e321 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/msp_merge commit 51ff658aecc8738ef57af512229cd155763082d1
author recetox
date Thu, 19 May 2022 12:04:25 +0000
parents
children d38d73004bba
files msp_merge.py msp_merge.xml test-data/1-NITROPYRENE.msp test-data/23-DICHLOROPHENOL.msp test-data/24-DICHLOROPHENOL.msp test-data/sample_output.msp
diffstat 6 files changed, 531 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/msp_merge.py	Thu May 19 12:04:25 2022 +0000
@@ -0,0 +1,30 @@
+import argparse
+from itertools import chain
+from typing import List
+
+from matchms import Spectrum
+from matchms.exporting import save_as_msp
+from matchms.importing import load_from_msp
+
+
+def read_spectra(filenames: List[str]) -> List[Spectrum]:
+    """Read the spectra of several MSP files into one list.
+
+    Args:
+        filenames (List[str]): Paths to the MSP files to load, in merge order.
+
+    Returns:
+        List[Spectrum]: Spectra of all files, concatenated in the given file order.
+    """
+    # from_iterable flattens the per-file results without unpacking a temporary list.
+    return list(chain.from_iterable(load_from_msp(file) for file in filenames))
+
+
+if __name__ == "__main__":
+    # Parse inside the guard so that importing this module never reads sys.argv.
+    listarg = argparse.ArgumentParser(description="Merge MSP files into one.")
+    listarg.add_argument('--filenames', nargs='+', type=str, required=True)
+    listarg.add_argument('--outfilename', type=str, required=True)
+    args = listarg.parse_args()
+    # Both options are required: a missing one fails in argparse with a usage error.
+    save_as_msp(read_spectra(args.filenames), args.outfilename)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/msp_merge.xml	Thu May 19 12:04:25 2022 +0000
@@ -0,0 +1,43 @@
+<tool id="msp_merge" name="Merge MSP Spectra" version="0.1.0" python_template_version="3.5">
+   <creator>
+        <person
+            givenName="Wudmir"
+            familyName="Rojas"
+            url="https://github.com/wverastegui"
+            identifier="0000-0001-7036-9987" />
+        <person
+            givenName="Helge"
+            familyName="Hecht"
+            url="https://github.com/hechth"
+            identifier="0000-0001-6744-996X" />
+        <organization
+            url="https://www.recetox.muni.cz/"
+            email="GalaxyToolsDevelopmentandDeployment@space.muni.cz"
+            name="RECETOX MUNI"/>
+    </creator>
+    <description>Merge MSP Spectra files</description>
+    <requirements>
+         <requirement type="package" version="0.14.0">matchms</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Join the paths with "' '" so the outer quotes below give one quoted argument per file.
+        #set newinput = str("' '").join([str($f) for $f in $files])
+        python3 '$__tool_directory__/msp_merge.py'
+        --filenames '$newinput' --outfilename '$outfile'
+    ]]></command>
+    <inputs>
+        <param name="files" type="data" format="msp" multiple="true" label="MSP files to merge"/>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="msp" />
+    </outputs>
+    <tests>
+         <test>
+            <param name="files" value="1-NITROPYRENE.msp,23-DICHLOROPHENOL.msp,24-DICHLOROPHENOL.msp" ftype="msp"/>
+            <output name="outfile" file="sample_output.msp" ftype="msp" />
+        </test>
+    </tests> 
+    <help><![CDATA[
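+        MSPmerge merges the spectra of several MSP files into a single MSP file, keeping the spectra in the order of the input files. Input and output are handled in MSP format using the matchms.importing and matchms.exporting modules (https://matchms.readthedocs.io/en/latest/index.html).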
+    ]]></help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/1-NITROPYRENE.msp	Thu May 19 12:04:25 2022 +0000
@@ -0,0 +1,100 @@
+SYNONYM: 1-NITROPYRENE
+DB#: JP000001
+INCHIKEY: ALRLPDGCPYIVHP-UHFFFAOYSA-N
+MW: 247.063328528
+FORMULA: C16H9NO2
+ACCESSION: JP000001
+AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
+LICENSE: CC BY-NC-SA
+INSTRUMENT: VARIAN MAT-44
+SMILES: [O-1][N+1](=O)c(c4)c(c1)c(c3c4)c(c2cc3)c(ccc2)c1
+INCHI: InChI=1S/C16H9NO2/c18-17(19)14-9-7-12-5-4-10-2-1-3-11-6-8-13(14)16(12)15(10)11/h1-9H
+SMILES_2: [H]C=1C([H])=C2C([H])=C([H])C3=C([H])C([H])=C(C=4C([H])=C([H])C(C1[H])=C2C34)N(=O)=O
+INSTRUMENT_TYPE: EI-B
+MS_LEVEL: MS1
+IONIZATION_ENERGY: 70 eV
+ION_TYPE: [M]+*
+IONIZATION_MODE: positive
+LAST_AUTO-CURATION: 1495210335755
+MOLECULAR_FORMULA: C16H9NO2
+TOTAL_EXACT_MASS: 247.063328528
+COMPOUND_NAME: 1-NITROPYRENE
+PRECURSOR_MZ: 0
+PARENT_MASS: 247.06333
+NUM PEAKS: 75
+51.0        2.66
+55.0        8.0
+57.0        7.33
+58.0        1.33
+59.0        1.33
+60.0        14.0
+61.0        1.33
+62.0        3.33
+63.0        3.33
+66.0        1.33
+68.0        8.66
+70.0        2.0
+72.0        5.33
+73.0        7.33
+74.0        3.33
+75.0        2.66
+76.0        2.0
+78.0        1.33
+80.0        4.0
+81.0        2.0
+82.0        1.33
+83.0        3.33
+86.0        12.66
+87.0        8.66
+92.0        2.0
+93.0        10.0
+94.0        6.0
+98.0        14.66
+99.0        83.33
+100.0       60.66
+104.0       4.0
+107.0       1.33
+108.0       1.33
+110.0       3.33
+112.0       1.33
+113.0       1.33
+115.0       1.33
+116.0       1.33
+120.0       1.33
+122.0       4.0
+123.0       2.66
+124.0       2.66
+125.0       2.0
+126.0       1.33
+134.0       1.33
+135.0       2.0
+137.0       1.33
+147.0       1.33
+149.0       2.0
+150.0       4.66
+151.0       3.33
+159.0       2.0
+162.0       2.0
+163.0       2.66
+173.0       2.0
+174.0       8.66
+175.0       4.66
+177.0       2.0
+187.0       5.33
+188.0       4.66
+189.0       56.66
+190.0       12.0
+191.0       16.66
+198.0       10.66
+199.0       9.33
+200.0       72.66
+201.0       99.99
+202.0       16.0
+203.0       1.33
+207.0       1.33
+214.0       1.33
+217.0       25.33
+218.0       5.33
+247.0       52.66
+248.0       10.16
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/23-DICHLOROPHENOL.msp	Thu May 19 12:04:25 2022 +0000
@@ -0,0 +1,67 @@
+SYNONYM: 2,3-DICHLOROPHENOL
+DB#: JP000006
+INCHIKEY: UMPSXRYVXUPCOS-UHFFFAOYSA-N
+MW: 161.963920108
+FORMULA: C6H4Cl2O
+ACCESSION: JP000006
+AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
+LICENSE: CC BY-NC-SA
+INSTRUMENT: VARIAN MAT-44
+SMILES: Oc(c1)c(Cl)c(Cl)cc1
+INCHI: InChI=1S/C6H4Cl2O/c7-4-2-1-3-5(9)6(4)8/h1-3,9H
+SMILES_2: [H]OC=1C([H])=C([H])C([H])=C(Cl)C1Cl
+INSTRUMENT_TYPE: EI-B
+MS_LEVEL: MS1
+IONIZATION_ENERGY: 70 eV
+ION_TYPE: [M]+*
+IONIZATION_MODE: positive
+LAST_AUTO-CURATION: 1495210335870
+MOLECULAR_FORMULA: C6H4Cl2O
+TOTAL_EXACT_MASS: 161.963920108
+COMPOUND_NAME: 2,3-DICHLOROPHENOL
+PRECURSOR_MZ: 0
+PARENT_MASS: 161.96392
+NUM PEAKS: 42
+51.0        4.43
+53.0        10.39
+60.0        9.21
+61.0        24.93
+62.0        43.19
+63.0        99.99
+64.0        12.57
+65.0        4.81
+66.0        3.39
+71.0        3.67
+72.0        15.34
+73.0        25.07
+74.0        11.84
+75.0        8.79
+81.0        4.78
+82.0        3.25
+83.0        2.63
+84.0        3.87
+85.0        2.49
+87.0        5.09
+89.0        2.21
+91.0        6.02
+96.0        3.11
+97.0        12.05
+98.0        35.88
+99.0        22.09
+100.0       13.5
+101.0       6.26
+107.0       3.33
+109.0       2.73
+125.0       3.11
+126.0       59.16
+127.0       5.61
+128.0       19.32
+133.0       5.33
+135.0       2.84
+161.0       2.52
+162.0       68.96
+163.0       6.51
+164.0       51.64
+165.0       2.9
+166.0       7.58
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/24-DICHLOROPHENOL.msp	Thu May 19 12:04:25 2022 +0000
@@ -0,0 +1,62 @@
+SYNONYM: 2,4-DICHLOROPHENOL
+DB#: JP000007
+INCHIKEY: HFZWRUODUSTPEG-UHFFFAOYSA-N
+MW: 161.963920108
+FORMULA: C6H4Cl2O
+ACCESSION: JP000007
+AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
+LICENSE: CC BY-NC-SA
+INSTRUMENT: VARIAN MAT-44
+SMILES: Oc(c1)c(Cl)cc(Cl)c1
+INCHI: InChI=1S/C6H4Cl2O/c7-4-1-2-6(9)5(8)3-4/h1-3,9H
+SMILES_2: [H]OC1=C([H])C([H])=C(Cl)C([H])=C1Cl
+INSTRUMENT_TYPE: EI-B
+MS_LEVEL: MS1
+IONIZATION_ENERGY: 70 eV
+ION_TYPE: [M]+*
+IONIZATION_MODE: positive
+LAST_AUTO-CURATION: 1495210335864
+MOLECULAR_FORMULA: C6H4Cl2O
+TOTAL_EXACT_MASS: 161.963920108
+COMPOUND_NAME: 2,4-DICHLOROPHENOL
+PRECURSOR_MZ: 0
+PARENT_MASS: 161.96392
+NUM PEAKS: 37
+51.0        3.07
+53.0        12.34
+60.0        6.21
+61.0        19.31
+62.0        35.08
+63.0        99.99
+64.0        10.24
+66.0        2.25
+71.0        3.05
+72.0        10.59
+73.0        19.52
+74.0        8.59
+75.0        6.44
+81.0        6.82
+82.0        4.45
+83.0        2.77
+84.0        2.03
+91.0        2.34
+96.0        3.78
+97.0        31.79
+98.0        38.03
+99.0        21.59
+100.0       13.06
+101.0       4.67
+125.0       4.82
+126.0       20.32
+127.0       3.76
+128.0       7.38
+133.0       4.02
+134.0       2.72
+135.0       2.64
+161.0       19.22
+162.0       94.19
+163.0       15.34
+164.0       55.32
+165.0       5.54
+166.0       9.19
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/sample_output.msp	Thu May 19 12:04:25 2022 +0000
@@ -0,0 +1,229 @@
+SYNONYM: 1-NITROPYRENE
+DB#: JP000001
+INCHIKEY: ALRLPDGCPYIVHP-UHFFFAOYSA-N
+MW: 247.063328528
+FORMULA: C16H9NO2
+ACCESSION: JP000001
+AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
+LICENSE: CC BY-NC-SA
+INSTRUMENT: VARIAN MAT-44
+SMILES: [O-1][N+1](=O)c(c4)c(c1)c(c3c4)c(c2cc3)c(ccc2)c1
+INCHI: InChI=1S/C16H9NO2/c18-17(19)14-9-7-12-5-4-10-2-1-3-11-6-8-13(14)16(12)15(10)11/h1-9H
+SMILES_2: [H]C=1C([H])=C2C([H])=C([H])C3=C([H])C([H])=C(C=4C([H])=C([H])C(C1[H])=C2C34)N(=O)=O
+INSTRUMENT_TYPE: EI-B
+MS_LEVEL: MS1
+IONIZATION_ENERGY: 70 eV
+ION_TYPE: [M]+*
+IONIZATION_MODE: positive
+LAST_AUTO-CURATION: 1495210335755
+MOLECULAR_FORMULA: C16H9NO2
+TOTAL_EXACT_MASS: 247.063328528
+COMPOUND_NAME: 1-NITROPYRENE
+PRECURSOR_MZ: 0.0
+PARENT_MASS: 247.06333
+NUM PEAKS: 75
+51.0        2.66
+55.0        8.0
+57.0        7.33
+58.0        1.33
+59.0        1.33
+60.0        14.0
+61.0        1.33
+62.0        3.33
+63.0        3.33
+66.0        1.33
+68.0        8.66
+70.0        2.0
+72.0        5.33
+73.0        7.33
+74.0        3.33
+75.0        2.66
+76.0        2.0
+78.0        1.33
+80.0        4.0
+81.0        2.0
+82.0        1.33
+83.0        3.33
+86.0        12.66
+87.0        8.66
+92.0        2.0
+93.0        10.0
+94.0        6.0
+98.0        14.66
+99.0        83.33
+100.0       60.66
+104.0       4.0
+107.0       1.33
+108.0       1.33
+110.0       3.33
+112.0       1.33
+113.0       1.33
+115.0       1.33
+116.0       1.33
+120.0       1.33
+122.0       4.0
+123.0       2.66
+124.0       2.66
+125.0       2.0
+126.0       1.33
+134.0       1.33
+135.0       2.0
+137.0       1.33
+147.0       1.33
+149.0       2.0
+150.0       4.66
+151.0       3.33
+159.0       2.0
+162.0       2.0
+163.0       2.66
+173.0       2.0
+174.0       8.66
+175.0       4.66
+177.0       2.0
+187.0       5.33
+188.0       4.66
+189.0       56.66
+190.0       12.0
+191.0       16.66
+198.0       10.66
+199.0       9.33
+200.0       72.66
+201.0       99.99
+202.0       16.0
+203.0       1.33
+207.0       1.33
+214.0       1.33
+217.0       25.33
+218.0       5.33
+247.0       52.66
+248.0       10.16
+
+SYNONYM: 2,3-DICHLOROPHENOL
+DB#: JP000006
+INCHIKEY: UMPSXRYVXUPCOS-UHFFFAOYSA-N
+MW: 161.963920108
+FORMULA: C6H4Cl2O
+ACCESSION: JP000006
+AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
+LICENSE: CC BY-NC-SA
+INSTRUMENT: VARIAN MAT-44
+SMILES: Oc(c1)c(Cl)c(Cl)cc1
+INCHI: InChI=1S/C6H4Cl2O/c7-4-2-1-3-5(9)6(4)8/h1-3,9H
+SMILES_2: [H]OC=1C([H])=C([H])C([H])=C(Cl)C1Cl
+INSTRUMENT_TYPE: EI-B
+MS_LEVEL: MS1
+IONIZATION_ENERGY: 70 eV
+ION_TYPE: [M]+*
+IONIZATION_MODE: positive
+LAST_AUTO-CURATION: 1495210335870
+MOLECULAR_FORMULA: C6H4Cl2O
+TOTAL_EXACT_MASS: 161.963920108
+COMPOUND_NAME: 2,3-DICHLOROPHENOL
+PRECURSOR_MZ: 0.0
+PARENT_MASS: 161.96392
+NUM PEAKS: 42
+51.0        4.43
+53.0        10.39
+60.0        9.21
+61.0        24.93
+62.0        43.19
+63.0        99.99
+64.0        12.57
+65.0        4.81
+66.0        3.39
+71.0        3.67
+72.0        15.34
+73.0        25.07
+74.0        11.84
+75.0        8.79
+81.0        4.78
+82.0        3.25
+83.0        2.63
+84.0        3.87
+85.0        2.49
+87.0        5.09
+89.0        2.21
+91.0        6.02
+96.0        3.11
+97.0        12.05
+98.0        35.88
+99.0        22.09
+100.0       13.5
+101.0       6.26
+107.0       3.33
+109.0       2.73
+125.0       3.11
+126.0       59.16
+127.0       5.61
+128.0       19.32
+133.0       5.33
+135.0       2.84
+161.0       2.52
+162.0       68.96
+163.0       6.51
+164.0       51.64
+165.0       2.9
+166.0       7.58
+
+SYNONYM: 2,4-DICHLOROPHENOL
+DB#: JP000007
+INCHIKEY: HFZWRUODUSTPEG-UHFFFAOYSA-N
+MW: 161.963920108
+FORMULA: C6H4Cl2O
+ACCESSION: JP000007
+AUTHOR: KOGA M, UNIV. OF OCCUPATIONAL AND ENVIRONMENTAL HEALTH
+LICENSE: CC BY-NC-SA
+INSTRUMENT: VARIAN MAT-44
+SMILES: Oc(c1)c(Cl)cc(Cl)c1
+INCHI: InChI=1S/C6H4Cl2O/c7-4-1-2-6(9)5(8)3-4/h1-3,9H
+SMILES_2: [H]OC1=C([H])C([H])=C(Cl)C([H])=C1Cl
+INSTRUMENT_TYPE: EI-B
+MS_LEVEL: MS1
+IONIZATION_ENERGY: 70 eV
+ION_TYPE: [M]+*
+IONIZATION_MODE: positive
+LAST_AUTO-CURATION: 1495210335864
+MOLECULAR_FORMULA: C6H4Cl2O
+TOTAL_EXACT_MASS: 161.963920108
+COMPOUND_NAME: 2,4-DICHLOROPHENOL
+PRECURSOR_MZ: 0.0
+PARENT_MASS: 161.96392
+NUM PEAKS: 37
+51.0        3.07
+53.0        12.34
+60.0        6.21
+61.0        19.31
+62.0        35.08
+63.0        99.99
+64.0        10.24
+66.0        2.25
+71.0        3.05
+72.0        10.59
+73.0        19.52
+74.0        8.59
+75.0        6.44
+81.0        6.82
+82.0        4.45
+83.0        2.77
+84.0        2.03
+91.0        2.34
+96.0        3.78
+97.0        31.79
+98.0        38.03
+99.0        21.59
+100.0       13.06
+101.0       4.67
+125.0       4.82
+126.0       20.32
+127.0       3.76
+128.0       7.38
+133.0       4.02
+134.0       2.72
+135.0       2.64
+161.0       19.22
+162.0       94.19
+163.0       15.34
+164.0       55.32
+165.0       5.54
+166.0       9.19
+