diff matchms_metadata_merge.xml @ 0:107186a6fcec draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f5f5a8eff32c9b5de792dce99bc3c63dc971e82c
author recetox
date Thu, 23 Nov 2023 09:56:32 +0000
parents
children 8e18dcf410ec
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/matchms_metadata_merge.xml	Thu Nov 23 09:56:32 2023 +0000
@@ -0,0 +1,92 @@
+<tool id="matchms_metadata_merge" name="matchms metadata merge" version="@TOOL_VERSION@+galaxy0" profile="21.09">
+    <description>Merge metadata csv into MSP by a specified column</description>
+    
+    <macros>
+        <import>macros.xml</import>
+        <import>help.xml</import>
+    </macros>
+
+    <expand macro="creator"/>
+
+    <edam_operations>
+        <edam_operation>operation_2409</edam_operation>
+    </edam_operations>
+    <expand macro="bio.tools"/>
+
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">matchms</requirement>
+    </requirements>
+
+    <command detect_errors='aggressive'><![CDATA[
+        python '${matchms_python_cli}'
+    ]]></command>
+
+<configfiles>
+<configfile name="matchms_python_cli">
+import pandas
+import matchms
+import numpy as np
+
+matchms.set_matchms_logger_level('ERROR')
+matchms.Metadata.set_key_replacements({})
+
+spectra = list(matchms.importing.load_from_msp('${spectral_library}', False))
+
+metadata_table = pandas.read_csv('${metadata_table_file}', dtype=object)
+metadata_table.columns = map(str.lower, metadata_table.columns)
+
+metadata_table.drop_duplicates(subset='${user_specified_column}'.lower(), inplace=True)
+
+spectra_metadata= pandas.DataFrame.from_dict([x.metadata for x in spectra])
+spectra_metadata.dropna(axis=1, inplace=True)
+
+merged = metadata_table.merge(spectra_metadata, on='${user_specified_column}'.lower(), how='right')
+
+spectra_arr = np.asarray(spectra, dtype=object)
+
+def update_metadata(spectrum: matchms.Spectrum, row):
+    metadata = spectrum.metadata
+    metadata.update(row)
+    spectrum.metadata = metadata
+    return spectrum
+
+vec_update_metadata = np.vectorize(update_metadata)
+merged_array = vec_update_metadata(spectra_arr, merged.to_dict(orient='records'))
+
+matchms.exporting.save_as_msp(merged_array.tolist(), '${output}')
+</configfile>
+</configfiles>
+
+    <inputs>
+        <param label="Spectra file" name="spectral_library" type="data" format="msp"
+            help="Mass spectral library file." />
+        <param label="Metadata csv file" name="metadata_table_file" type="data" format="csv"
+            help="csv file containing the metadata." />
+
+        <param label="specify column/metadata key" name="user_specified_column" type="text" value="compound_name" help="Name of the user specified column to merge the data on." />
+    </inputs>
+
+    <outputs>
+        <data label="${tool.name} on ${on_string}" name="output" format="msp">
+        </data>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="spectral_library" value="metadata_merge/input.msp" ftype="msp"/>
+            <param name="metadata_table_file" value="metadata_merge/metadata.csv" ftype="csv"/>
+            <param name="user_specified_column" value="name"/>
+            <output name="output" file="metadata_merge/output.msp" ftype="msp"/>
+        </test>
+    </tests>
+
+    <help>
+        **Description**
+            The tool takes an MSP file and a metadata CSV file and merges the metadata from the CSV
+            file into the metadata of the MSP file, matching rows to spectra on a user-specified column.
+    </help>
+
+    <!-- A citation of type "doi" holds the bare DOI, not the https://doi.org/ resolver URL. -->
+    <citations>
+        <citation type="doi">10.5281/zenodo.8083373</citation>
+    </citations>
+</tool>