Mercurial > repos > recetox > matchms_metadata_merge
diff matchms_metadata_merge.xml @ 0:107186a6fcec draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f5f5a8eff32c9b5de792dce99bc3c63dc971e82c
author | recetox |
---|---|
date | Thu, 23 Nov 2023 09:56:32 +0000 |
parents | |
children | 8e18dcf410ec |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/matchms_metadata_merge.xml Thu Nov 23 09:56:32 2023 +0000
@@ -0,0 +1,113 @@
+<tool id="matchms_metadata_merge" name="matchms metadata merge" version="@TOOL_VERSION@+galaxy0" profile="21.09">
+    <description>Merge metadata csv into MSP by a specified column</description>
+
+    <!-- Runs the Python script rendered from the configfile below; it joins a metadata csv onto the spectra of an MSP library. -->
+
+    <macros>
+        <import>macros.xml</import>
+        <import>help.xml</import>
+    </macros>
+
+    <expand macro="creator"/>
+
+    <edam_operations>
+        <edam_operation>operation_2409</edam_operation>
+    </edam_operations>
+    <expand macro="bio.tools"/>
+
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">matchms</requirement>
+    </requirements>
+
+    <command detect_errors='aggressive'><![CDATA[
+        python '${matchms_python_cli}'
+    ]]></command>
+
+<configfiles>
+<configfile name="matchms_python_cli">
+import math
+
+import matchms
+import pandas
+
+matchms.set_matchms_logger_level('ERROR')
+## Install an empty key-replacement table (presumably so matchms does not
+## rename metadata keys; confirm against the matchms documentation).
+matchms.Metadata.set_key_replacements({})
+
+merge_key = '${user_specified_column}'.lower()
+
+spectra = list(matchms.importing.load_from_msp('${spectral_library}', False))
+
+## dtype=object: no numeric type inference is applied to the csv cells.
+metadata_table = pandas.read_csv('${metadata_table_file}', dtype=object)
+metadata_table.columns = [column.lower() for column in metadata_table.columns]
+
+## One row per key, so a spectrum can match at most one metadata row.
+metadata_table.drop_duplicates(subset=merge_key, inplace=True)
+
+spectra_metadata = pandas.DataFrame.from_dict([x.metadata for x in spectra])
+## Drop every metadata column that is missing for at least one spectrum.
+spectra_metadata.dropna(axis=1, inplace=True)
+
+## Right join: every spectrum keeps a row, in the order of the library.
+merged = metadata_table.merge(spectra_metadata, on=merge_key, how='right')
+rows = merged.to_dict(orient='records')
+if len(rows) != len(spectra):
+    raise ValueError('Merged table has a different number of rows than the spectral library.')
+
+
+def is_missing(value):
+    """Return True when value is a float NaN, the marker pandas uses for absent cells."""
+    return isinstance(value, float) and math.isnan(value)
+
+
+def update_metadata(spectrum: matchms.Spectrum, row):
+    """Copy the non-missing entries of a merged row into the spectrum metadata and return the spectrum."""
+    metadata = spectrum.metadata
+    ## Skip NaN cells (unmatched spectra, empty csv fields) so no nan values are written.
+    metadata.update({key: value for key, value in row.items() if not is_missing(value)})
+    spectrum.metadata = metadata
+    return spectrum
+
+
+## Rows and spectra are aligned by position; a plain loop needs no numpy round-trip.
+merged_spectra = [update_metadata(spectrum, row) for spectrum, row in zip(spectra, rows)]
+
+matchms.exporting.save_as_msp(merged_spectra, '${output}')
+</configfile>
+</configfiles>
+
+    <inputs>
+        <param label="Spectra file" name="spectral_library" type="data" format="msp"
+            help="Mass spectral library file." />
+        <param label="Metadata csv file" name="metadata_table_file" type="data" format="csv"
+            help="csv file containing the metadata." />
+
+        <param label="specify column/metadata key" name="user_specified_column" type="text" value="compound_name" help="Name of the user specified column to merge the data on." />
+    </inputs>
+
+    <outputs>
+        <data label="${tool.name} on ${on_string}" name="output" format="msp">
+        </data>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="spectral_library" value="metadata_merge/input.msp" ftype="msp"/>
+            <param name="metadata_table_file" value="metadata_merge/metadata.csv" ftype="csv"/>
+            <param name="user_specified_column" value="name"/>
+            <output name="output" file="metadata_merge/output.msp" ftype="msp"/>
+        </test>
+    </tests>
+
+    <help>
+        **Description**
+        The tool takes an msp file and a metadata csv file and merges the metadata in the csv
+        file with the metadata in the MSP file on a user specified column.
+    </help>
+
+    <citations>
+        <citation type="doi">10.5281/zenodo.8083373</citation>
+    </citations>
+</tool>