Mercurial > repos > recetox > matchms_formatter
changeset 27:402620029a7a draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit c626c8db7ba4dd30f85f7086e16e1e2413e36bd8
author | recetox |
---|---|
date | Mon, 22 Apr 2024 08:40:39 +0000 |
parents | 48ade5cc847f |
children | 8bd942dcf1ad |
files | formatter.py matchms_formatter.xml |
diffstat | 2 files changed, 19 insertions(+), 58 deletions(-) [+] |
line wrap: on
line diff
--- a/formatter.py Tue Apr 16 11:25:14 2024 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,51 +0,0 @@ -import click -from matchms.importing import scores_from_json -from pandas import DataFrame - - -def scores_to_dataframe(scores): - """Unpack scores from matchms.scores into two dataframes of scores and matches. - - Args: - scores (matchms.scores): matchms.scores object. - - Returns: - DataFrame: Scores - DataFrame: Matches - """ - data = [] - - for i, (row, col) in enumerate(zip(scores.scores.row, scores.scores.col)): - data.append([scores.queries[col].metadata['compound_name'], scores.references[row].metadata['compound_name'], *scores.scores.data[i]]) - - dataframe = DataFrame(data, columns=['query', 'reference', *scores.scores.score_names]) - - return dataframe - - -def load_data(scores_filename: str) -> DataFrame: - """Load data from filenames and join on compound id. - - Args: - scores_filename (str): Path to json file with serialized scores. - - Returns: - DataFrame: Joined dataframe on compounds containing scores and matches in long format. - """ - scores = scores_from_json(scores_filename) - scores = scores_to_dataframe(scores) - - return scores - - -@click.group(invoke_without_command=True) -@click.option('--sf', 'scores_filename', type=click.Path(exists=True), required=True) -@click.option('--o', 'output_filename', type=click.Path(writable=True), required=True) -def cli(scores_filename, output_filename): - result = load_data(scores_filename) - result.to_csv(output_filename, sep="\t", index=False) - pass - - -if __name__ == '__main__': - cli()
--- a/matchms_formatter.xml Tue Apr 16 11:25:14 2024 +0000 +++ b/matchms_formatter.xml Mon Apr 22 08:40:39 2024 +0000 @@ -1,4 +1,4 @@ -<tool id="matchms_formatter" name="matchms scores formatter" version="@TOOL_VERSION@+galaxy3" profile="21.09"> +<tool id="matchms_formatter" name="matchms scores formatter" version="@TOOL_VERSION@+galaxy4" profile="21.09"> <description>reformat scores object of matchms to long format table</description> <macros> @@ -13,12 +13,11 @@ <requirements> <requirement type="package" version="@TOOL_VERSION@">matchms</requirement> - <requirement type="package" version="8.0.1">click</requirement> <requirement type="package" version="1.1.4">pandas</requirement> </requirements> <command detect_errors="aggressive"><![CDATA[ - sh ${matchms_formatter_cli} + python3 '${formatter}' ]]></command> <environment_variables> @@ -29,15 +28,26 @@ </environment_variables> <configfiles> - <configfile name="matchms_formatter_cli"> - python3 ${__tool_directory__}/formatter.py \ - --sf '$scores' \ - --o '$output' + <configfile name="formatter"> +from matchms.importing import scores_from_json +from pandas import DataFrame + +scores = scores_from_json('$scores') +data = [] + +for i, (row, col) in enumerate(zip(scores.scores.row, scores.scores.col)): + data.append([scores.queries[col].metadata['$key'], scores.references[row].metadata['$key'], *scores.scores.data[i]]) + +result = DataFrame(data, columns=['query', 'reference', *scores.scores.score_names]) + +result.to_csv('$output', sep="\t", index=False) + </configfile> </configfiles> <inputs> <param label="Scores object" name="scores" type="data" format="json" help="matchms Scores json file." /> + <param label="Key" name="key" type="text" value="compound_name" help="Name of the key to use. Default is 'compound_name'." /> </inputs> <outputs> <data label="${tool.name} on ${on_string}" name="output" format="tsv"/> @@ -46,11 +56,13 @@ <tests> <test> <param name="scores" value="formatter/fill2_trunc_scores_with_metadata_match.json" ftype="json"/> + <param name="key" value="compound_name"/> <output name="output" value="formatter/fill2_formatted.tsv" ftype="tsv" checksum="md5$4f0d83da381b8a403d807d26a9dd0f34"/> </test> <test> <param name="scores" value="similarity/scores_test4_out.json" ftype="json"/> + <param name="key" value="compound_name"/> <output name="output" file="formatter/test4_formatted.tsv" ftype="tsv"/> </test> </tests>