Mercurial > repos > recetox > matchms_formatter
annotate formatter.py @ 11:ae45992f969e draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
author | recetox |
---|---|
date | Thu, 12 Oct 2023 13:29:16 +0000 |
parents | 1b09315a3f87 |
children |
rev | line source |
---|---|
0
60f34912b3de
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
1 import click |
4
966b4134ad12
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
recetox
parents:
3
diff
changeset
|
2 from matchms.importing import scores_from_json |
966b4134ad12
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
recetox
parents:
3
diff
changeset
|
3 from pandas import DataFrame |
0
60f34912b3de
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
4 |
60f34912b3de
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
5 |
10
1b09315a3f87
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
4
diff
changeset
|
def scores_to_dataframe(scores):
    """Flatten the stored entries of a scores object into one long-format table.

    Every stored (reference, query) entry becomes one row holding the
    ``compound_name`` of the query spectrum, the ``compound_name`` of the
    reference spectrum, and one column per score name.

    Args:
        scores: Object exposing ``queries``, ``references`` and a ``scores``
            member with ``row``, ``col``, ``data`` and ``score_names``
            attributes (presumably a ``matchms.Scores`` instance -- confirm
            against the installed matchms version).

    Returns:
        DataFrame: A single dataframe with the columns ``query`` and
        ``reference`` followed by the entries of ``scores.scores.score_names``.

    Raises:
        KeyError: Propagated from the ``metadata['compound_name']`` lookup
            when a spectrum's metadata mapping lacks that entry.
    """
    # Resolve the attribute chains once instead of on every iteration.
    sparse = scores.scores
    queries = scores.queries
    references = scores.references
    values = sparse.data

    # ``row`` indexes the references and ``col`` indexes the queries; each
    # entry of ``values`` is unpacked into one column per score name.
    rows = [
        [
            queries[col].metadata['compound_name'],
            references[row].metadata['compound_name'],
            *values[i],
        ]
        for i, (row, col) in enumerate(zip(sparse.row, sparse.col))
    ]

    return DataFrame(rows, columns=['query', 'reference', *sparse.score_names])
4
966b4134ad12
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
recetox
parents:
3
diff
changeset
|
24 |
966b4134ad12
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
recetox
parents:
3
diff
changeset
|
25 |
966b4134ad12
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
recetox
parents:
3
diff
changeset
|
def load_data(scores_filename: str) -> DataFrame:
    """Read serialized scores from a json file and return them as a table.

    Args:
        scores_filename (str): Path to json file with serialized scores.

    Returns:
        DataFrame: Result of ``scores_to_dataframe`` applied to the scores
        loaded with ``scores_from_json``.
    """
    loaded_scores = scores_from_json(scores_filename)
    return scores_to_dataframe(loaded_scores)
0
60f34912b3de
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
39 |
60f34912b3de
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
40 |
10
1b09315a3f87
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
4
diff
changeset
|
@click.group(invoke_without_command=True)
@click.option('--sf', 'scores_filename', type=click.Path(exists=True), required=True)
@click.option('--o', 'output_filename', type=click.Path(writable=True), required=True)
def cli(scores_filename, output_filename):
    """Convert a scores json file into a tab-separated table."""
    table = load_data(scores_filename)
    # index=False keeps the dataframe's row index out of the output file.
    table.to_csv(output_filename, sep="\t", index=False)
60f34912b3de
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
48 |
60f34912b3de
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
49 |
60f34912b3de
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 4d2ac914c951166e386a94d8ebb8cb1becfac122"
recetox
parents:
diff
changeset
|
# Run the command line interface only when executed as a script.
if __name__ == "__main__":
    cli()