Mercurial > repos > recetox > matchms_formatter
comparison formatter.py @ 3:574c6331e9db draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 38a5028a7abe99794086e9b1374ab4bb8bfa68de
author | recetox |
---|---|
date | Wed, 21 Sep 2022 15:29:51 +0000 |
parents | 60f34912b3de |
children | 966b4134ad12 |
comparison
equal
deleted
inserted
replaced
2:a5c84c9c1a28 | 3:574c6331e9db |
---|---|
1 import click | 1 import click |
2 from pandas import DataFrame, read_csv | 2 from pandas import DataFrame, read_csv, to_numeric |
3 | 3 |
4 | 4 |
5 def create_long_table(data: DataFrame, value_id: str) -> DataFrame: | 5 def create_long_table(data: DataFrame, value_id: str) -> DataFrame: |
6 """Convert the table from compact into long format. | 6 """Convert the table from compact into long format. |
7 See DataFrame.melt(...). | 7 See DataFrame.melt(...). |
71 matches_filename (str): Path to matches table. | 71 matches_filename (str): Path to matches table. |
72 | 72 |
73 Returns: | 73 Returns: |
74 DataFrame: Joined dataframe on compounds containing scores and matches in long format. | 74 DataFrame: Joined dataframe on compounds containing scores and matches in long format. |
75 """ | 75 """ |
76 matches = read_csv(matches_filename, sep=None, index_col=0) | 76 matches = read_csv(matches_filename, sep="\t", index_col=0, header=0).apply(to_numeric) |
77 scores = read_csv(scores_filename, sep=None, index_col=0) | 77 scores = read_csv(scores_filename, sep="\t", index_col=0, header=0).apply(to_numeric) |
78 | 78 |
79 scores_long = create_long_table(scores, 'score') | 79 scores_long = create_long_table(scores, 'score') |
80 matches_long = create_long_table(matches, 'matches') | 80 matches_long = create_long_table(matches, 'matches') |
81 | 81 |
82 combined = join_df(matches_long, scores_long, on=['compound'], how='inner') | 82 combined = join_df(matches_long, scores_long, on=['compound'], how='inner') |
111 return result | 111 return result |
112 | 112 |
113 | 113 |
114 @cli.resultcallback() | 114 @cli.resultcallback() |
115 def write_output(result: DataFrame, scores_filename, matches_filename, output_filename): | 115 def write_output(result: DataFrame, scores_filename, matches_filename, output_filename): |
116 input_file = read_csv(scores_filename, sep=None, iterator=True) | |
117 sep = input_file._engine.data.dialect.delimiter | |
118 | |
119 result = result.reset_index().rename(columns={'level_0': 'query', 'compound': 'reference'}) | 116 result = result.reset_index().rename(columns={'level_0': 'query', 'compound': 'reference'}) |
120 result.to_csv(output_filename, sep=sep, index=False) | 117 result.to_csv(output_filename, sep="\t", index=False) |
121 | 118 |
122 | 119 |
123 if __name__ == '__main__': | 120 if __name__ == '__main__': |
124 cli(obj={}) | 121 cli(obj={}) |