Mercurial > repos > recetox > matchms_formatter
comparison formatter.py @ 3:574c6331e9db draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 38a5028a7abe99794086e9b1374ab4bb8bfa68de
| author | recetox | 
|---|---|
| date | Wed, 21 Sep 2022 15:29:51 +0000 | 
| parents | 60f34912b3de | 
| children | 966b4134ad12 | 
   comparison
  equal
  deleted
  inserted
  replaced
| 2:a5c84c9c1a28 | 3:574c6331e9db | 
|---|---|
| 1 import click | 1 import click | 
| 2 from pandas import DataFrame, read_csv | 2 from pandas import DataFrame, read_csv, to_numeric | 
| 3 | 3 | 
| 4 | 4 | 
| 5 def create_long_table(data: DataFrame, value_id: str) -> DataFrame: | 5 def create_long_table(data: DataFrame, value_id: str) -> DataFrame: | 
| 6 """Convert the table from compact into long format. | 6 """Convert the table from compact into long format. | 
| 7 See DataFrame.melt(...). | 7 See DataFrame.melt(...). | 
| 71 matches_filename (str): Path to matches table. | 71 matches_filename (str): Path to matches table. | 
| 72 | 72 | 
| 73 Returns: | 73 Returns: | 
| 74 DataFrame: Joined dataframe on compounds containing scores an matches in long format. | 74 DataFrame: Joined dataframe on compounds containing scores an matches in long format. | 
| 75 """ | 75 """ | 
| 76 matches = read_csv(matches_filename, sep=None, index_col=0) | 76 matches = read_csv(matches_filename, sep="\t", index_col=0, header=0).apply(to_numeric) | 
| 77 scores = read_csv(scores_filename, sep=None, index_col=0) | 77 scores = read_csv(scores_filename, sep="\t", index_col=0, header=0).apply(to_numeric) | 
| 78 | 78 | 
| 79 scores_long = create_long_table(scores, 'score') | 79 scores_long = create_long_table(scores, 'score') | 
| 80 matches_long = create_long_table(matches, 'matches') | 80 matches_long = create_long_table(matches, 'matches') | 
| 81 | 81 | 
| 82 combined = join_df(matches_long, scores_long, on=['compound'], how='inner') | 82 combined = join_df(matches_long, scores_long, on=['compound'], how='inner') | 
| 111 return result | 111 return result | 
| 112 | 112 | 
| 113 | 113 | 
| 114 @cli.resultcallback() | 114 @cli.resultcallback() | 
| 115 def write_output(result: DataFrame, scores_filename, matches_filename, output_filename): | 115 def write_output(result: DataFrame, scores_filename, matches_filename, output_filename): | 
| 116 input_file = read_csv(scores_filename, sep=None, iterator=True) | |
| 117 sep = input_file._engine.data.dialect.delimiter | |
| 118 | |
| 119 result = result.reset_index().rename(columns={'level_0': 'query', 'compound': 'reference'}) | 116 result = result.reset_index().rename(columns={'level_0': 'query', 'compound': 'reference'}) | 
| 120 result.to_csv(output_filename, sep=sep, index=False) | 117 result.to_csv(output_filename, sep="\t", index=False) | 
| 121 | 118 | 
| 122 | 119 | 
| 123 if __name__ == '__main__': | 120 if __name__ == '__main__': | 
| 124 cli(obj={}) | 121 cli(obj={}) | 
