comparison formatter.py @ 3:574c6331e9db draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 38a5028a7abe99794086e9b1374ab4bb8bfa68de
author recetox
date Wed, 21 Sep 2022 15:29:51 +0000
parents 60f34912b3de
children 966b4134ad12
comparison
equal deleted inserted replaced
2:a5c84c9c1a28 3:574c6331e9db
1 import click 1 import click
2 from pandas import DataFrame, read_csv 2 from pandas import DataFrame, read_csv, to_numeric
3 3
4 4
5 def create_long_table(data: DataFrame, value_id: str) -> DataFrame: 5 def create_long_table(data: DataFrame, value_id: str) -> DataFrame:
6 """Convert the table from compact into long format. 6 """Convert the table from compact into long format.
7 See DataFrame.melt(...). 7 See DataFrame.melt(...).
71 matches_filename (str): Path to matches table. 71 matches_filename (str): Path to matches table.
72 72
73 Returns: 73 Returns:
74 DataFrame: Joined dataframe on compounds containing scores and matches in long format. 74 DataFrame: Joined dataframe on compounds containing scores and matches in long format.
75 """ 75 """
76 matches = read_csv(matches_filename, sep=None, index_col=0) 76 matches = read_csv(matches_filename, sep="\t", index_col=0, header=0).apply(to_numeric)
77 scores = read_csv(scores_filename, sep=None, index_col=0) 77 scores = read_csv(scores_filename, sep="\t", index_col=0, header=0).apply(to_numeric)
78 78
79 scores_long = create_long_table(scores, 'score') 79 scores_long = create_long_table(scores, 'score')
80 matches_long = create_long_table(matches, 'matches') 80 matches_long = create_long_table(matches, 'matches')
81 81
82 combined = join_df(matches_long, scores_long, on=['compound'], how='inner') 82 combined = join_df(matches_long, scores_long, on=['compound'], how='inner')
111 return result 111 return result
112 112
113 113
114 @cli.resultcallback() 114 @cli.resultcallback()
115 def write_output(result: DataFrame, scores_filename, matches_filename, output_filename): 115 def write_output(result: DataFrame, scores_filename, matches_filename, output_filename):
116 input_file = read_csv(scores_filename, sep=None, iterator=True)
117 sep = input_file._engine.data.dialect.delimiter
118
119 result = result.reset_index().rename(columns={'level_0': 'query', 'compound': 'reference'}) 116 result = result.reset_index().rename(columns={'level_0': 'query', 'compound': 'reference'})
120 result.to_csv(output_filename, sep=sep, index=False) 117 result.to_csv(output_filename, sep="\t", index=False)
121 118
122 119
123 if __name__ == '__main__': 120 if __name__ == '__main__':
124 cli(obj={}) 121 cli(obj={})