comparison matchms_similarity_wrapper.py @ 4:ba5e9bd05d5b draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5661cf2406e0616d7b2f4bee1b57ec43716088de
author recetox
date Tue, 18 Oct 2022 11:00:55 +0000
parents 9f8532c99845
children
comparison
equal deleted inserted replaced
3:9f8532c99845 4:ba5e9bd05d5b
3 3
4 import numpy as np 4 import numpy as np
5 from matchms import calculate_scores 5 from matchms import calculate_scores
6 from matchms.importing import load_from_mgf, load_from_msp 6 from matchms.importing import load_from_mgf, load_from_msp
7 from matchms.similarity import CosineGreedy, CosineHungarian, MetadataMatch, ModifiedCosine, NeutralLossesCosine 7 from matchms.similarity import CosineGreedy, CosineHungarian, MetadataMatch, ModifiedCosine, NeutralLossesCosine
8 from pandas import DataFrame
9 8
10 9
11 def convert_precursor_mz(spectrum): 10 def convert_precursor_mz(spectrum):
12 """ 11 """
13 Check the presence of precursor m/z since it is needed for ModifiedCosine similarity metric. Convert to float if 12 Check the presence of precursor m/z since it is needed for ModifiedCosine similarity metric. Convert to float if
33 parser.add_argument("queries_format", type=str, help="Query spectra file format.") 32 parser.add_argument("queries_format", type=str, help="Query spectra file format.")
34 parser.add_argument("similarity_metric", type=str, help='Metric to use for matching.') 33 parser.add_argument("similarity_metric", type=str, help='Metric to use for matching.')
35 parser.add_argument("tolerance", type=float, help="Tolerance to use for peak matching.") 34 parser.add_argument("tolerance", type=float, help="Tolerance to use for peak matching.")
36 parser.add_argument("mz_power", type=float, help="The power to raise mz to in the cosine function.") 35 parser.add_argument("mz_power", type=float, help="The power to raise mz to in the cosine function.")
37 parser.add_argument("intensity_power", type=float, help="The power to raise intensity to in the cosine function.") 36 parser.add_argument("intensity_power", type=float, help="The power to raise intensity to in the cosine function.")
38 parser.add_argument("output_filename_scores", type=str, help="Path where to store the output .tsv scores.") 37 parser.add_argument("output_filename_scores", type=str, help="Path where to store the output .json scores.")
39 parser.add_argument("output_filename_matches", type=str, help="Path where to store the output .tsv matches.")
40 args = parser.parse_args() 38 args = parser.parse_args()
41 39
42 if args.queries_format == 'msp': 40 if args.queries_format == 'msp':
43 queries_spectra = list(load_from_msp(args.queries_filename)) 41 queries_spectra = list(load_from_msp(args.queries_filename))
44 elif args.queries_format == 'mgf': 42 elif args.queries_format == 'mgf':
87 write_outputs(args, scores) 85 write_outputs(args, scores)
88 return 0 86 return 0
89 87
90 88
91 def write_outputs(args, scores): 89 def write_outputs(args, scores):
90 """Write Scores to json file."""
92 print("Storing outputs...") 91 print("Storing outputs...")
93 query_names = [spectra.metadata['compound_name'] for spectra in scores.queries] 92 scores.to_json(args.output_filename_scores)
94 reference_names = [spectra.metadata['compound_name'] for spectra in scores.references]
95
96 # Write scores to dataframe
97 dataframe_scores = DataFrame(data=[entry["score"] for entry in scores.scores], index=reference_names, columns=query_names)
98 dataframe_scores.to_csv(args.output_filename_scores, sep='\t')
99
100 # Write number of matches to dataframe
101 dataframe_matches = DataFrame(data=[entry["matches"] for entry in scores.scores], index=reference_names, columns=query_names)
102 dataframe_matches.to_csv(args.output_filename_matches, sep='\t')
103 93
104 94
105 if __name__ == "__main__": 95 if __name__ == "__main__":
106 main(argv=sys.argv[1:]) 96 main(argv=sys.argv[1:])
107 pass 97 pass