Mercurial > repos > recetox > matchms_formatter
comparison matchms_filtering_wrapper.py @ 11:ae45992f969e draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit b1cc1aebf796f170d93e3dd46ffcdefdc7b8018a
author | recetox |
---|---|
date | Thu, 12 Oct 2023 13:29:16 +0000 |
parents | 1b09315a3f87 |
children | 23d4bc72c505 |
comparison legend: equal | deleted | inserted | replaced
10:1b09315a3f87 | 11:ae45992f969e |
---|---|
2 import sys | 2 import sys |
3 | 3 |
4 from matchms.exporting import save_as_mgf, save_as_msp | 4 from matchms.exporting import save_as_mgf, save_as_msp |
5 from matchms.filtering import add_compound_name, add_fingerprint, add_losses, add_parent_mass, add_precursor_mz,\ | 5 from matchms.filtering import add_compound_name, add_fingerprint, add_losses, add_parent_mass, add_precursor_mz,\ |
6 add_retention_index, add_retention_time, clean_compound_name | 6 add_retention_index, add_retention_time, clean_compound_name |
7 from matchms.filtering import default_filters, normalize_intensities, select_by_mz, select_by_relative_intensity | 7 from matchms.filtering import default_filters, normalize_intensities, reduce_to_number_of_peaks, select_by_mz, \ |
8 select_by_relative_intensity | |
8 from matchms.importing import load_from_mgf, load_from_msp | 9 from matchms.importing import load_from_mgf, load_from_msp |
10 | |
11 | |
12 def require_key(spectrum, key): | |
13 if spectrum.get(key): | |
14 return spectrum | |
15 | |
16 return None | |
9 | 17 |
10 | 18 |
11 def main(argv): | 19 def main(argv): |
12 parser = argparse.ArgumentParser(description="Compute MSP similarity scores") | 20 parser = argparse.ArgumentParser(description="Compute MSP similarity scores") |
13 parser.add_argument("--spectra", type=str, required=True, help="Mass spectra file to be filtered.") | 21 parser.add_argument("--spectra", type=str, required=True, help="Mass spectra file to be filtered.") |
25 parser.add_argument("--to_intensity", type=float, help="Upper bound for intensity filter") | 33 parser.add_argument("--to_intensity", type=float, help="Upper bound for intensity filter") |
26 parser.add_argument("-mz_range", action='store_true', | 34 parser.add_argument("-mz_range", action='store_true', |
27 help="Keep only peaks between set m/z range (keep if to_mz >= m/z >= from_mz).") | 35 help="Keep only peaks between set m/z range (keep if to_mz >= m/z >= from_mz).") |
28 parser.add_argument("--from_mz", type=float, help="Lower bound for m/z filter") | 36 parser.add_argument("--from_mz", type=float, help="Lower bound for m/z filter") |
29 parser.add_argument("--to_mz", type=float, help="Upper bound for m/z filter") | 37 parser.add_argument("--to_mz", type=float, help="Upper bound for m/z filter") |
38 parser.add_argument("-require_smiles", action='store_true', | |
39 help="Remove spectra that does not contain SMILES.") | |
40 parser.add_argument("-require_inchi", action='store_true', | |
41 help="Remove spectra that does not contain INCHI.") | |
42 parser.add_argument("-reduce_to_top_n_peaks", action='store_true', | |
43 help="reduce to top n peaks filter.") | |
44 parser.add_argument("--n_max", type=int, help="Maximum number of peaks. Remove peaks if more peaks are found.") | |
30 args = parser.parse_args() | 45 args = parser.parse_args() |
31 | 46 |
32 if not (args.normalise_intensities | 47 if not (args.normalise_intensities |
33 or args.default_filters | 48 or args.default_filters |
34 or args.clean_metadata | 49 or args.clean_metadata |
35 or args.relative_intensity | 50 or args.relative_intensity |
36 or args.mz_range): | 51 or args.mz_range |
52 or args.require_smiles | |
53 or args.require_inchi | |
54 or args.reduce_to_top_n_peaks): | |
37 raise ValueError('No filter selected.') | 55 raise ValueError('No filter selected.') |
38 | 56 |
39 if args.spectra_format == 'msp': | 57 if args.spectra_format == 'msp': |
40 spectra = list(load_from_msp(args.spectra)) | 58 spectra = list(load_from_msp(args.spectra)) |
41 elif args.queries_format == 'mgf': | 59 elif args.queries_format == 'mgf': |
61 spectrum = select_by_relative_intensity(spectrum, args.from_intensity, args.to_intensity) | 79 spectrum = select_by_relative_intensity(spectrum, args.from_intensity, args.to_intensity) |
62 | 80 |
63 if args.mz_range: | 81 if args.mz_range: |
64 spectrum = select_by_mz(spectrum, args.from_mz, args.to_mz) | 82 spectrum = select_by_mz(spectrum, args.from_mz, args.to_mz) |
65 | 83 |
66 filtered_spectra.append(spectrum) | 84 if args.reduce_to_top_n_peaks: |
85 spectrum = reduce_to_number_of_peaks(spectrum_in=spectrum, n_max=args.n_max) | |
86 | |
87 if args.require_smiles and spectrum is not None: | |
88 spectrum = require_key(spectrum, "smiles") | |
89 | |
90 if args.require_inchi and spectrum is not None: | |
91 spectrum = require_key(spectrum, "inchi") | |
92 | |
93 if spectrum is not None: | |
94 filtered_spectra.append(spectrum) | |
67 | 95 |
68 if args.spectra_format == 'msp': | 96 if args.spectra_format == 'msp': |
69 save_as_msp(filtered_spectra, args.output) | 97 save_as_msp(filtered_spectra, args.output) |
70 else: | 98 else: |
71 save_as_mgf(filtered_spectra, args.output) | 99 save_as_mgf(filtered_spectra, args.output) |