Mercurial > repos > recetox > matchms_remove_spectra
annotate matchms_split.py @ 5:c7be8e58a6af draft default tip
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit a57d984a9850c1faa44b6948981eb1303881ba9c
author | recetox |
---|---|
date | Thu, 14 Aug 2025 12:39:28 +0000 |
parents | 80df426e7e47 |
children |
rev | line source |
---|---|
0
80df426e7e47
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff
changeset
|
1 import argparse |
80df426e7e47
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff
changeset
|
2 import itertools |
80df426e7e47
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff
changeset
|
3 import os |
80df426e7e47
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff
changeset
|
4 |
5
c7be8e58a6af
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit a57d984a9850c1faa44b6948981eb1303881ba9c
recetox
parents:
0
diff
changeset
|
5 from matchms import Metadata |
0
80df426e7e47
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff
changeset
|
6 from matchms.exporting import save_as_msp |
80df426e7e47
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff
changeset
|
7 from matchms.importing import load_from_msp |
80df426e7e47
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff
changeset
|
8 |
5
c7be8e58a6af
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit a57d984a9850c1faa44b6948981eb1303881ba9c
recetox
parents:
0
diff
changeset
|
9 Metadata.set_key_replacements({}) |
0
80df426e7e47
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff
changeset
|
10 |
80df426e7e47
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff
changeset
|
11 |
80df426e7e47
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff
changeset
|
def make_outdir(outdir: str):
    """Create destination directory.

    Missing parent directories are created as well, and an already existing
    directory is accepted as-is (its current contents are left untouched).

    Args:
        outdir (str): Path to destination directory where split spectra files are generated.

    Raises:
        OSError: If the path cannot be created, e.g. because it exists as a
            regular file or permissions are insufficient.
    """
    # os.mkdir fails on an existing directory and on missing parents;
    # makedirs with exist_ok=True covers both cases.
    os.makedirs(outdir, exist_ok=True)
80df426e7e47
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff
changeset
|
19 |
80df426e7e47
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff
changeset
|
20 |
80df426e7e47
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff
changeset
|
def write_spectra(spectra, outdir):
    """Generates MSP files of individual spectra.

    Every spectrum is written to its own file named ``<index>.msp``, where
    the index is the zero-based position of the spectrum in ``spectra``.

    Args:
        spectra (Iterable[Spectrum]): Spectra to write to file. Any iterable
            is accepted; it does not need to support ``len()`` or indexing.
        outdir (str): Path to destination directory.
    """
    for index, spectrum in enumerate(spectra):
        save_as_msp(spectrum, os.path.join(outdir, f"{index}.msp"))
80df426e7e47
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff
changeset
|
30 |
80df426e7e47
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff
changeset
|
31 |
80df426e7e47
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff
changeset
|
def split_round_robin(iterable, num_chunks):
    """Distribute the items of an iterable over chunks in round-robin order.

    Item 0 goes to chunk 0, item 1 to chunk 1, and so on, wrapping around
    after ``num_chunks`` items.

    Args:
        iterable: Items to distribute.
        num_chunks (int): Number of chunks to deal the items into.

    Returns:
        A lazy ``filter`` iterator over the non-empty chunks (lists), in
        chunk order. Chunks that received no item are omitted.
    """
    buckets = [[] for _ in range(num_chunks)]
    slot = itertools.cycle(range(num_chunks))
    for item in iterable:
        target = buckets[next(slot)]
        target.append(item)
    # Passing None as the predicate keeps truthy entries, i.e. non-empty lists.
    return filter(None, buckets)
80df426e7e47
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff
changeset
|
39 |
80df426e7e47
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff
changeset
|
40 |
80df426e7e47
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff
changeset
|
# Command-line interface. Arguments are parsed at module level and exposed as
# module-level names that the entry point below reads.
listarg = argparse.ArgumentParser()
listarg.add_argument('--filename', type=str)
listarg.add_argument('--method', type=str)
listarg.add_argument('--outdir', type=str)
listarg.add_argument('--parameter', type=int, required=False)
args = listarg.parse_args()
outdir = args.outdir
filename = args.filename
method = args.method
parameter = args.parameter


if __name__ == "__main__":
    # Validate the request before reading input or creating the output
    # directory, so a bad invocation fails with a usage error instead of a
    # NameError/TypeError halfway through (or silently writing nothing).
    if method not in ("one-per-file", "chunk-size", "num-chunks"):
        listarg.error(
            f"unsupported --method {method!r}; expected one of "
            "'one-per-file', 'chunk-size', 'num-chunks'"
        )
    if method != "one-per-file" and (parameter is None or parameter < 1):
        listarg.error(
            f"--parameter must be a positive integer for method '{method}'"
        )

    spectra = load_from_msp(filename, metadata_harmonization=False)
    make_outdir(outdir)

    if method == "one-per-file":
        write_spectra(list(spectra), outdir)
    else:
        if method == "chunk-size":
            # Repeatedly take up to `parameter` spectra until an empty list
            # (the sentinel) comes back. This relies on `spectra` being a
            # one-shot iterator so each islice call continues where the
            # previous one stopped -- presumably load_from_msp returns a
            # generator; confirm against the matchms version in use.
            chunks = iter(lambda: list(itertools.islice(spectra, parameter)), [])
        else:
            # Only "num-chunks" is left after the validation above.
            chunks = split_round_robin(spectra, parameter)
        for i, x in enumerate(chunks):
            save_as_msp(x, os.path.join(outdir, f"chunk_{i}.msp"))