annotate matchms_split.py @ 5:c7be8e58a6af draft default tip

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit a57d984a9850c1faa44b6948981eb1303881ba9c
author recetox
date Thu, 14 Aug 2025 12:39:28 +0000
parents 80df426e7e47
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
80df426e7e47 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff changeset
1 import argparse
80df426e7e47 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff changeset
2 import itertools
80df426e7e47 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff changeset
3 import os
80df426e7e47 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff changeset
4
5
c7be8e58a6af planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit a57d984a9850c1faa44b6948981eb1303881ba9c
recetox
parents: 0
diff changeset
5 from matchms import Metadata
0
80df426e7e47 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff changeset
6 from matchms.exporting import save_as_msp
80df426e7e47 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff changeset
7 from matchms.importing import load_from_msp
80df426e7e47 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff changeset
8
5
c7be8e58a6af planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit a57d984a9850c1faa44b6948981eb1303881ba9c
recetox
parents: 0
diff changeset
9 Metadata.set_key_replacements({})
0
80df426e7e47 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff changeset
10
80df426e7e47 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff changeset
11
80df426e7e47 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff changeset
def make_outdir(outdir: str) -> None:
    """Create destination directory.

    Missing parent directories are created as well, so a nested path such as
    ``results/split`` works even when ``results`` does not exist yet.

    Args:
        outdir (str): Path to destination directory where split spectra files are generated.

    Raises:
        FileExistsError: If ``outdir`` already exists. The directory must be
            new so that files from an earlier run are never mixed with the
            files written by this run.
    """
    # exist_ok is deliberately left at its default (False): an existing
    # destination is still an error, exactly as it was with os.mkdir.
    os.makedirs(outdir)
80df426e7e47 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff changeset
19
80df426e7e47 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff changeset
20
80df426e7e47 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff changeset
def write_spectra(spectra, outdir):
    """Generates MSP files of individual spectra.

    Every spectrum is written to its own file named ``<index>.msp`` inside
    ``outdir``, where ``<index>`` is the zero-based position of the spectrum
    in ``spectra``.

    Args:
        spectra (Iterable[Spectrum]): Spectra to write to file. Any iterable
            is accepted (list, generator, ...); it is consumed exactly once.
        outdir (str): Path to destination directory.
    """
    # enumerate() avoids len()/indexing, so lazily loaded spectra do not have
    # to be materialised into a list first.
    for index, spectrum in enumerate(spectra):
        save_as_msp(spectrum, os.path.join(outdir, f"{index}.msp"))
80df426e7e47 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff changeset
30
80df426e7e47 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff changeset
31
80df426e7e47 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff changeset
def split_round_robin(iterable, num_chunks):
    """Distribute the items of an iterable over chunks in round-robin order.

    Item 0 goes to chunk 0, item 1 to chunk 1, ... and after chunk
    ``num_chunks - 1`` the assignment wraps around to chunk 0 again. Chunks
    that received no item (fewer items than chunks) are dropped.

    Args:
        iterable (Iterable): Items to distribute; consumed exactly once.
        num_chunks (int): Maximum number of chunks to create, at least 1.

    Returns:
        Iterator[list]: Lazy iterator over the non-empty chunks, in chunk order.

    Raises:
        ValueError: If ``num_chunks`` is smaller than 1.
    """
    if num_chunks < 1:
        # Without this guard a non-empty input would fail with a bare
        # StopIteration, which is misleading and can silently terminate an
        # enclosing iteration.
        raise ValueError(f"num_chunks must be at least 1, got {num_chunks}")

    buckets = [[] for _ in range(num_chunks)]
    for position, value in enumerate(iterable):
        buckets[position % num_chunks].append(value)

    # filter(None, ...) keeps only truthy, i.e. non-empty, lists.
    return filter(None, buckets)
80df426e7e47 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff changeset
39
80df426e7e47 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff changeset
40
80df426e7e47 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 113433b8b9790405c2c5d054aee4a29a21b77dc7
recetox
parents:
diff changeset
# Splitting strategies accepted by --method.
SPLIT_METHODS = ("one-per-file", "chunk-size", "num-chunks")


def build_parser():
    """Build the command-line parser of the split tool.

    Returns:
        argparse.ArgumentParser: Parser for --filename, --method, --outdir
            and the optional integer --parameter.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--filename', type=str)
    parser.add_argument('--method', type=str)
    parser.add_argument('--outdir', type=str)
    parser.add_argument('--parameter', type=int, required=False)
    return parser


def main(argv=None):
    """Split an MSP file into several MSP files.

    Depending on ``--method`` the spectra are written one per file
    (``<index>.msp``), in consecutive chunks of ``--parameter`` spectra, or
    distributed round-robin over ``--parameter`` chunks
    (``chunk_<index>.msp``).

    Args:
        argv (list[str] | None): Command-line arguments without the program
            name. ``None`` makes argparse read ``sys.argv``.

    Raises:
        SystemExit: With status 2 when the arguments are invalid.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    # Validate everything before touching the file system, so that a bad
    # invocation does not leave an empty output directory behind.
    if args.method not in SPLIT_METHODS:
        parser.error(
            f"--method must be one of {', '.join(SPLIT_METHODS)}; got {args.method!r}"
        )
    if args.method == "num-chunks" and args.parameter is None:
        parser.error("--parameter is required when --method is 'num-chunks'")
    if (
        args.method != "one-per-file"
        and args.parameter is not None
        and args.parameter < 1
    ):
        parser.error("--parameter must be a positive integer")

    # iter() leaves an iterator unchanged; it guarantees that the repeated
    # islice() calls below continue where the previous one stopped even if
    # the loader hands back a re-iterable sequence.
    spectra = iter(load_from_msp(args.filename, metadata_harmonization=False))
    make_outdir(args.outdir)

    if args.method == "one-per-file":
        write_spectra(list(spectra), args.outdir)
        return

    if args.method == "chunk-size":
        # Pull `parameter` spectra at a time until an empty list signals that
        # the input is exhausted. With no --parameter, islice() takes
        # everything and a single chunk is produced.
        chunks = iter(lambda: list(itertools.islice(spectra, args.parameter)), [])
    else:  # "num-chunks"
        chunks = split_round_robin(spectra, args.parameter)

    for i, chunk in enumerate(chunks):
        save_as_msp(chunk, os.path.join(args.outdir, f"chunk_{i}.msp"))


if __name__ == "__main__":
    main()