annotate matchms_split.py @ 23:34439ffa6a60 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit da193865f41a3a840ecc4ba0afab1d358554998a
author recetox
date Mon, 05 Feb 2024 10:39:25 +0000
parents 1b09315a3f87
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
1 import argparse
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
2 import itertools
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
3 import os
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
4
23
34439ffa6a60 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit da193865f41a3a840ecc4ba0afab1d358554998a
recetox
parents: 10
diff changeset
5 import matchms
10
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
6 from matchms.exporting import save_as_msp
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
7 from matchms.importing import load_from_msp
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
8
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
9
23
34439ffa6a60 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit da193865f41a3a840ecc4ba0afab1d358554998a
recetox
parents: 10
diff changeset
10 matchms.Metadata.set_key_replacements({})
10
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
11
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
12
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
13 def make_outdir(outdir: str):
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
14 """Create destination directory.
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
15
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
16 Args:
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
17 outdir (str): Path to destination directory where split spectra files are generated.
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
18 """
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
19 return os.mkdir(outdir)
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
20
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
21
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
22 def write_spectra(spectra, outdir):
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
23 """Generates MSP files of individual spectra.
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
24
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
25 Args:
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
26 spectra (List[Spectrum]): Spectra to write to file
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
27 outdir (str): Path to destination directory.
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
28 """
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
29 for i in range(len(spectra)):
23
34439ffa6a60 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit da193865f41a3a840ecc4ba0afab1d358554998a
recetox
parents: 10
diff changeset
30 save_as_msp(spectra[i], os.path.join(outdir, f"{i}.msp"))
10
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
31
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
32
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
33 def split_round_robin(iterable, num_chunks):
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
34 chunks = [list() for _ in range(num_chunks)]
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
35 index = itertools.cycle(range(num_chunks))
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
36 for value in iterable:
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
37 chunks[next(index)].append(value)
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
38 chunks = filter(lambda x: len(x) > 0, chunks)
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
39 return chunks
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
40
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
41
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
42 listarg = argparse.ArgumentParser()
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
43 listarg.add_argument('--filename', type=str)
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
44 listarg.add_argument('--method', type=str)
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
45 listarg.add_argument('--outdir', type=str)
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
46 listarg.add_argument('--parameter', type=int)
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
47 args = listarg.parse_args()
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
48 outdir = args.outdir
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
49 filename = args.filename
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
50 method = args.method
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
51 parameter = args.parameter
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
52
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
53
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
54 if __name__ == "__main__":
23
34439ffa6a60 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit da193865f41a3a840ecc4ba0afab1d358554998a
recetox
parents: 10
diff changeset
55 spectra = load_from_msp(filename, metadata_harmonization=False)
10
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
56 make_outdir(outdir)
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
57
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
58 if method == "one-per-file":
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
59 write_spectra(list(spectra), outdir)
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
60 else:
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
61 if method == "chunk-size":
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
62 chunks = iter(lambda: list(itertools.islice(spectra, parameter)), [])
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
63 elif method == "num-chunks":
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
64 chunks = split_round_robin(spectra, parameter)
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
65 for i, x in enumerate(chunks):
1b09315a3f87 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
recetox
parents:
diff changeset
66 save_as_msp(x, os.path.join(outdir, f"chunk_{i}.msp"))