Mercurial > repos > recetox > matchms_split
annotate matchms_split.py @ 21:500f473df7e4 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 8db07edd3b0d2ff778036dec410027ad58365488
| author | recetox | 
|---|---|
| date | Mon, 15 Jul 2024 07:59:51 +0000 | 
| parents | 114617e6ad33 | 
| children | 7676700d7c4f | 
| rev | line source | 
|---|---|
| 
0
 
169c72b2ce79
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
 
recetox 
parents:  
diff
changeset
 | 
1 import argparse | 
| 
 
169c72b2ce79
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
 
recetox 
parents:  
diff
changeset
 | 
2 import itertools | 
| 
 
169c72b2ce79
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
 
recetox 
parents:  
diff
changeset
 | 
3 import os | 
| 
 
169c72b2ce79
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
 
recetox 
parents:  
diff
changeset
 | 
4 | 
| 
14
 
114617e6ad33
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit da193865f41a3a840ecc4ba0afab1d358554998a
 
recetox 
parents: 
4 
diff
changeset
 | 
5 import matchms | 
| 
0
 
169c72b2ce79
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
 
recetox 
parents:  
diff
changeset
 | 
6 from matchms.exporting import save_as_msp | 
| 
 
169c72b2ce79
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
 
recetox 
parents:  
diff
changeset
 | 
7 from matchms.importing import load_from_msp | 
| 
 
169c72b2ce79
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
 
recetox 
parents:  
diff
changeset
 | 
8 | 
| 
 
169c72b2ce79
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
 
recetox 
parents:  
diff
changeset
 | 
9 | 
| 
14
 
114617e6ad33
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit da193865f41a3a840ecc4ba0afab1d358554998a
 
recetox 
parents: 
4 
diff
changeset
 | 
# Install an empty key-replacement table on matchms.Metadata before any spectra are read.
# NOTE(review): presumably this keeps matchms from renaming metadata keys so the split
# files keep the original field names - confirm against the matchms Metadata documentation.
matchms.Metadata.set_key_replacements({})
| 
0
 
169c72b2ce79
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
 
recetox 
parents:  
diff
changeset
 | 
11 | 
| 
 
169c72b2ce79
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
 
recetox 
parents:  
diff
changeset
 | 
12 | 
| 
 
169c72b2ce79
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
 
recetox 
parents:  
diff
changeset
 | 
def make_outdir(outdir: str):
    """Create the destination directory for the split spectra files.

    Missing parent directories are created as well, and a directory that
    already exists is reused instead of raising ``FileExistsError``.

    Args:
        outdir (str): Path to destination directory where split spectra files are generated.

    Returns:
        None

    Raises:
        FileExistsError: If ``outdir`` exists but is not a directory.
    """
    # makedirs(..., exist_ok=True) makes the call idempotent, so re-running the
    # tool against the same output location no longer fails.
    os.makedirs(outdir, exist_ok=True)
| 
 
169c72b2ce79
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
 
recetox 
parents:  
diff
changeset
 | 
20 | 
| 
 
169c72b2ce79
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
 
recetox 
parents:  
diff
changeset
 | 
21 | 
| 
 
169c72b2ce79
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
 
recetox 
parents:  
diff
changeset
 | 
def write_spectra(spectra, outdir):
    """Write every spectrum to its own MSP file.

    Each file is named ``<index>.msp``, where ``index`` is the zero-based
    position of the spectrum in ``spectra``.

    Args:
        spectra (Iterable[Spectrum]): Spectra to write to file. Any iterable is
            accepted; it does not need to support ``len()`` or indexing.
        outdir (str): Path to destination directory.
    """
    for index, spectrum in enumerate(spectra):
        save_as_msp(spectrum, os.path.join(outdir, f"{index}.msp"))
| 
0
 
169c72b2ce79
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
 
recetox 
parents:  
diff
changeset
 | 
31 | 
| 
 
169c72b2ce79
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
 
recetox 
parents:  
diff
changeset
 | 
32 | 
| 
 
169c72b2ce79
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
 
recetox 
parents:  
diff
changeset
 | 
def split_round_robin(iterable, num_chunks):
    """Distribute items over ``num_chunks`` lists in round-robin order.

    The item at zero-based position ``k`` is appended to chunk
    ``k % num_chunks``. Chunks that receive no item (fewer items than chunks)
    are dropped from the result.

    Args:
        iterable (Iterable): Items to distribute; it is consumed exactly once.
        num_chunks (int): Maximum number of chunks to produce; must be at least 1.

    Returns:
        Iterator[list]: Single-use iterator over the non-empty chunks, in chunk order.

    Raises:
        ValueError: If ``num_chunks`` is smaller than 1.
    """
    # Without this guard a non-positive value surfaces as a bare StopIteration
    # (or silently yields nothing for empty input), which hides the real cause.
    if num_chunks < 1:
        raise ValueError(f"num_chunks must be a positive integer, got {num_chunks!r}")

    chunks = [[] for _ in range(num_chunks)]
    for position, value in enumerate(iterable):
        chunks[position % num_chunks].append(value)

    # filter(None, ...) drops falsy entries, i.e. exactly the empty lists.
    return filter(None, chunks)
| 
 
169c72b2ce79
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
 
recetox 
parents:  
diff
changeset
 | 
40 | 
| 
 
169c72b2ce79
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
 
recetox 
parents:  
diff
changeset
 | 
41 | 
| 
 
169c72b2ce79
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
 
recetox 
parents:  
diff
changeset
 | 
# Command-line interface of the splitting script.
# NOTE: the arguments are parsed at module level (outside the ``__main__`` guard),
# so the parsed values below are plain module globals read by the script body.
listarg = argparse.ArgumentParser(
    description="Split the spectra of an MSP file into several MSP files."
)
listarg.add_argument('--filename', type=str,
                     help="Path to the input MSP file.")
listarg.add_argument('--method', type=str,
                     help="Splitting strategy: 'one-per-file', 'chunk-size' or 'num-chunks'.")
listarg.add_argument('--outdir', type=str,
                     help="Directory in which the output MSP files are written.")
listarg.add_argument('--parameter', type=int,
                     help="Number of spectra per file for 'chunk-size', maximum number of "
                          "files for 'num-chunks'; not used by 'one-per-file'.")
args = listarg.parse_args()

# Unpack the parsed options into the module-level names used by the script body.
outdir = args.outdir
filename = args.filename
method = args.method
parameter = args.parameter
| 
 
169c72b2ce79
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
 
recetox 
parents:  
diff
changeset
 | 
52 | 
| 
 
169c72b2ce79
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
 
recetox 
parents:  
diff
changeset
 | 
53 | 
| 
 
169c72b2ce79
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit 5888b20035c9c782b7c94495b0760134f82f4c2e
 
recetox 
parents:  
diff
changeset
 | 
if __name__ == "__main__":
    # Validate the request before touching the file system: an unknown method
    # previously left ``chunks`` unbound and crashed with a NameError after the
    # output directory had already been created.
    if method not in ("one-per-file", "chunk-size", "num-chunks"):
        raise ValueError(
            f"Unknown split method {method!r}; "
            "expected 'one-per-file', 'chunk-size' or 'num-chunks'."
        )
    # A chunk size of 0 would silently write no output at all, and a chunk
    # count of 0 cannot hold any spectrum - reject both explicitly.
    if method != "one-per-file" and parameter is not None and parameter < 1:
        raise ValueError(f"--parameter must be a positive integer, got {parameter!r}")

    spectra = load_from_msp(filename, metadata_harmonization=False)
    make_outdir(outdir)

    if method == "one-per-file":
        # One output file per spectrum, named by its position in the input.
        write_spectra(list(spectra), outdir)
    else:
        if method == "chunk-size":
            # Two-argument iter(): keep slicing ``parameter`` spectra off the
            # input until a slice comes back empty (the ``[]`` sentinel).
            chunks = iter(lambda: list(itertools.islice(spectra, parameter)), [])
        else:
            # "num-chunks": deal the spectra round-robin into ``parameter`` groups.
            chunks = split_round_robin(spectra, parameter)

        for i, x in enumerate(chunks):
            save_as_msp(x, os.path.join(outdir, f"chunk_{i}.msp"))
