Mercurial > repos > recetox > matchms_spectral_similarity
annotate matchms_split.py @ 12:67f65bcc94bf draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f14275bfdc2caa760d71d307ca1803eab1adde76
| author | recetox | 
|---|---|
| date | Wed, 13 Mar 2024 10:20:15 +0000 | 
| parents | dbe94781524a | 
| children | b2c7b7705734 | 
| rev | line source | 
|---|---|
| 0 
9ff95a1a2705
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
 recetox parents: diff
changeset | 1 import argparse | 
| 
9ff95a1a2705
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
 recetox parents: diff
changeset | 2 import itertools | 
| 
9ff95a1a2705
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
 recetox parents: diff
changeset | 3 import os | 
| 
9ff95a1a2705
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
 recetox parents: diff
changeset | 4 | 
| 10 
dbe94781524a
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit da193865f41a3a840ecc4ba0afab1d358554998a
 recetox parents: 
0diff
changeset | 5 import matchms | 
| 0 
9ff95a1a2705
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
 recetox parents: diff
changeset | 6 from matchms.exporting import save_as_msp | 
| 
9ff95a1a2705
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
 recetox parents: diff
changeset | 7 from matchms.importing import load_from_msp | 
| 
9ff95a1a2705
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
 recetox parents: diff
changeset | 8 | 
| 
9ff95a1a2705
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
 recetox parents: diff
changeset | 9 | 
| 10 
dbe94781524a
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit da193865f41a3a840ecc4ba0afab1d358554998a
 recetox parents: 
0diff
changeset | 10 matchms.Metadata.set_key_replacements({}) | 
| 0 
9ff95a1a2705
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
 recetox parents: diff
changeset | 11 | 
| 
9ff95a1a2705
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
 recetox parents: diff
changeset | 12 | 
| 
9ff95a1a2705
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
 recetox parents: diff
def make_outdir(outdir: str) -> None:
    """Create destination directory.

    Missing parent directories are created as well. A destination that
    already exists is still rejected, so files left over from a previous
    run are never silently reused.

    Args:
        outdir (str): Path to destination directory where split spectra files are generated.

    Raises:
        FileExistsError: If ``outdir`` already exists.
    """
    # makedirs (without exist_ok) keeps the "must not exist yet" contract of
    # the former os.mkdir call while also accepting nested paths.
    os.makedirs(outdir)
| 
9ff95a1a2705
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
 recetox parents: diff
changeset | 20 | 
| 
9ff95a1a2705
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
 recetox parents: diff
changeset | 21 | 
| 
9ff95a1a2705
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
 recetox parents: diff
def write_spectra(spectra, outdir):
    """Generates MSP files of individual spectra.

    Every spectrum is passed to ``save_as_msp`` with its own target file,
    named ``<index>.msp`` after the zero-based position of the spectrum in
    ``spectra``.

    Args:
        spectra (Iterable[Spectrum]): Spectra to write to file
        outdir (str): Path to destination directory.
    """
    # enumerate() avoids indexing, so any iterable (not only lists) works.
    for index, spectrum in enumerate(spectra):
        save_as_msp(spectrum, os.path.join(outdir, f"{index}.msp"))
| 0 
9ff95a1a2705
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
 recetox parents: diff
changeset | 31 | 
| 
9ff95a1a2705
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
 recetox parents: diff
changeset | 32 | 
| 
9ff95a1a2705
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
 recetox parents: diff
def split_round_robin(iterable, num_chunks):
    """Distribute items over a fixed number of chunks in round-robin order.

    The first item goes to chunk 0, the second to chunk 1, and so on,
    wrapping around after ``num_chunks`` items. Chunks that received no
    item (fewer items than chunks) are dropped from the result.

    Args:
        iterable (Iterable): Items to distribute; consumed completely.
        num_chunks (int): Maximum number of chunks to create, at least 1.

    Returns:
        Iterator[list]: Lazy iterator over the non-empty chunks.

    Raises:
        ValueError: If ``num_chunks`` is smaller than 1.
    """
    # Without this guard an empty cycle would leak a bare StopIteration.
    if num_chunks < 1:
        raise ValueError(f"num_chunks must be at least 1, got {num_chunks}")

    chunks = [[] for _ in range(num_chunks)]
    # Cycling over the chunk lists themselves pairs each item with its target.
    for target, value in zip(itertools.cycle(chunks), iterable):
        target.append(value)
    # filter(None, ...) drops the empty (falsy) lists.
    return filter(None, chunks)
| 
9ff95a1a2705
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
 recetox parents: diff
changeset | 40 | 
| 
9ff95a1a2705
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
 recetox parents: diff
changeset | 41 | 
| 
9ff95a1a2705
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
 recetox parents: diff
# Command-line interface. Arguments are parsed at module level and exposed as
# the globals `outdir`, `filename`, `method` and `parameter` used by the
# script entry point.
listarg = argparse.ArgumentParser(
    description="Split an MSP file into several MSP files."
)
listarg.add_argument(
    '--filename', type=str, required=True,
    help="Path to the MSP file to split.",
)
listarg.add_argument(
    '--method', type=str, required=True,
    # Restricting the choices turns a typo into a usage error up front
    # instead of a failure after the output directory has been created.
    choices=["one-per-file", "chunk-size", "num-chunks"],
    help="Splitting strategy.",
)
listarg.add_argument(
    '--outdir', type=str, required=True,
    help="Directory to create for the generated MSP files.",
)
listarg.add_argument(
    '--parameter', type=int,
    help="Spectra per chunk for 'chunk-size', number of chunks for "
         "'num-chunks'; not used by 'one-per-file'.",
)
args = listarg.parse_args()
outdir = args.outdir
filename = args.filename
method = args.method
parameter = args.parameter
| 
9ff95a1a2705
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
 recetox parents: diff
changeset | 52 | 
| 
9ff95a1a2705
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
 recetox parents: diff
changeset | 53 | 
| 
9ff95a1a2705
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/matchms commit f79a5b51599254817727bc9028b9797ea994cb4e
 recetox parents: diff
if __name__ == "__main__":
    # Script entry point: split the input MSP file into `outdir` according to
    # `method`. Requests are validated first so that a bad call fails before
    # the output directory is created.
    if method not in ("one-per-file", "chunk-size", "num-chunks"):
        raise ValueError(f"Unknown split method: {method!r}")
    if method != "one-per-file" and (parameter is None or parameter < 1):
        # A chunk size of 0 would otherwise produce no output at all.
        raise ValueError(
            f"--parameter must be a positive integer for method {method!r}, got {parameter!r}"
        )

    spectra = load_from_msp(filename, metadata_harmonization=False)
    make_outdir(outdir)

    if method == "one-per-file":
        write_spectra(list(spectra), outdir)
    else:
        if method == "chunk-size":
            # Repeatedly take `parameter` spectra until an empty batch
            # signals that the input is exhausted.
            chunks = iter(lambda: list(itertools.islice(spectra, parameter)), [])
        else:  # "num-chunks"
            chunks = split_round_robin(spectra, parameter)
        for i, x in enumerate(chunks):
            save_as_msp(x, os.path.join(outdir, f"chunk_{i}.msp"))
