Mercurial > repos > recetox > use_theoretical_mz_annotations
comparison use_theoretical_mz_annotations.py @ 2:e0cac9994a72 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
author | recetox |
---|---|
date | Mon, 26 Feb 2024 10:40:17 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:3c7f20b24047 | 2:e0cac9994a72 |
---|---|
import argparse
from typing import Iterator, Optional, Sequence, Tuple
3 | |
4 | |
def get_peak_values(peak: str) -> Tuple[float, float, str]:
    """Parse a single peak entry into m/z, intensity and an optional comment.

    The entry is split on whitespace into at most three fields: the first two
    are converted to floats, everything after them is treated as the comment.

    Args:
        peak: Text of one peak, e.g. ``'41.0 999 "some annotation"'``.

    Returns:
        Tuple ``(mz, intensity, comment)``; ``comment`` is an empty string
        when the entry has only two fields.

    Raises:
        IndexError: If the entry has fewer than two fields.
        ValueError: If m/z or intensity is not a valid float.
    """
    fields = peak.split(maxsplit=2)
    # Whitespace splitting already trims the first two fields; only the
    # remainder (third field) can still carry trailing whitespace.
    mz = float(fields[0])
    intensity = float(fields[1])
    comment = fields[2].strip() if len(fields) == 3 else ''
    return mz, intensity, comment
14 | |
15 | |
def get_peak_tuples(rline: str) -> Iterator[str]:
    """Split a peak line at ';' and return the cleaned, non-empty entries.

    Each entry is stripped of surrounding whitespace *before* empty entries
    are discarded, so a trailing ``;`` followed by a newline (or any
    whitespace-only segment) does not produce an empty peak string.

    Args:
        rline: Raw line that holds one or more ';'-separated peaks.

    Returns:
        Iterator over the stripped, non-empty peak strings in line order.
    """
    stripped_tokens = (token.strip() for token in rline.split(";"))
    return filter(None, stripped_tokens)
21 | |
22 | |
def overwrite_peaks(file: str, output: str, only_contains_annotation: bool = False) -> None:
    """Rewrite the peak lists of a spectra file using the m/z found in peak comments.

    The input is read record by record; a blank line (or end of file) closes a
    record. Within a record, the peak block announced by a ``Num Peaks:`` line
    is parsed peak by peak:

    * a peak with a comment gets its m/z replaced by the third
      whitespace-separated token of that comment (a trailing comma is
      dropped) and is written as ``mz<TAB>intensity<TAB>comment``;
    * a peak without a comment is written as ``mz<TAB>intensity`` unless
      ``only_contains_annotation`` is set, in which case it is dropped.

    The ``Num Peaks:`` line is rewritten with the number of peaks kept; every
    other line is copied verbatim.

    Args:
        file (str): The path to the input file.
        output (str): The path to the output file.
        only_contains_annotation (bool, optional): If True, peaks without a
            comment are left out of the output. Defaults to False.

    Returns:
        None: The function writes the output to a file and does not return anything.

    Raises:
        IndexError: If a peak has fewer than two fields or its comment has
            fewer than three tokens.
        ValueError: If a peak count, m/z, intensity or the m/z token of a
            comment is not a valid number.
    """
    annotated_msp = []
    annotated_msp_list = []
    peaks = []
    # Line consumed by the peak loop that still needs the regular handling
    # (a blank line or EOF met before the announced peak count was reached).
    pending = None

    with open(file, 'r') as msp_in:
        while True:
            if pending is None:
                line = msp_in.readline()
            else:
                line, pending = pending, None

            if not line.strip():
                # Blank line or EOF ends the current record. It is kept only
                # if the most recently parsed peak block retained a peak.
                if peaks:
                    annotated_msp_list.append(annotated_msp)
                annotated_msp = []
                if line == '':
                    break

            if line.startswith('Num Peaks:'):
                num_peaks = int(line.split(':')[1].strip())
                peaks = []
                peaks_seen = 0
                # Count peaks rather than lines: one line may hold several
                # ';'-separated peaks, so reading `num_peaks` lines would run
                # past the end of the peak block.
                while peaks_seen < num_peaks:
                    line = msp_in.readline()
                    if not line.strip():
                        # Fewer peaks than announced; hand the terminator
                        # back to the outer loop instead of parsing it.
                        pending = line
                        break

                    for peak in get_peak_tuples(line):
                        if not peak:
                            # Empty entry, e.g. after a trailing ';'.
                            continue
                        peaks_seen += 1
                        mz, intensity, comment = get_peak_values(peak)
                        if comment != '':
                            # The third token of the comment holds the m/z
                            # that replaces the parsed one.
                            mz = float(comment.split()[2].rstrip(','))
                            peaks.append('%s\t%s\t%s\n' % (mz, intensity, comment))
                        elif not only_contains_annotation:
                            peaks.append('%s\t%s\n' % (mz, intensity))

                annotated_msp.append("Num Peaks: %d\n" % len(peaks))
                annotated_msp.extend(peaks)
            else:
                annotated_msp.append(line)

    with open(output, 'w') as msp_out:
        for spectrum in annotated_msp_list:
            msp_out.writelines(spectrum)
75 | |
76 | |
def parse_arguments(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Parse the command-line options of this script.

    Args:
        argv: Argument strings to parse. When None (the default), argparse
            reads them from ``sys.argv[1:]``, which is the behaviour of the
            script entry point.

    Returns:
        Namespace with the attributes ``input_filename``, ``output_filename``
        and ``annotated``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input_filename', type=str, required=True, help='Input file name')
    parser.add_argument('-o', '--output_filename', type=str, required=True, help='Output file name')
    parser.add_argument('-a', '--annotated', action='store_true', help='Process only peaks with annotations')
    return parser.parse_args(argv)
84 | |
85 | |
if __name__ == "__main__":
    # Script entry point: read the CLI options and run the conversion.
    cli_options = parse_arguments()
    overwrite_peaks(
        file=cli_options.input_filename,
        output=cli_options.output_filename,
        only_contains_annotation=cli_options.annotated,
    )