comparison use_theoretical_mz_annotations.py @ 2:e0cac9994a72 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
author recetox
date Mon, 26 Feb 2024 10:40:17 +0000
parents
children
comparison
equal deleted inserted replaced
1:3c7f20b24047 2:e0cac9994a72
1 import argparse
2 from typing import Iterator, Tuple
3
4
def get_peak_values(peak: str) -> Tuple[float, float, str]:
    """Parse a single peak entry into (m/z, intensity, comment).

    The entry is split on whitespace into at most three fields. The first two
    are converted to floats; whatever follows them is returned, stripped, as
    the comment. An entry with only two fields yields an empty comment.
    """
    parts = peak.split(maxsplit=2)
    # Whitespace splitting leaves no padding on the first two fields, so they
    # can be handed to float() directly.
    mz = float(parts[0])
    intensity = float(parts[1])
    if len(parts) < 3:
        return mz, intensity, ''
    # The remainder keeps its trailing whitespace after a capped split.
    return mz, intensity, parts[2].strip()
14
15
def get_peak_tuples(rline: str) -> Iterator[str]:
    """Split a peak line at ';' and return the cleaned, non-empty entries.

    Each ';'-separated token is stripped of surrounding whitespace first and
    only then tested for emptiness. Filtering before stripping would let
    whitespace-only tokens through as empty strings -- e.g. the newline left
    behind by a trailing ';' -- and an empty entry cannot be parsed as a peak.

    Args:
        rline (str): One raw line from the peak section of the file.

    Returns:
        Iterator[str]: The stripped peak entries, in order, without empties.
    """
    cleaned_tokens = (token.strip() for token in rline.split(";"))
    return filter(None, cleaned_tokens)
21
22
def _render_peak(peak: str, only_contains_annotation: bool) -> str:
    """Return the output line for one peak entry, or '' if the peak is dropped."""
    mz, intensity, comment = get_peak_values(peak)
    if comment:
        # The third whitespace-separated token of the annotation replaces the
        # measured m/z (a trailing ',' is removed before conversion).
        # NOTE(review): presumably this token is the theoretical m/z -- confirm
        # against the annotation format produced upstream.
        mz = float(comment.split()[2].rstrip(','))
        return '%s\t%s\t%s\n' % (mz, intensity, comment)
    if only_contains_annotation:
        return ''
    return '%s\t%s\n' % (mz, intensity)


def overwrite_peaks(file: str, output: str, only_contains_annotation: bool = False) -> None:
    """Rewrite a spectra file, replacing the m/z of annotated peaks.

    Every input line is copied to the output unchanged, except for peak
    blocks. A peak block is a line starting with 'Num Peaks:' plus the peak
    entries that follow it. It is re-emitted as a 'Num Peaks:' line carrying
    the number of peaks actually written, followed by one tab-separated line
    per retained peak. Peaks with a comment get their m/z replaced by the
    value taken from the comment; peaks without a comment are kept as they
    are, or dropped when ``only_contains_annotation`` is set.

    The declared peak count is interpreted as a number of peaks, not of
    lines, so lines holding several ';'-separated peaks are consumed
    correctly. A blank line or the end of the file also ends a peak block, so
    an overstated count cannot swallow the following record.

    Args:
        file (str): The path to the input file.
        output (str): The path to the output file.
        only_contains_annotation (bool, optional): If True, only peaks with
            annotations are written. Defaults to False.

    Returns:
        None: The function writes the output to a file and does not return anything.

    Raises:
        ValueError: If a peak count, m/z or intensity is not numeric.
        IndexError: If a peak entry has fewer than two fields or an
            annotation has fewer than three whitespace-separated tokens.
    """
    output_lines = []

    with open(file, 'r') as msp_file:
        lines = iter(msp_file)
        for line in lines:
            if not line.startswith('Num Peaks:'):
                output_lines.append(line)
                continue

            num_peaks = int(line.split(':')[1].strip())
            peaks = []
            parsed = 0
            terminator = ''
            while parsed < num_peaks:
                peak_line = next(lines, '')
                if not peak_line.strip():
                    # Blank line (record separator) or end of file: the block
                    # is shorter than declared. Keep the separator so the
                    # record layout of the output is preserved.
                    terminator = peak_line
                    break
                for peak in get_peak_tuples(peak_line):
                    if not peak:
                        # Nothing to parse in an empty entry.
                        continue
                    parsed += 1
                    rendered = _render_peak(peak, only_contains_annotation)
                    if rendered:
                        peaks.append(rendered)

            output_lines.append("Num Peaks: %d\n" % len(peaks))
            output_lines.extend(peaks)
            if terminator:
                output_lines.append(terminator)

    # The input is fully read and closed before the output is opened, so the
    # two paths may refer to the same file. Everything collected is written,
    # including records whose peak block ended up empty.
    with open(output, 'w') as out_file:
        out_file.writelines(output_lines)
75
76
def parse_arguments() -> argparse.Namespace:
    """Define the command-line options and parse the process arguments.

    Two required string options give the input and output file names; the
    optional '-a/--annotated' switch is a boolean flag that defaults to False.

    Returns:
        argparse.Namespace: The parsed options, with attributes
        ``input_filename``, ``output_filename`` and ``annotated``.
    """
    cli = argparse.ArgumentParser()
    required_paths = (
        ('-i', '--input_filename', 'Input file name'),
        ('-o', '--output_filename', 'Output file name'),
    )
    for short_flag, long_flag, description in required_paths:
        cli.add_argument(short_flag, long_flag, type=str, required=True, help=description)
    cli.add_argument('-a', '--annotated', action='store_true', help='Process only peaks with annotations')
    return cli.parse_args()
84
85
if __name__ == "__main__":
    # Script entry point: read the command-line options and run the rewrite.
    cli_args = parse_arguments()
    overwrite_peaks(
        file=cli_args.input_filename,
        output=cli_args.output_filename,
        only_contains_annotation=cli_args.annotated,
    )