annotate use_theoretical_mz_annotations.py @ 2:e0cac9994a72 draft

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
author recetox
date Mon, 26 Feb 2024 10:40:17 +0000 (14 months ago)
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
e0cac9994a72 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff changeset
1 import argparse
e0cac9994a72 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff changeset
2 from typing import Iterator, Tuple
e0cac9994a72 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff changeset
3
e0cac9994a72 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff changeset
4
e0cac9994a72 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff changeset
def get_peak_values(peak: str) -> Tuple[float, float, str]:
    """Parse one peak entry into its m/z, intensity and trailing comment.

    Args:
        peak (str): Whitespace-separated peak entry: m/z, intensity and an
            optional free-text comment, which may itself contain whitespace.

    Returns:
        Tuple[float, float, str]: The m/z value, the intensity and the
            stripped comment. The comment is '' when the entry has only
            two fields.

    Raises:
        IndexError: If the entry has fewer than two fields.
        ValueError: If the m/z or intensity field is not a valid number.
    """
    # Split at most twice so that whitespace inside the comment is preserved.
    fields = peak.split(maxsplit=2)
    mz = float(fields[0])
    intensity = float(fields[1])
    # The third field keeps any trailing whitespace of the entry, so strip it.
    comment = fields[2].strip() if len(fields) == 3 else ''
    return mz, intensity, comment
e0cac9994a72 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff changeset
14
e0cac9994a72 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff changeset
15
e0cac9994a72 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff changeset
def get_peak_tuples(rline: str) -> Iterator[str]:
    """Split a peak line at ';' and yield the cleaned, non-empty entries.

    Args:
        rline (str): Raw line that holds one or more peak entries separated
            by ';'. It may end with a newline and/or a trailing ';'.

    Returns:
        Iterator[str]: The entries with surrounding whitespace removed.
            Entries that are empty after stripping are skipped.
    """
    # Strip before filtering: the token after a trailing ';' is usually just
    # "\n", which is truthy and would otherwise be passed on as ''.
    stripped_tokens = (token.strip() for token in rline.split(";"))
    return filter(None, stripped_tokens)
e0cac9994a72 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff changeset
21
e0cac9994a72 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff changeset
22
e0cac9994a72 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff changeset
def overwrite_peaks(file: str, output: str, only_contains_annotation: bool = False) -> None:
    """Rewrite an MSP-style file, replacing peak m/z values by annotated ones.

    The input is read record by record; blank lines separate records. All
    lines other than the peak block are copied unchanged. For every peak that
    carries a comment, the m/z value is replaced by the number found in the
    third whitespace-separated token of that comment (a trailing ',' is
    removed). Presumably this token is the theoretical m/z of the annotation;
    verify against the producer of the input file. The 'Num Peaks:' line is
    regenerated from the number of peaks actually written.

    A record is written only if the most recently parsed peak block produced
    at least one output peak when the record ends.

    Args:
        file (str): The path to the input file.
        output (str): The path to the output file.
        only_contains_annotation (bool, optional): If True, peaks without a
            comment are dropped. Defaults to False.

    Returns:
        None: The function writes the output to a file and does not return anything.

    Raises:
        IndexError: If a peak has fewer than two fields, or its comment has
            fewer than three tokens.
        ValueError: If the declared peak count, an m/z, an intensity or the
            third comment token is not a valid number.
    """
    annotated_msp = []
    annotated_msp_list = []
    peaks = []

    # A separate handle name keeps the ``file`` path parameter intact.
    with open(file, 'r') as source:
        line = ''
        reuse_line = False
        while True:
            if reuse_line:
                # The peak block stopped on this line; handle it here.
                reuse_line = False
            else:
                line = source.readline()

            if not line.strip():
                # Blank line or end of file: the current record is complete.
                if peaks:
                    annotated_msp_list.append(annotated_msp)
                annotated_msp = []
                if line == '':
                    break

            if line.startswith('Num Peaks:'):
                num_peaks = int(line.split(':')[1].strip())
                peaks = []
                parsed = 0
                # Count peaks rather than lines: one line may hold several
                # ';'-separated peaks, so counting lines would read into the
                # next record.
                while parsed < num_peaks:
                    line = source.readline()
                    if not line.strip():
                        # Fewer peaks than declared. Let the outer loop treat
                        # this blank line / end of file as the record boundary.
                        reuse_line = True
                        break

                    for peak in get_peak_tuples(line):
                        if not peak:
                            # Empty entry, e.g. after a trailing ';'.
                            continue
                        parsed += 1
                        mz, intensity, comment = get_peak_values(peak)
                        if comment:
                            # Replace the m/z by the value from the annotation.
                            mz = float(comment.split()[2].rstrip(','))
                            peaks.append('%s\t%s\t%s\n' % (str(mz), str(intensity), str(comment)))
                        elif not only_contains_annotation:
                            peaks.append('%s\t%s\n' % (str(mz), str(intensity)))

                annotated_msp.append("Num Peaks: %d\n" % len(peaks))
                annotated_msp.extend(peaks)
            else:
                # Metadata line, or the blank separator that opens the next record.
                annotated_msp.append(line)

    with open(output, 'w') as sink:
        for spectrum in annotated_msp_list:
            sink.writelines(spectrum)
e0cac9994a72 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff changeset
75
e0cac9994a72 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff changeset
76
e0cac9994a72 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff changeset
def parse_arguments(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the script.

    Args:
        argv (list of str, optional): Argument strings to parse. Defaults to
            None, in which case argparse reads ``sys.argv[1:]``.

    Returns:
        argparse.Namespace: Namespace with ``input_filename`` (str),
            ``output_filename`` (str) and ``annotated`` (bool, False unless
            ``-a``/``--annotated`` is given).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input_filename', type=str, required=True, help='Input file name')
    parser.add_argument('-o', '--output_filename', type=str, required=True, help='Output file name')
    parser.add_argument('-a', '--annotated', action='store_true', help='Process only peaks with annotations')
    return parser.parse_args(argv)
e0cac9994a72 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff changeset
84
e0cac9994a72 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff changeset
85
e0cac9994a72 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff changeset
if __name__ == "__main__":
    # Script entry point: read the CLI options and rewrite the input file.
    args = parse_arguments()
    overwrite_peaks(args.input_filename, args.output_filename, args.annotated)