Mercurial > repos > recetox > use_theoretical_mz_annotations
annotate use_theoretical_mz_annotations.py @ 2:e0cac9994a72 draft
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
author | recetox |
---|---|
date | Mon, 26 Feb 2024 10:40:17 +0000 (14 months ago) |
parents | |
children |
rev | line source |
---|---|
2
e0cac9994a72
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff
changeset
|
1 import argparse |
e0cac9994a72
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff
changeset
|
2 from typing import Iterator, Tuple |
e0cac9994a72
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff
changeset
|
3 |
e0cac9994a72
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff
changeset
|
4 |
e0cac9994a72
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff
changeset
|
def get_peak_values(peak: str) -> Tuple[float, float, str]:
    """Parse a single peak entry into its m/z, intensity and optional comment.

    Args:
        peak (str): One peak entry made of whitespace-separated fields: the m/z,
            the intensity and, optionally, trailing free text (the comment).

    Returns:
        Tuple[float, float, str]: The m/z, the intensity and the comment with
        surrounding whitespace removed ('' when there is no third field).

    Raises:
        ValueError: If the entry has fewer than two fields, or if the m/z or
            intensity field is not a valid number.
    """
    # Split at most twice so the comment keeps its internal whitespace.
    fields = peak.split(maxsplit=2)
    if len(fields) < 2:
        # Fail with the same exception type float() uses for bad numbers,
        # instead of an opaque IndexError.
        raise ValueError(
            "Malformed peak entry, expected '<m/z> <intensity> [comment]': %r" % peak
        )

    mz = float(fields[0])
    intensity = float(fields[1])
    comment = fields[2].strip() if len(fields) == 3 else ''
    return mz, intensity, comment
e0cac9994a72
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff
changeset
|
14 |
e0cac9994a72
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff
changeset
|
15 |
e0cac9994a72
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff
changeset
|
def get_peak_tuples(rline: str) -> Iterator[str]:
    """Split a raw peak line at ';' into cleaned, non-empty peak entries.

    Args:
        rline (str): A raw line that may hold one or more peak entries
            separated by ';'.

    Returns:
        Iterator[str]: The entries with surrounding whitespace removed.
        Entries that are empty after stripping are skipped.
    """
    # Strip BEFORE filtering: a trailing ';' leaves a whitespace-only token
    # (e.g. the line's "\n") which is truthy until it has been stripped.
    stripped = (token.strip() for token in rline.split(";"))
    return (token for token in stripped if token)
e0cac9994a72
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff
changeset
|
21 |
e0cac9994a72
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff
changeset
|
22 |
e0cac9994a72
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff
changeset
|
def overwrite_peaks(file: str, output: str, only_contains_annotation: bool = False) -> None:
    """Rewrite the peaks of the input file, taking the m/z from peak annotations.

    The input is read record by record; a blank line (or the end of the file)
    closes a record. Lines other than the peak block are copied unchanged.
    For every peak that carries a comment, the m/z is replaced by the number
    found in the third whitespace-separated token of that comment. The
    'Num Peaks:' line is rewritten to the number of peaks actually kept.
    A record is only written when the most recently parsed peak block kept
    at least one peak.

    Args:
        file (str): The path to the input file.
        output (str): The path to the output file.
        only_contains_annotation (bool, optional): If True, peaks without a
            comment are dropped. Defaults to False.

    Returns:
        None: The function writes the output to a file and does not return anything.

    Raises:
        ValueError: If a 'Num Peaks:' count, a peak value or the annotated m/z
            is not a valid number.
        IndexError: If a peak comment has fewer than three tokens.
    """
    annotated_msp = []       # lines of the record currently being assembled
    annotated_msp_list = []  # finished records, in input order
    peaks = []               # formatted peaks of the most recent 'Num Peaks:' block

    with open(file, 'r') as source:
        while True:
            line = source.readline()
            if not line.strip():
                # Blank line or EOF: close the current record.
                if peaks:
                    annotated_msp_list.append(annotated_msp)
                annotated_msp = []
                if line == '':
                    # readline() returns '' only at end of file.
                    break
            if line.startswith('Num Peaks:'):
                num_peaks = int(line.split(':')[1].strip())
                peaks = []
                parsed = 0
                # Count parsed peaks rather than lines: one line may hold
                # several ';'-separated peaks.
                while parsed < num_peaks:
                    line = source.readline()
                    if line == '':
                        # Truncated file; the outer loop handles EOF.
                        break
                    for peak in get_peak_tuples(line):
                        if not peak:
                            # Ignore empty entries (e.g. after a trailing ';').
                            continue
                        parsed += 1
                        mz, intensity, comment = get_peak_values(peak)
                        if comment:
                            # Assumes the third token of the comment is the
                            # theoretical m/z, possibly followed by ','
                            # -- TODO confirm against the annotation producer.
                            mz = float(comment.split()[2].rstrip(','))
                            peaks.append('%s\t%s\t%s\n' % (mz, intensity, comment))
                        elif not only_contains_annotation:
                            peaks.append('%s\t%s\n' % (mz, intensity))

                annotated_msp.append("Num Peaks: %d\n" % len(peaks))
                annotated_msp.extend(peaks)
            else:
                # Metadata lines (and the blank separator itself) pass through.
                annotated_msp.append(line)

    with open(output, 'w') as sink:
        for spectrum in annotated_msp_list:
            sink.writelines(spectrum)
e0cac9994a72
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff
changeset
|
75 |
e0cac9994a72
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff
changeset
|
76 |
e0cac9994a72
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff
changeset
|
def parse_arguments(argv=None) -> argparse.Namespace:
    """Parse the command-line options of the script.

    Args:
        argv (optional): Sequence of argument strings to parse. When None
            (the default), argparse reads the arguments from sys.argv.

    Returns:
        argparse.Namespace: Namespace with the attributes input_filename,
        output_filename and annotated.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-i', '--input_filename', type=str, required=True, help='Input file name')
    parser.add_argument('-o', '--output_filename', type=str, required=True, help='Output file name')
    parser.add_argument('-a', '--annotated', action='store_true', help='Process only peaks with annotations')
    return parser.parse_args(argv)
e0cac9994a72
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff
changeset
|
84 |
e0cac9994a72
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff
changeset
|
85 |
e0cac9994a72
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 6d6720ef358935ad4bb7c5f26efb55647567eb14
recetox
parents:
diff
changeset
|
if __name__ == "__main__":
    # Script entry point: read the command-line options and rewrite the peaks.
    args = parse_arguments()
    overwrite_peaks(
        file=args.input_filename,
        output=args.output_filename,
        only_contains_annotation=args.annotated,
    )