annotate target_screen.py @ 0:d4c2d5bc0524 draft default tip

planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
author recetox
date Thu, 26 Sep 2024 13:03:05 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d4c2d5bc0524 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff changeset
1 import argparse
d4c2d5bc0524 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff changeset
2
d4c2d5bc0524 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff changeset
3 import numpy as np
d4c2d5bc0524 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff changeset
4 import pandas as pd
d4c2d5bc0524 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff changeset
5
d4c2d5bc0524 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff changeset
6
d4c2d5bc0524 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff changeset
def mz_match(marker, peak, ppm):
    """Tell whether two m/z values agree within a relative ppm window.

    The allowed deviation is ``ppm`` parts-per-million of the mean of
    ``marker`` and ``peak``. Inputs may be scalars or NumPy arrays; arrays
    are combined with the usual broadcasting rules and the result is
    element-wise.

    Args:
        marker: Reference m/z value(s).
        peak: Observed m/z value(s).
        ppm: Tolerance in parts-per-million.

    Returns:
        Boolean (or boolean array) that is true where the absolute
        difference does not exceed the tolerance.
    """
    deviation = np.abs(marker - peak)
    # Tolerance is taken relative to the midpoint of the two values.
    midpoint = (peak + marker) / 2
    allowed = midpoint * ppm * 1e-06
    return deviation <= allowed
d4c2d5bc0524 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff changeset
9
d4c2d5bc0524 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff changeset
10
d4c2d5bc0524 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff changeset
def rt_match(marker, peak, tol):
    """Tell whether two retention times agree within an absolute tolerance.

    Inputs may be scalars or NumPy arrays; arrays are combined with the
    usual broadcasting rules and the result is element-wise.

    Args:
        marker: Reference retention time(s).
        peak: Observed retention time(s).
        tol: Maximum allowed absolute difference (inclusive).

    Returns:
        Boolean (or boolean array) that is true where the absolute
        difference is at most ``tol``.
    """
    deviation = np.abs(marker - peak)
    return deviation <= tol
d4c2d5bc0524 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff changeset
13
d4c2d5bc0524 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff changeset
14
d4c2d5bc0524 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff changeset
def find_matches(peaks, markers, ppm, rt_tol):
    """Pair each marker with every peak that agrees in both m/z and rt.

    Args:
        peaks: DataFrame with at least the columns ``mz`` and ``rt``.
        markers: DataFrame with at least the columns ``mz``, ``rt`` and
            ``formula``.
        ppm: Tolerance handed to ``mz_match``.
        rt_tol: Tolerance handed to ``rt_match``.

    Returns:
        DataFrame with one row per (marker, peak) hit: the marker's
        ``formula``, followed by all columns of the matching peak, plus
        ``mz_diff`` and ``rt_diff`` holding the absolute differences
        between marker and peak.
    """
    # Markers become column vectors and peaks stay as row vectors, so
    # broadcasting evaluates every marker against every peak at once.
    mz_ok = mz_match(markers['mz'].values[:, np.newaxis], peaks['mz'].values, ppm)
    rt_ok = rt_match(markers['rt'].values[:, np.newaxis], peaks['rt'].values, rt_tol)

    # Row positions index markers, column positions index peaks.
    marker_pos, peak_pos = np.nonzero(mz_ok & rt_ok)

    marker_rows = markers.iloc[marker_pos].reset_index(drop=True)
    peak_rows = peaks.iloc[peak_pos].reset_index(drop=True)

    # Both frames now share a fresh 0..n-1 index, so they align row by row.
    hits = pd.concat([marker_rows[['formula']], peak_rows], axis=1)

    hits['mz_diff'] = np.abs(marker_rows['mz'].values - peak_rows['mz'].values)
    hits['rt_diff'] = np.abs(marker_rows['rt'].values - peak_rows['rt'].values)
    return hits
d4c2d5bc0524 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff changeset
39
d4c2d5bc0524 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff changeset
40
d4c2d5bc0524 planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff changeset
def main():
    """Command-line entry point: screen a peak table against a marker list.

    Reads the peaks from a parquet file and the markers from a
    tab-separated file, matches them with ``find_matches`` and writes the
    resulting hits as a tab-separated file without the index column.
    """
    parser = argparse.ArgumentParser(description='Find matches between peaks and markers.')
    parser.add_argument('--peaks', required=True, help='Path to the peaks parquet file.')
    # The markers file is parsed with a tab separator below, so describe it as TSV.
    parser.add_argument('--markers', required=True, help='Path to the markers TSV (tab-separated) file.')
    parser.add_argument('--output', required=True, help='Path to the output TSV file.')
    # Tolerances are parsed as floats so fractional values (e.g. --ppm 2.5)
    # are accepted; integer strings such as "5" still parse as before.
    parser.add_argument('--ppm', type=float, default=5.0, help='PPM tolerance for mz matching.')
    parser.add_argument('--rt_tol', type=float, default=10.0, help='RT tolerance for rt matching.')
    args = parser.parse_args()

    peaks = pd.read_parquet(args.peaks)
    markers = pd.read_csv(args.markers, sep='\t')

    hits = find_matches(peaks, markers, args.ppm, args.rt_tol)

    hits.to_csv(args.output, sep='\t', index=False)


if __name__ == "__main__":
    main()