Mercurial > repos > recetox > target_screen
annotate target_screen.py @ 0:d4c2d5bc0524 draft default tip
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
author | recetox |
---|---|
date | Thu, 26 Sep 2024 13:03:05 +0000 |
parents | |
children |
rev | line source |
---|---|
0
d4c2d5bc0524
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff
changeset
|
1 import argparse |
d4c2d5bc0524
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff
changeset
|
2 |
d4c2d5bc0524
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff
changeset
|
3 import numpy as np |
d4c2d5bc0524
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff
changeset
|
4 import pandas as pd |
d4c2d5bc0524
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff
changeset
|
5 |
d4c2d5bc0524
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff
changeset
|
6 |
d4c2d5bc0524
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff
changeset
|
def mz_match(marker, peak, ppm):
    """Tell whether marker and peak m/z values agree within a ppm window.

    The allowed deviation is ``ppm`` parts per million of the mean of the
    two values being compared, and the bound is inclusive.  Works on scalars
    as well as NumPy arrays; array arguments are compared element-wise and
    broadcast against each other.
    """
    mean_mz = (peak + marker) / 2
    allowed = mean_mz * ppm * 1e-06
    deviation = np.abs(marker - peak)
    return deviation <= allowed
d4c2d5bc0524
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff
changeset
|
9 |
d4c2d5bc0524
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff
changeset
|
10 |
d4c2d5bc0524
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff
changeset
|
def rt_match(marker, peak, tol):
    """Tell whether marker and peak retention times differ by at most ``tol``.

    The comparison is on the absolute difference and the bound is inclusive.
    Works on scalars as well as NumPy arrays; array arguments are compared
    element-wise and broadcast against each other.
    """
    rt_gap = np.abs(marker - peak)
    return rt_gap <= tol
d4c2d5bc0524
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff
changeset
|
13 |
d4c2d5bc0524
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff
changeset
|
14 |
d4c2d5bc0524
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff
changeset
|
def find_matches(peaks, markers, ppm, rt_tol):
    """Pair every marker with every peak that agrees in both m/z and rt.

    Parameters
    ----------
    peaks : pandas.DataFrame
        Peak table; the ``mz`` and ``rt`` columns are used for matching.
    markers : pandas.DataFrame
        Marker table; the ``mz``, ``rt`` and ``formula`` columns are used.
    ppm : number
        Tolerance handed to ``mz_match``.
    rt_tol : number
        Tolerance handed to ``rt_match``.

    Returns
    -------
    pandas.DataFrame
        One row per matching (marker, peak) pair: the marker's ``formula``,
        then all columns of the peak, then ``mz_diff`` and ``rt_diff`` with
        the absolute marker-to-peak differences.
    """
    # Marker values are turned into column vectors so that comparing them
    # with the flat peak vectors broadcasts to a (markers x peaks) grid.
    mz_column = markers['mz'].values[:, None]
    mz_row = peaks['mz'].values
    rt_column = markers['rt'].values[:, None]
    rt_row = peaks['rt'].values

    # A pair is a hit only when both criteria hold at the same grid cell.
    both_ok = mz_match(mz_column, mz_row, ppm) & rt_match(rt_column, rt_row, rt_tol)
    marker_idx, peak_idx = np.nonzero(both_ok)

    # Pull out the paired rows; resetting the index aligns them positionally.
    marker_rows = markers.iloc[marker_idx].reset_index(drop=True)
    peak_rows = peaks.iloc[peak_idx].reset_index(drop=True)
    hits = pd.concat([marker_rows[['formula']], peak_rows], axis=1)

    # Report how far each matched peak lies from its marker.
    mz_gap = marker_rows['mz'].values - peak_rows['mz'].values
    rt_gap = marker_rows['rt'].values - peak_rows['rt'].values
    hits['mz_diff'] = np.abs(mz_gap)
    hits['rt_diff'] = np.abs(rt_gap)

    return hits
d4c2d5bc0524
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff
changeset
|
39 |
d4c2d5bc0524
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff
changeset
|
40 |
d4c2d5bc0524
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff
changeset
|
def main():
    """Command-line entry point: screen a peak table against a marker list.

    Reads the peaks from a parquet file and the markers from a tab-separated
    file, matches them with ``find_matches`` and writes the hits as a
    tab-separated table without the index column.
    """
    parser = argparse.ArgumentParser(description='Find matches between peaks and markers.')
    parser.add_argument('--peaks', required=True, help='Path to the peaks parquet file.')
    # The markers file is read with sep='\t' below, so it is a TSV, not a CSV.
    parser.add_argument('--markers', required=True, help='Path to the markers TSV (tab-separated) file.')
    parser.add_argument('--output', required=True, help='Path to the output TSV file.')
    # Tolerances are continuous quantities: parse them as floats so that
    # fractional values such as "--ppm 2.5" are accepted. Integer input
    # still parses and yields the same numeric result as before.
    parser.add_argument('--ppm', type=float, default=5.0, help='PPM tolerance for mz matching.')
    parser.add_argument('--rt_tol', type=float, default=10.0, help='RT tolerance for rt matching.')
    args = parser.parse_args()

    peaks = pd.read_parquet(args.peaks)
    markers = pd.read_csv(args.markers, sep='\t')

    hits = find_matches(peaks, markers, args.ppm, args.rt_tol)

    hits.to_csv(args.output, sep='\t', index=False)
d4c2d5bc0524
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff
changeset
|
56 |
d4c2d5bc0524
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff
changeset
|
57 |
d4c2d5bc0524
planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/misc commit 94322884bede7ddb9f2a9166952dd0115bdb4e49
recetox
parents:
diff
changeset
|
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()