Mercurial > repos > galaxyp > calisp
annotate benchmarking.py @ 1:867f17ede7f3 draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
author | galaxyp |
---|---|
date | Thu, 14 Sep 2023 12:49:19 +0000 (16 months ago) |
parents | |
children |
rev | line source |
---|---|
1
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
1 import argparse |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
2 import os |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
3 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
4 import numpy as np |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
5 import pandas as pd |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
6 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
# Command-line interface.
# The description is passed by keyword: the first positional parameter of
# argparse.ArgumentParser is `prog`, so passing the sentence positionally
# made it appear as the program name in the usage/help output.
parser = argparse.ArgumentParser(
    description="List of natural abundances of the isotopes"
)

# Required inputs.
parser.add_argument(
    "--input",
    type=str,
    metavar="data",
    help="Input file/folder",
    required=True,
)
parser.add_argument(
    "--isotope_abundance_matrix",
    type=str,
    metavar="data",
    help="Isotope abundance matrix",
    required=True,
)
parser.add_argument(
    "--isotope",
    type=str,
    metavar="ISOTOPE",
    help="Isotope",
    required=True,
)

# Optional outputs.
parser.add_argument(
    "--out_summary",
    type=str,
    metavar="output",
    help="Peptide summary output",
    required=False,
)
parser.add_argument(
    "--out_filtered",
    type=str,
    metavar="output",
    help="Filtered output",
    required=False,
)

# Optional table of nominal values; None when the option is not given.
parser.add_argument(
    "--nominal_values",
    type=str,
    metavar="nominal_values",
    help="Table giving nominal values",
    default=None,
    required=False,
)

# Indicate end of argument definitions and parse args
args = parser.parse_args()
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
49 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
50 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
def parse_nominal_values(filename):
    """Read a whitespace-separated table of nominal values into a dict.

    Each data line contributes one entry: the first field is the key and the
    second field is the value. Both are kept as strings; any further fields
    on the line are ignored. Blank lines and lines whose first non-blank
    character is ``#`` are skipped.

    :param filename: path of the table; a falsy value (``None``, ``""``)
        means "no table".
    :return: dict mapping first field to second field; empty when no
        filename is given.
    :raises IndexError: if a data line has fewer than two fields.
    """
    if not filename:
        return {}

    table = {}
    with open(filename) as handle:
        for raw in handle:
            # split() with no argument already discards surrounding whitespace
            fields = raw.split()
            if not fields or fields[0].startswith("#"):
                continue
            table[fields[0]] = fields[1]
    return table
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
63 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
64 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
65 # Benchmarking section |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
66 # the functions for optimising calis-p data |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
67 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
68 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
69 def load_calisp_data(filename, factor): |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
70 # (1) load data |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
71 file_count = 1 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
72 if os.path.isdir(filename): |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
73 file_data = [] |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
74 file_count = len(os.listdir(filename)) |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
75 for f in os.listdir(filename): |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
76 f = os.path.join(filename, f) |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
77 file_data.append(pd.read_feather(f)) |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
78 base, _ = os.path.splitext(f) |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
79 file_data[-1].to_csv(f"{base}.tsv", sep="\t", index=False) |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
80 data = pd.concat(file_data) |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
81 else: |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
82 data = pd.read_feather(filename) |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
83 base, _ = os.path.splitext(filename) |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
84 data.to_csv(f"{base}.tsv", sep="\t", index=False) |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
85 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
86 file_success_count = len(data["ms_run"].unique()) |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
87 # (2) calculate deltas |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
88 # ((1-f)/f) - 1 == 1/f -2 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
89 data["delta_na"] = data["ratio_na"] / ((1 / factor) - 2) * 1000 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
90 data["delta_fft"] = data["ratio_fft"] / ((1 / factor) - 2) * 1000 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
91 print( |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
92 f"Loaded {len(data.index)} isotopic patterns from {file_success_count}/{file_count} file(s)" |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
93 ) |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
94 return data |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
95 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
96 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
def filter_calisp_data(data, target):
    """Return the rows of ``data`` that pass the filters for ``target``.

    ``target`` is matched case-insensitively:

    * ``"na"``: all six ``flag_*`` columns listed below must be False.
    * ``"fft"``: ``error_fft`` < 0.001 and the multiple-bins flag is False.
    * ``"clumpy"``: ``error_clumpy`` < 0.001 and the multiple-bins flag is
      False.

    A one-line summary of how many rows remain is printed.

    :param data: ``pandas.DataFrame`` holding the columns named above.
    :param target: one of ``"na"``, ``"fft"`` or ``"clumpy"``.
    :return: the filtered ``pandas.DataFrame``.
    :raises ValueError: if ``target`` is not one of the supported names
        (previously this surfaced as an ``UnboundLocalError``).
    """
    mode = target.lower()
    if mode not in ("na", "fft", "clumpy"):
        raise ValueError(
            f"Unknown filter target {target!r}; expected 'na', 'fft' or 'clumpy'"
        )

    # Every mode rejects peptides that were assigned to more than one bin.
    keep = data["flag_peptide_assigned_to_multiple_bins"] == False  # noqa: E712

    if mode == "na":
        for flag in (
            "flag_peak_at_minus_one_pos",
            "flag_pattern_is_wobbly",
            "flag_psm_has_low_confidence",
            "flag_psm_is_ambiguous",
            "flag_pattern_is_contaminated",
        ):
            keep = keep & (data[flag] == False)  # noqa: E712
    else:
        # "fft" and "clumpy" differ only in which error column is thresholded
        keep = keep & (data[f"error_{mode}"] < 0.001)

    subdata = data.loc[keep, :]

    total = len(data.index)
    remaining = len(subdata.index)
    # Guard the percentage so an empty input frame does not divide by zero.
    percent = remaining / total * 100 if total else 0.0
    print(f"{remaining} ({percent:.1f}%) remaining after filters.")
    return subdata
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
125 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
126 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
def estimate_clumpiness(data):
    """Estimate a normalised value per ``c1``..``c6`` column from histograms.

    Only rows with ``error_clumpy`` < 0.001 are used. For each of the columns
    ``c1`` to ``c6`` a 50-bin histogram is built, the first and last bins are
    dropped, and the index of the fullest remaining bin is scaled by
    ``0.02 / 0.96``. The collected values are divided by their sum.

    Columns for which ``np.histogram`` raises ``ValueError`` (for example
    when the values have no finite range because they contain NaN) are
    skipped, so the result may have fewer than six entries.

    :param data: ``pandas.DataFrame`` with ``error_clumpy`` and ``c1``..``c6``.
    :return: ``numpy.ndarray`` of the normalised values; empty when every
        column was skipped (previously this case raised ``TypeError``).
        If all values are zero the division yields NaN, as before.
    """
    subdata = data.loc[data["error_clumpy"] < 0.001, :]

    estimates = []
    for column in ("c1", "c2", "c3", "c4", "c5", "c6"):
        try:
            counts, _edges = np.histogram(subdata[column], bins=50)
        except ValueError:
            # this column cannot be binned; leave it out of the result
            continue
        inner = counts[1:-1]
        # argmax gives the first index holding the maximum count
        estimates.append(0.02 * np.argmax(inner) / 0.96)

    if not estimates:
        return np.array([])

    # Convert explicitly instead of relying on list / numpy-scalar division.
    return np.asarray(estimates) / sum(estimates)
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
139 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
140 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
141 # the function for benchmarking |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
def benchmark_sip_mock_community_data(data, factor, nominal_values):
    """Summarise isotope ratios per peptide, bin and MS run.

    One output row is produced for every combination of peptide, bin and
    MS run found in ``data``.

    Args:
        data: DataFrame providing the columns ``ms_run``, ``bins``,
            ``peptide``, ``psm_mz``, ``pattern_total_intensity``,
            ``ratio_na``, ``psm_neutrons`` and ``ratio_fft``.
        factor: Background fraction of the unlabelled isotope; ``1 - factor``
            is used as the background fraction of the heavy isotope.
        nominal_values: Mapping from MS run name to the nominal label in
            percent. Runs missing from the mapping are treated as 0 % label.

    Returns:
        A DataFrame sorted by ``bins`` and ``peptide`` with the columns
        ``file``, ``bins``, ``% label``, ``ratio`` (expected ratio in
        percent), ``peptide``, ``psm_mz``, ``n(patterns)``,
        ``mean intensity``, ``ratio_NA median``, ``N mean``, ``ratio_NA SEM``,
        ``ratio_FFT median``, ``ratio_FFT SEM`` and ``False Positive``.
    """
    background_isotope = 1 - factor
    background_unlabelled = factor

    # For false positive discovery rates we set the threshold at the
    # isotope/unlabelled ratio associated with 1/4 of a generation of
    # labeling. The E. coli values (1.7, 4.2 and 7.1) are for 1 generation at
    # 1, 5 and 10% label, and we take the background (1.07) into account as
    # well.
    thresholds = {
        1: 1.07 + (1.7 - 1.07) / 4,
        5: 1.07 + (4.2 - 1.07) / 4,
        10: 1.07 + (7.1 - 1.07) / 4,
    }

    filenames = data["ms_run"].unique()
    for fname in filenames:
        print(f"Using nominal value {nominal_values.get(fname, 0)} for {fname}")

    # The nominal label, the expected ratio and the threshold depend only on
    # the MS run, so they are computed once per run. They must be looked up
    # with the run currently being processed; reusing the leftover variable
    # of the print loop above would apply the last run's label to every run.
    per_file = []
    for f in filenames:
        nominal_value = nominal_values.get(f, 0)
        unlabeled_fraction = 1 - nominal_value / 100
        unlabelled = unlabeled_fraction * background_unlabelled
        isotope = nominal_value / 100 + unlabeled_fraction * background_isotope
        ratio = isotope / unlabelled * 100
        # Only 1, 5 and 10 % label have a threshold; other labels yield None
        # and are never flagged as false positives.
        per_file.append((f, nominal_value, ratio, thresholds.get(nominal_value)))

    bin_names = data["bins"].unique()
    peptide_sequences = data["peptide"].unique()
    benchmarking = pd.DataFrame(
        columns=[
            "file",
            "bins",
            "% label",
            "ratio",
            "peptide",
            "psm_mz",
            "n(patterns)",
            "mean intensity",
            "ratio_NA median",
            "N mean",
            "ratio_NA SEM",
            "ratio_FFT median",
            "ratio_FFT SEM",
            "False Positive",
        ]
    )
    for p in peptide_sequences:
        pep_data = data.loc[data["peptide"] == p, :]
        for b in bin_names:
            # NOTE(review): the statistics below are not restricted to bin
            # ``b``; every bin row of a peptide repeats the same per-run
            # numbers. Confirm whether filtering on ``bins`` was intended.
            for f, nominal_value, ratio, threshold in per_file:
                pepfiledata = pep_data.loc[pep_data["ms_run"] == f, :]
                ratio_na_median = pepfiledata["ratio_na"].median()
                is_false_positive = 0
                # Any bin other than "K12" whose median ratio exceeds the
                # threshold for this label is counted as a false positive.
                if (
                    b != "K12"
                    and threshold is not None
                    and ratio_na_median > threshold
                ):
                    is_false_positive = 1
                benchmarking.loc[len(benchmarking)] = [
                    f,
                    b,
                    nominal_value,
                    ratio,
                    p,
                    pepfiledata["psm_mz"].median(),
                    len(pepfiledata.index),
                    pepfiledata["pattern_total_intensity"].mean(),
                    ratio_na_median,
                    pepfiledata["psm_neutrons"].mean(),
                    pepfiledata["ratio_na"].sem(),
                    pepfiledata["ratio_fft"].median(),
                    pepfiledata["ratio_fft"].sem(),
                    is_false_positive,
                ]

    benchmarking = benchmarking.sort_values(["bins", "peptide"])
    benchmarking = benchmarking.reset_index(drop=True)
    return benchmarking
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
221 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
222 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
# Position (row, column) of each supported isotope inside the isotope
# abundance matrix: one row per element (C, N, O, H, S), one column per
# isotope of that element.
rowcol = {
    # carbon
    "13C": (0, 1),
    "14C": (0, 2),
    # nitrogen
    "15N": (1, 1),
    # oxygen
    "17O": (2, 1),
    "18O": (2, 2),
    # hydrogen
    "2H": (3, 1),
    "3H": (3, 2),
    # sulfur
    "33S": (4, 1),
    "34S": (4, 2),
    "36S": (4, 3),
}

# Read the whitespace-separated matrix; anything after "#" is a comment and
# lines that are empty once the comment is removed are skipped.
matrix = []
with open(args.isotope_abundance_matrix) as iamf:
    for line in iamf:
        line = line.strip().split("#")[0]
        if line:
            matrix.append([float(x) for x in line.split()])

isotope_row, isotope_col = rowcol[args.isotope]
factor = matrix[isotope_row][isotope_col]
print(f"Using factor {factor}")
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
245 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
246 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
# cleaning and filtering data
data = load_calisp_data(args.input, factor)

# NOTE(review): filtering and the x100 scaling of the ratio columns happen
# only in this branch, so `data` stays as loaded whenever args.out_filtered is
# not set — confirm this is intended for code that uses `data` afterwards.
if args.out_filtered:
    data = filter_calisp_data(data, "na")
    # Build a copy of the peptide column with the quoted modification names
    # and any bracketed suffix (plus the whitespace before it) removed.
    data["peptide_clean"] = data["peptide"]
    data["peptide_clean"] = data["peptide_clean"].replace("'Oxidation'", "", regex=True)
    data["peptide_clean"] = data["peptide_clean"].replace(
        "'Carbamidomethyl'", "", regex=True
    )
    data["peptide_clean"] = data["peptide_clean"].replace(r"\s*\[.*\]", "", regex=True)

    # Scale both ratio columns by 100 (presumably fraction -> percent; verify
    # against load_calisp_data).
    data["ratio_na"] = data["ratio_na"] * 100
    data["ratio_fft"] = data["ratio_fft"] * 100
    # Write the filtered table as tab-separated text without the index.
    data.to_csv(args.out_filtered, sep="\t", index=False)
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
262 |
867f17ede7f3
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tools/calisp commit 42e5dfeaa309e6ac17b4616314498a3b628272d2
galaxyp
parents:
diff
changeset
|
# The column "% label" indicates the amount of label applied (percentage of label in the glucose). The amount of
# labeled E. coli cells added corresponded to 1 generation of labeling (50% of E. coli cells were labeled in
# all experiments except controls)

# Write the per-peptide/bin/run benchmark table as tab-separated text when a
# summary output path was given.
if args.out_summary:
    nominal_values = parse_nominal_values(args.nominal_values)
    benchmarks = benchmark_sip_mock_community_data(data, factor, nominal_values)
    benchmarks.to_csv(args.out_summary, sep="\t", index=False)