comparison gate_finder.py @ 0:6df8d6e42152 draft

planemo upload for repository https://github.com/goeckslab/tools-mti/tree/main/tools/vitessce commit 9b2dc921e692af8045773013d9f87d4d790e2ea1
author goeckslab
date Thu, 08 Sep 2022 17:22:53 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:6df8d6e42152
1 import argparse
2 import json
3 import warnings
4 from pathlib import Path
5
6 import numpy as np
7 import pandas as pd
8 from anndata import read_h5ad
9 from sklearn.mixture import GaussianMixture
10 from sklearn.preprocessing import MinMaxScaler
11 from vitessce import (
12 AnnDataWrapper,
13 Component as cm,
14 MultiImageWrapper,
15 OmeTiffWrapper,
16 VitessceConfig,
17 )
18
19
20 # Generate binarized phenotype for a gate
def get_gate_phenotype(g, d):
    """
    Binarize marker values against a gate.

    Parameters
    ----------
    g : float
        Gate (threshold) value.
    d : array-like
        Marker values. The input is copied and never modified.

    Returns
    -------
    numpy.ndarray
        Array with the shape and dtype of `d`, holding 0 where the value is
        below the gate and 1 where it is at or above the gate. Values that
        compare as neither (NaN) are left unchanged.
    """
    # np.array always copies, so the caller's data stays untouched.
    dd = np.array(d)

    # Both masks are taken from the original values before anything is
    # overwritten. Thresholding in two passes would re-test the zeros that
    # were just written and turn them into ones whenever g <= 0.
    # np.errstate keeps NaN comparisons quiet; it replaces the former
    # `np.warnings` alias, which no longer exists in NumPy >= 1.24.
    with np.errstate(invalid='ignore'):
        below = dd < g
        at_or_above = dd >= g

    dd[below] = 0
    dd[at_or_above] = 1
    return dd
27
28
def get_gmm_phenotype(data):
    """
    Binarize marker values with a gate derived from a two-component GMM.

    The values are clipped to their 0.01th-99.99th percentile range, scaled
    by median / sum, log1p-transformed and min-max normalized to [0, 1].
    A two-component Gaussian mixture is fitted to the normalized values and
    the mean of the two component means is used as the gate.

    Parameters
    ----------
    data : numpy.ndarray
        Marker values.

    Returns
    -------
    numpy.ndarray
        Flattened result of `get_gate_phenotype` applied to the normalized
        values with the GMM-derived gate.
    """
    # Clip extreme values on both tails.
    lower = np.percentile(data, 0.01)
    upper = np.percentile(data, 99.99)
    clipped = np.clip(data, lower, upper)

    # Scale by median / total.
    total = np.sum(clipped)
    middle = np.median(clipped)
    rescaled = clipped / total * middle

    # The scaler and the mixture model both expect a single feature column.
    log_column = np.log1p(rescaled).reshape(-1, 1)
    normalized = MinMaxScaler(feature_range=(0, 1)).fit_transform(log_column)

    mixture = GaussianMixture(n_components=2)
    mixture.fit(normalized)
    threshold = np.mean(mixture.means_)

    return get_gate_phenotype(threshold, np.ravel(normalized))
49
50
def main(inputs, output, image, anndata, masks=None):
    """
    Build a Vitessce configuration for comparing candidate gates of a marker.

    One binarized obs column is added per candidate gate (applied to the
    log1p-transformed marker values), plus a 'GMM_auto' column. The Vitessce
    files and a 'config.json' are written into the output folder.

    Parameter
    ---------
    inputs : str
        File path to galaxy tool parameter (JSON). The keys read are
        'marker', 'from_gate', 'to_gate', 'increment', 'x_coordinate'
        and 'y_coordinate'.
    output : str
        Output folder for saving web content.
    image : str
        File path to the OME Tiff image.
    anndata : str
        File path to anndata containing phenotyping info.
    masks : str
        File path to the image masks. Optional.
    """
    warnings.simplefilter('ignore')

    with open(inputs, 'r') as param_handler:
        params = json.load(param_handler)

    marker = params['marker'].strip()
    from_gate = params['from_gate']
    to_gate = params['to_gate']
    increment = params['increment']
    # Blank coordinate names fall back to the default centroid columns.
    x_coordinate = params['x_coordinate'].strip() or 'X_centroid'
    y_coordinate = params['y_coordinate'].strip() or 'Y_centroid'

    adata = read_h5ad(anndata)

    # Seed the raw slot from the current object when it is missing.
    if adata.raw is None:
        adata.raw = adata

    # Work on the raw matrix if it exists.
    if adata.raw is not None:
        adata.X = adata.raw.X

    expression = pd.DataFrame(
        adata.X,
        columns=adata.var.index,
        index=adata.obs.index,
    )
    marker_values = expression[[marker]].values
    marker_values_log = np.log1p(marker_values)

    # One binarized obs column per candidate gate; to_gate itself is excluded
    # by np.arange.
    gate_names = []
    for gate in np.arange(from_gate, to_gate, increment):
        gate = round(gate, 3)
        column = '--'.join((marker, str(gate)))
        adata.obs[column] = get_gate_phenotype(gate, marker_values_log)
        gate_names.append(column)

    # Automatic gate from the Gaussian mixture on the untransformed values.
    adata.obs['GMM_auto'] = get_gmm_phenotype(marker_values)
    gate_names.append('GMM_auto')

    centroids = adata.obs[[x_coordinate, y_coordinate]]
    adata.obsm['XY_coordinate'] = centroids.values

    vc = VitessceConfig(name=None, description=None)
    dataset = vc.add_dataset()

    layers = [OmeTiffWrapper(img_path=image, name='OMETIFF')]
    if masks:
        mask_layer = OmeTiffWrapper(
            img_path=masks, name='MASKS', is_bitmask=True
        )
        layers.append(mask_layer)
    dataset.add_object(MultiImageWrapper(layers))

    # Display names are the column names with the first letter upper-cased.
    display_names = [name[0].upper() + name[1:] for name in gate_names]
    cells = AnnDataWrapper(
        adata,
        spatial_centroid_obsm='XY_coordinate',
        cell_set_obs=gate_names,
        cell_set_obs_names=display_names,
        expression_matrix="X",
    )
    dataset.add_object(cells)

    spatial = vc.add_view(dataset, cm.SPATIAL)
    cellsets = vc.add_view(dataset, cm.CELL_SETS)
    status = vc.add_view(dataset, cm.STATUS)
    lc = vc.add_view(dataset, cm.LAYER_CONTROLLER)
    genes = vc.add_view(dataset, cm.GENES)
    cell_set_sizes = vc.add_view(dataset, cm.CELL_SET_SIZES)
    cell_set_expression = vc.add_view(dataset, cm.CELL_SET_EXPRESSION)

    # Three columns side by side, each (except the last) stacking three views.
    left_column = status / genes / cell_set_expression
    middle_column = cellsets / cell_set_sizes / lc
    vc.layout(left_column | middle_column | spatial)

    config_dict = vc.export(
        to='files', base_url='http://localhost', out_dir=output
    )

    config_path = Path(output) / 'config.json'
    with config_path.open('w') as handle:
        json.dump(config_dict, handle, indent=4)
144
145
if __name__ == '__main__':
    # Command-line entry point: (short flag, option name, required).
    cli_options = (
        ("-i", "inputs", True),
        ("-e", "output", True),
        ("-g", "image", True),
        ("-a", "anndata", True),
        ("-m", "masks", False),
    )

    parser = argparse.ArgumentParser()
    for short_flag, option, is_required in cli_options:
        parser.add_argument(
            short_flag, "--" + option, dest=option, required=is_required
        )

    cli = parser.parse_args()

    main(cli.inputs, cli.output, cli.image, cli.anndata, cli.masks)