Mercurial > repos > goeckslab > gate_finder
diff gate_finder.py @ 0:6df8d6e42152 draft
planemo upload for repository https://github.com/goeckslab/tools-mti/tree/main/tools/vitessce commit 9b2dc921e692af8045773013d9f87d4d790e2ea1
author | goeckslab |
---|---|
date | Thu, 08 Sep 2022 17:22:53 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gate_finder.py Thu Sep 08 17:22:53 2022 +0000 @@ -0,0 +1,156 @@ +import argparse +import json +import warnings +from pathlib import Path + +import numpy as np +import pandas as pd +from anndata import read_h5ad +from sklearn.mixture import GaussianMixture +from sklearn.preprocessing import MinMaxScaler +from vitessce import ( + AnnDataWrapper, + Component as cm, + MultiImageWrapper, + OmeTiffWrapper, + VitessceConfig, +) + + +# Generate binarized phenotype for a gate +def get_gate_phenotype(g, d): + dd = d.copy() + dd = np.where(dd < g, 0, dd) + np.warnings.filterwarnings('ignore') + dd = np.where(dd >= g, 1, dd) + return dd + + +def get_gmm_phenotype(data): + low = np.percentile(data, 0.01) + high = np.percentile(data, 99.99) + data = np.clip(data, low, high) + + sum = np.sum(data) + median = np.median(data) + data_med = data / sum * median + + data_log = np.log1p(data_med) + data_log = data_log.reshape(-1, 1) + + scaler = MinMaxScaler(feature_range=(0, 1)) + data_norm = scaler.fit_transform(data_log) + + gmm = GaussianMixture(n_components=2) + gmm.fit(data_norm) + gate = np.mean(gmm.means_) + + return get_gate_phenotype(gate, np.ravel(data_norm)) + + +def main(inputs, output, image, anndata, masks=None): + """ + Parameter + --------- + inputs : str + File path to galaxy tool parameter. + output : str + Output folder for saving web content. + image : str + File path to the OME Tiff image. + anndata : str + File path to anndata containing phenotyping info. + masks : str + File path to the image masks. + """ + warnings.simplefilter('ignore') + + with open(inputs, 'r') as param_handler: + params = json.load(param_handler) + + marker = params['marker'].strip() + from_gate = params['from_gate'] + to_gate = params['to_gate'] + increment = params['increment'] + x_coordinate = params['x_coordinate'].strip() or 'X_centroid' + y_coordinate = params['y_coordinate'].strip() or 'Y_centroid' + + adata = read_h5ad(anndata) + + # If no raw data is available make a copy + if adata.raw is None: + adata.raw = adata + + # Copy of the raw data if it exisits + if adata.raw is not None: + adata.X = adata.raw.X + + data = pd.DataFrame( + adata.X, + columns=adata.var.index, + index=adata.obs.index + ) + marker_values = data[[marker]].values + marker_values_log = np.log1p(marker_values) + + # Identify the list of increments + gate_names = [] + for num in np.arange(from_gate, to_gate, increment): + num = round(num, 3) + key = marker + '--' + str(num) + adata.obs[key] = get_gate_phenotype(num, marker_values_log) + gate_names.append(key) + + adata.obs['GMM_auto'] = get_gmm_phenotype(marker_values) + gate_names.append('GMM_auto') + + adata.obsm['XY_coordinate'] = adata.obs[[x_coordinate, y_coordinate]].values + + vc = VitessceConfig(name=None, description=None) + dataset = vc.add_dataset() + image_wrappers = [OmeTiffWrapper(img_path=image, name='OMETIFF')] + if masks: + image_wrappers.append( + OmeTiffWrapper(img_path=masks, name='MASKS', is_bitmask=True) + ) + dataset.add_object(MultiImageWrapper(image_wrappers)) + + dataset.add_object( + AnnDataWrapper( + adata, + spatial_centroid_obsm='XY_coordinate', + cell_set_obs=gate_names, + cell_set_obs_names=[obj[0].upper() + obj[1:] for obj in gate_names], + expression_matrix="X" + ) + ) + spatial = vc.add_view(dataset, cm.SPATIAL) + cellsets = vc.add_view(dataset, cm.CELL_SETS) + status = vc.add_view(dataset, cm.STATUS) + lc = vc.add_view(dataset, cm.LAYER_CONTROLLER) + genes = vc.add_view(dataset, cm.GENES) + cell_set_sizes = vc.add_view(dataset, cm.CELL_SET_SIZES) + cell_set_expression = vc.add_view(dataset, cm.CELL_SET_EXPRESSION) + + vc.layout( + (status / genes / cell_set_expression) + | (cellsets / cell_set_sizes / lc) + | (spatial) + ) + config_dict = vc.export(to='files', base_url='http://localhost', out_dir=output) + + with open(Path(output).joinpath('config.json'), 'w') as f: + json.dump(config_dict, f, indent=4) + + +if __name__ == '__main__': + aparser = argparse.ArgumentParser() + aparser.add_argument("-i", "--inputs", dest="inputs", required=True) + aparser.add_argument("-e", "--output", dest="output", required=True) + aparser.add_argument("-g", "--image", dest="image", required=True) + aparser.add_argument("-a", "--anndata", dest="anndata", required=True) + aparser.add_argument("-m", "--masks", dest="masks", required=False) + + args = aparser.parse_args() + + main(args.inputs, args.output, args.image, args.anndata, args.masks)