diff vitessce_spatial.py @ 4:068da7f7cd83 draft default tip

planemo upload for repository https://github.com/goeckslab/tools-mti/tree/main/tools/vitessce commit bc4c0bb6784a55399241f99a29b176541a164a18
author goeckslab
date Thu, 20 Feb 2025 19:47:16 +0000
parents 9f60ef2d586e
children
line wrap: on
line diff
--- a/vitessce_spatial.py	Thu May 30 17:24:44 2024 +0000
+++ b/vitessce_spatial.py	Thu Feb 20 19:47:16 2025 +0000
@@ -1,6 +1,7 @@
 import argparse
 import json
 import warnings
+from os.path import isdir, join
 from pathlib import Path
 
 import scanpy as sc
@@ -12,14 +13,18 @@
     OmeTiffWrapper,
     VitessceConfig,
 )
+from vitessce.data_utils import (
+    optimize_adata,
+    VAR_CHUNK_SIZE,
+)
 
 
-def main(inputs, output, image, anndata=None, masks=None):
+def main(inputs, output, image, offsets=None, anndata=None, masks=None):
     """
     Parameter
     ---------
     inputs : str
-        File path to galaxy tool parameter.
+        File path to galaxy inputs config file.
     output : str
         Output folder for saving web content.
     image : str
@@ -34,26 +39,52 @@
     with open(inputs, 'r') as param_handler:
         params = json.load(param_handler)
 
-    vc = VitessceConfig(name=None, description=None)
+    # initialize vitessce config and add OME-TIFF image, and masks if specified
+    vc = VitessceConfig(schema_version="1.0.17", name=None, description=None)
     dataset = vc.add_dataset()
-    image_wrappers = [OmeTiffWrapper(img_path=image, name='OMETIFF')]
+
+    # FIXME: grab offsets file for faster display. NEED TO TEST
+    image_wrappers = [OmeTiffWrapper(img_path=image, offsets_path=offsets, name='OMETIFF')]
     if masks:
         image_wrappers.append(
             OmeTiffWrapper(img_path=masks, name='MASKS', is_bitmask=True)
         )
     dataset.add_object(MultiImageWrapper(image_wrappers))
 
-    status = vc.add_view(dataset, cm.STATUS)
-    spatial = vc.add_view(dataset, cm.SPATIAL)
-    lc = vc.add_view(dataset, cm.LAYER_CONTROLLER)
+    # set relative view sizes (w,h), full window dims are 12x12
+    # if no anndata file, image and layer view can take up whole window
+    if not anndata:
+        spatial_dims = (9, 12)
+        lc_dims = (3, 12)
+    else:
+        spatial_dims = (6, 6)
+        lc_dims = (3, 6)
 
+    # add views for the images, and the layer/channels selector
+    spatial = vc.add_view(
+        view_type=cm.SPATIAL,
+        dataset=dataset,
+        w=spatial_dims[0],
+        h=spatial_dims[1])
+
+    lc = vc.add_view(
+        view_type=cm.LAYER_CONTROLLER,
+        dataset=dataset,
+        w=lc_dims[0],
+        h=lc_dims[1])
+
+    # if no anndata file, export the config with these minimal components
     if not anndata:
-        vc.layout(status / lc | spatial)
-        config_dict = vc.export(to='files', base_url='http://localhost', out_dir=output)
+        vc.layout(lc | spatial)
+        config_dict = vc.export(
+            to='files',
+            base_url='http://localhost',
+            out_dir=output)
         with open(Path(output).joinpath('config.json'), 'w') as f:
             json.dump(config_dict, f, indent=4)
         return
 
+    # read anndata file, compute embeddings
     adata = read_h5ad(anndata)
 
     params = params['do_phenotyping']
@@ -68,41 +99,100 @@
         sc.tl.tsne(adata, **embedding_options)
         mappings_obsm = 'X_tsne'
         mappings_obsm_name = "tSNE"
-    else:         # pca
+    else:
         sc.tl.pca(adata, **embedding_options)
         mappings_obsm = 'X_pca'
         mappings_obsm_name = "PCA"
 
-    adata.obsm['XY_centroid'] = adata.obs[['X_centroid', 'Y_centroid']].values
+    # Add spatial coords to obsm, although uncertain if this is needed
+    # FIXME: provide options for alternative coordinate colnames
+    adata.obsm['spatial'] = adata.obs[['X_centroid', 'Y_centroid']].values
 
+    # parse list of obs columns to use as cell type labels
     cell_set_obs = params['phenotype_factory']['phenotypes']
     if not isinstance(cell_set_obs, list):
         cell_set_obs = [x.strip() for x in cell_set_obs.split(',')]
+
+    # write anndata out as zarr hierarchy
+    zarr_filepath = join("data", "adata.zarr")
+    if not isdir(zarr_filepath):
+        adata = optimize_adata(
+            adata,
+            obs_cols=cell_set_obs,
+            obsm_keys=[mappings_obsm, 'spatial'],
+            optimize_X=True
+        )
+        adata.write_zarr(
+            zarr_filepath,
+            chunks=[adata.shape[0], VAR_CHUNK_SIZE]
+        )
+
+    # create a nicer label for the cell types to be displayed on the dashboard
     cell_set_obs_names = [obj[0].upper() + obj[1:] for obj in cell_set_obs]
+
+    # add anndata zarr to vitessce config
     dataset.add_object(
         AnnDataWrapper(
-            adata,
-            mappings_obsm=[mappings_obsm],
-            mappings_obsm_names=[mappings_obsm_name],
-            spatial_centroid_obsm='XY_centroid',
-            cell_set_obs=cell_set_obs,
-            cell_set_obs_names=cell_set_obs_names,
-            expression_matrix="X"
+            adata_path=zarr_filepath,
+            obs_feature_matrix_path="X",    # FIXME: provide rep options
+            obs_set_paths=['obs/' + x for x in cell_set_obs],
+            obs_set_names=cell_set_obs_names,
+            obs_locations_path='spatial',
+            obs_embedding_paths=['obsm/' + mappings_obsm],
+            obs_embedding_names=[mappings_obsm_name]
         )
     )
 
-    cellsets = vc.add_view(dataset, cm.CELL_SETS)
-    scattorplot = vc.add_view(dataset, cm.SCATTERPLOT, mapping=mappings_obsm_name)
-    heatmap = vc.add_view(dataset, cm.HEATMAP)
-    genes = vc.add_view(dataset, cm.GENES)
-    cell_set_sizes = vc.add_view(dataset, cm.CELL_SET_SIZES)
-    cell_set_expression = vc.add_view(dataset, cm.CELL_SET_EXPRESSION)
+    # add views
+    cellsets = vc.add_view(
+        view_type=cm.OBS_SETS,
+        dataset=dataset,
+        w=3,
+        h=3)
+
+    scatterplot = vc.add_view(
+        view_type=cm.SCATTERPLOT,
+        dataset=dataset,
+        mapping=mappings_obsm_name,
+        w=3,
+        h=6)
+
+    heatmap = vc.add_view(
+        view_type=cm.HEATMAP,
+        dataset=dataset,
+        w=3,
+        h=3)
+
+    genes = vc.add_view(
+        view_type=cm.FEATURE_LIST,
+        dataset=dataset,
+        w=3,
+        h=3)
+
+    cell_set_sizes = vc.add_view(
+        view_type=cm.OBS_SET_SIZES,
+        dataset=dataset,
+        w=3,
+        h=3)
+
+    cell_set_expression = vc.add_view(
+        view_type=cm.OBS_SET_FEATURE_VALUE_DISTRIBUTION,
+        dataset=dataset,
+        w=3,
+        h=6)
+
+    # define the dashboard layout
     vc.layout(
-        (status / genes / cell_set_expression)
-        | (cellsets / lc / scattorplot)
+        (cellsets / genes / cell_set_expression)
+        | (lc / scatterplot)
         | (cell_set_sizes / heatmap / spatial)
     )
-    config_dict = vc.export(to='files', base_url='http://localhost', out_dir=output)
+
+    # export the config file
+    config_dict = vc.export(
+        to='files',
+        base_url='http://localhost',
+        out_dir=output)
 
     with open(Path(output).joinpath('config.json'), 'w') as f:
         json.dump(config_dict, f, indent=4)
@@ -113,9 +203,10 @@
     aparser.add_argument("-i", "--inputs", dest="inputs", required=True)
     aparser.add_argument("-e", "--output", dest="output", required=True)
     aparser.add_argument("-g", "--image", dest="image", required=True)
+    aparser.add_argument("-f", "--offsets", dest="offsets", required=False)
     aparser.add_argument("-a", "--anndata", dest="anndata", required=False)
     aparser.add_argument("-m", "--masks", dest="masks", required=False)
 
     args = aparser.parse_args()
 
-    main(args.inputs, args.output, args.image, args.anndata, args.masks)
+    main(args.inputs, args.output, args.image, args.offsets, args.anndata, args.masks)