Mercurial > repos > perssond > quantification

--- a/CommandSingleCellExtraction.py	Thu Apr 07 16:54:04 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-#Script for parsing command line arguments and running single-cell
-#data extraction functions
-#Joshua Hess
-import ParseInput
-import SingleCellDataExtraction
-
-#Parse the command line arguments
-args = ParseInput.ParseInputDataExtract()
-
-#Run the MultiExtractSingleCells function
-SingleCellDataExtraction.MultiExtractSingleCells(**args)
--- a/ParseInput.py	Thu Apr 07 16:54:04 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,47 +0,0 @@
-#Functions for parsing command line arguments for ome ilastik prep
-import argparse
-
-
-def ParseInputDataExtract():
-   """Function for parsing command line arguments for input to single-cell
-   data extraction"""
-
-#if __name__ == '__main__':
-   parser = argparse.ArgumentParser()
-   parser.add_argument('--masks',nargs='+', required=True)
-   parser.add_argument('--image', required=True)
-   parser.add_argument('--channel_names', required=True)
-   parser.add_argument('--output', required=True)
-   parser.add_argument(
-      '--mask_props', nargs = "+",
-      help="""
-         Space separated list of additional metrics to be calculated for every mask.
-         This is for metrics that depend only on the cell mask. If the metric depends
-         on signal intensity, use --intensity-props instead.
-         See list at https://scikit-image.org/docs/dev/api/skimage.measure.html#regionprops
-      """
-   )
-   parser.add_argument(
-      '--intensity_props', nargs = "+",
-      help="""
-         Space separated list of additional metrics to be calculated for every marker separately.
-         By default only mean intensity is calculated.
-         If the metric doesn't depend on signal intensity, use --mask-props instead.
-         See list at https://scikit-image.org/docs/dev/api/skimage.measure.html#regionprops
-         Additionally available is gini_index, which calculates a single number
-         between 0 and 1, representing how unequal the signal is distributed in each region.
-         See https://en.wikipedia.org/wiki/Gini_coefficient
-      """
-   )
-   #parser.add_argument('--suffix')
-   args = parser.parse_args()
-   #Create a dictionary object to pass to the next function
-   dict = {'masks': args.masks, 'image': args.image,\
-    'channel_names': args.channel_names,'output':args.output,
-    'intensity_props': set(args.intensity_props if args.intensity_props is not None else []).union(["intensity_mean"]),
-    'mask_props': args.mask_props,
-   }
-   #Print the dictionary object
-   print(dict)
-   #Return the dictionary
-   return dict
--- a/SingleCellDataExtraction.py	Thu Apr 07 16:54:04 2022 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,269 +0,0 @@
-#Functions for reading in single cell imaging data
-#Joshua Hess
-
-#Import necessary modules
-import skimage.io
-import h5py
-import pandas as pd
-import numpy as np
-import os
-import skimage.measure as measure
-import tifffile
-
-from pathlib import Path
-
-import sys
-
-
-def gini_index(mask, intensity):
-    x = intensity[mask]
-    sorted_x = np.sort(x)
-    n = len(x)
-    cumx = np.cumsum(sorted_x, dtype=float)
-    return (n + 1 - 2 * np.sum(cumx) / cumx[-1]) / n
-
-def intensity_median(mask, intensity):
-    return np.median(intensity[mask])
-
-def MaskChannel(mask_loaded, image_loaded_z, intensity_props=["intensity_mean"]):
-    """Function for quantifying a single channel image
-
-    Returns a table with CellID according to the mask and the mean pixel intensity
-    for the given channel for each cell"""
-    # Look for regionprops in skimage
-    builtin_props = set(intensity_props).intersection(measure._regionprops.PROP_VALS)
-    # Otherwise look for them in this module
-    extra_props = set(intensity_props).difference(measure._regionprops.PROP_VALS)
-    dat = measure.regionprops_table(
-        mask_loaded, image_loaded_z,
-        properties = tuple(builtin_props),
-        extra_properties = [globals()[n] for n in extra_props]
-    )
-    return dat
-
-
-def MaskIDs(mask, mask_props=None):
-    """This function will extract the CellIDs and the XY positions for each
-    cell based on that cells centroid
-
-    Returns a dictionary object"""
-
-    all_mask_props = set(["label", "centroid", "area", "major_axis_length", "minor_axis_length", "eccentricity", "solidity", "extent", "orientation"])
-    if mask_props is not None:
-        all_mask_props = all_mask_props.union(mask_props)
-
-    dat = measure.regionprops_table(
-        mask,
-        properties=all_mask_props
-    )
-
-    name_map = {
-        "CellID": "label",
-        "X_centroid": "centroid-1",
-        "Y_centroid": "centroid-0",
-        "Area": "area",
-        "MajorAxisLength": "major_axis_length",
-        "MinorAxisLength": "minor_axis_length",
-        "Eccentricity": "eccentricity",
-        "Solidity": "solidity",
-        "Extent": "extent",
-        "Orientation": "orientation",
-    }
-    for new_name, old_name in name_map.items():
-        dat[new_name] = dat[old_name]
-    for old_name in set(name_map.values()):
-        del dat[old_name]
-
-    return dat
-
-def n_channels(image):
-    """Returns the number of channel in the input image. Supports [OME]TIFF and HDF5."""
-
-    image_path = Path(image)
-
-    if image_path.suffix in ['.tiff', '.tif', '.btf']:
-        s = tifffile.TiffFile(image).series[0]
-        ndim = len(s.shape)
-        if ndim == 2: return 1
-        elif ndim == 3: return min(s.shape)
-        else: raise Exception('mcquant supports only 2D/3D images.')
-
-    elif image_path.suffix in ['.h5', '.hdf5']:
-        f = h5py.File(image, 'r')
-        dat_name = list(f.keys())[0]
-        return f[dat_name].shape[3]
-
-    else:
-        raise Exception('mcquant currently supports [OME]TIFF and HDF5 formats only')
-
-def PrepareData(image,z):
-    """Function for preparing input for maskzstack function. Connecting function
-    to use with mc micro ilastik pipeline"""
-
-    image_path = Path(image)
-    print(f'{image_path} at {z}', file=sys.stderr)
-
-    #Check to see if image tif(f)
-    if image_path.suffix in ['.tiff', '.tif', '.btf']:
-        image_loaded_z = tifffile.imread(image, key=z)
-
-    #Check to see if image is hdf5
-    elif image_path.suffix in ['.h5', '.hdf5']:
-        #Read the image
-        f = h5py.File(image,'r')
-        #Get the dataset name from the h5 file
-        dat_name = list(f.keys())[0]
-        #Retrieve the z^th channel
-        image_loaded_z = f[dat_name][0,:,:,z]
-
-    else:
-        raise Exception('mcquant currently supports [OME]TIFF and HDF5 formats only')
-
-    #Return the objects
-    return image_loaded_z
-
-
-def MaskZstack(masks_loaded,image,channel_names_loaded, mask_props=None, intensity_props=["intensity_mean"]):
-    """This function will extract the stats for each cell mask through each channel
-    in the input image
-
-    mask_loaded: dictionary containing Tiff masks that represents the cells in your image.
-
-    z_stack: Multichannel z stack image"""
-
-    #Get the names of the keys for the masks dictionary
-    mask_names = list(masks_loaded.keys())
-
-    #Create empty dictionary to store channel results per mask
-    dict_of_chan = {m_name: [] for m_name in mask_names}
-    #Get the z channel and the associated channel name from list of channel names
-    print(f'channels: {channel_names_loaded}', file=sys.stderr)
-    print(f'num channels: {len(channel_names_loaded)}', file=sys.stderr)
-    for z in range(len(channel_names_loaded)):
-        #Run the data Prep function
-        image_loaded_z = PrepareData(image,z)
-
-        #Iterate through number of masks to extract single cell data
-        for nm in range(len(mask_names)):
-            #Use the above information to mask z stack
-            dict_of_chan[mask_names[nm]].append(
-                MaskChannel(masks_loaded[mask_names[nm]],image_loaded_z, intensity_props=intensity_props)
-            )
-        #Print progress
-        print("Finished "+str(z))
-
-    # Column order according to histoCAT convention (Move xy position to end with spatial information)
-    last_cols = (
-        "X_centroid",
-        "Y_centroid",
-        "column_centroid",
-        "row_centroid",
-        "Area",
-        "MajorAxisLength",
-        "MinorAxisLength",
-        "Eccentricity",
-        "Solidity",
-        "Extent",
-        "Orientation",
-    )
-    def col_sort(x):
-        if x == "CellID":
-            return -2
-        try:
-            return last_cols.index(x)
-        except ValueError:
-            return -1
-
-    #Iterate through the masks and format quantifications for each mask and property
-    for nm in mask_names:
-        mask_dict = {}
-        # Mean intensity is default property, stored without suffix
-        mask_dict.update(
-            zip(channel_names_loaded, [x["intensity_mean"] for x in dict_of_chan[nm]])
-        )
-        # All other properties are suffixed with their names
-        for prop_n in set(dict_of_chan[nm][0].keys()).difference(["intensity_mean"]):
-            mask_dict.update(
-                zip([f"{n}_{prop_n}" for n in channel_names_loaded], [x[prop_n] for x in dict_of_chan[nm]])
-            )
-        # Get the cell IDs and mask properties
-        mask_properties = pd.DataFrame(MaskIDs(masks_loaded[nm], mask_props=mask_props))
-        mask_dict.update(mask_properties)
-        dict_of_chan[nm] = pd.DataFrame(mask_dict).reindex(columns=sorted(mask_dict.keys(), key=col_sort))
-
-    # Return the dict of dataframes for each mask
-    return dict_of_chan
-
-def ExtractSingleCells(masks,image,channel_names,output, mask_props=None, intensity_props=["intensity_mean"]):
-    """Function for extracting single cell information from input
-    path containing single-cell masks, z_stack path, and channel_names path."""
-
-    #Create pathlib object for output
-    output = Path(output)
-
-    #Read csv channel names
-    channel_names_loaded = pd.read_csv(channel_names)
-    #Check for the presence of `marker_name` column
-    if 'marker_name' in channel_names_loaded:
-        #Get the marker_name column if more than one column (CyCIF structure)
-        channel_names_loaded_list = list(channel_names_loaded.marker_name)
-    #Consider the old one-marker-per-line plain text format
-    elif channel_names_loaded.shape[1] == 1:
-        #re-read the csv file and add column name
-        channel_names_loaded = pd.read_csv(channel_names, header = None)
-        channel_names_loaded_list = list(channel_names_loaded.iloc[:,0])
-    else:
-        raise Exception('%s must contain the marker_name column'%channel_names)
-
-    #Contrast against the number of markers in the image
-    if len(channel_names_loaded_list) != n_channels(image):
-        raise Exception("The number of channels in %s doesn't match the image"%channel_names)
-
-    #Check for unique marker names -- create new list to store new names
-    channel_names_loaded_checked = []
-    for idx,val in enumerate(channel_names_loaded_list):
-        #Check for unique value
-        if channel_names_loaded_list.count(val) > 1:
-            #If unique count greater than one, add suffix
-            channel_names_loaded_checked.append(val + "_"+ str(channel_names_loaded_list[:idx].count(val) + 1))
-        else:
-            #Otherwise, leave channel name
-            channel_names_loaded_checked.append(val)
-
-    #Read the masks
-    masks_loaded = {}
-    #iterate through mask paths and read images to add to dictionary object
-    for m in masks:
-        m_full_name = os.path.basename(m)
-        m_name = m_full_name.split('.')[0]
-        masks_loaded.update({str(m_name):skimage.io.imread(m,plugin='tifffile')})
-
-    scdata_z = MaskZstack(masks_loaded,image,channel_names_loaded_checked, mask_props=mask_props, intensity_props=intensity_props)
-    #Write the singe cell data to a csv file using the image name
-
-    im_full_name = os.path.basename(image)
-    im_name = im_full_name.split('.')[0]
-
-    # iterate through each mask and export csv with mask name as suffix
-    for k,v in scdata_z.items():
-        # export the csv for this mask name
-        scdata_z[k].to_csv(
-                            str(Path(os.path.join(str(output),
-                            str(im_name+"_{}"+".csv").format(k)))),
-                            index=False
-                            )
-
-
-def MultiExtractSingleCells(masks,image,channel_names,output, mask_props=None, intensity_props=["intensity_mean"]):
-    """Function for iterating over a list of z_stacks and output locations to
-    export single-cell data from image masks"""
-
-    print("Extracting single-cell data for "+str(image)+'...')
-
-    #Run the ExtractSingleCells function for this image
-    ExtractSingleCells(masks,image,channel_names,output, mask_props=mask_props, intensity_props=intensity_props)
-
-    #Print update
-    im_full_name = os.path.basename(image)
-    im_name = im_full_name.split('.')[0]
-    print("Finished "+str(im_name))
--- a/macros.xml	Thu Apr 07 16:54:04 2022 +0000
+++ b/macros.xml	Tue Sep 06 23:18:12 2022 +0000
@@ -2,31 +2,35 @@
 <macros>
     <xml name="requirements">
         <requirements>
-            <container type="docker">labsyspharm/quantification:@VERSION@</container>
+            <!--
             <requirement type="package" version="3.9">python</requirement>
             <requirement type="package" version="0.18.0">scikit-image</requirement>
             <requirement type="package">h5py</requirement>
             <requirement type="package">pandas</requirement>
             <requirement type="package">numpy</requirement>
             <requirement type="package">pathlib</requirement>
+            -->
+            <container type="docker">labsyspharm/quantification:@TOOL_VERSION@</container>
         </requirements>
     </xml>

     <xml name="version_cmd">
-        <version_command>echo @VERSION@</version_command>
+        <version_command>echo @TOOL_VERSION@</version_command>
     </xml>
     <xml name="citations">
         <citations>
         </citations>
     </xml>

-    <token name="@VERSION@">1.5.1</token>
+    <token name="@TOOL_VERSION@">1.5.1</token>
+    <token name="@VERSION_SUFFIX@">0</token>
     <token name="@CMD_BEGIN@"><![CDATA[
-    QUANT_PATH="";
-    if [ -f "/app/CommandSingleCellExtraction.py" ]; then
-        export QUANT_PATH="/app/CommandSingleCellExtraction.py";
+    QUANT_PATH='' &&
+    if [ -f '/app/CommandSingleCellExtraction.py' ]; then
+        export QUANT_PATH='python /app/CommandSingleCellExtraction.py';
     else
-        export QUANT_PATH="${__tool_directory__}/CommandSingleCellExtraction.py";
-    fi;
+        export QUANT_PATH='CommandSingleCellExtraction.py';
+    fi &&
+    \$QUANT_PATH
     ]]></token>
 </macros>
--- a/quantification.xml	Thu Apr 07 16:54:04 2022 +0000
+++ b/quantification.xml	Tue Sep 06 23:18:12 2022 +0000
@@ -1,83 +1,94 @@
-<tool id="quantification" name="Quantification" version="@VERSION@.7" profile="17.09">
-    <description>Single cell quantification, a module for single-cell data extraction given a segmentation mask and multi-channel image.</description>
+<tool id="quantification" name="MCQUANT" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="19.01">
+    <description>a module for single-cell data extraction</description>
     <macros>
         <import>macros.xml</import>
     </macros>
-
     <expand macro="requirements"/>
-    @VERSION_CMD@
+    <expand macro="version_cmd"/>

     <command detect_errors="exit_code"><![CDATA[
-    ln -s $image input.ome.tiff;
-    ln -s $primary_mask primary_mask.tiff;
-    #if $supp_masks
-    ln -s $supp_masks supp_mask.tiff;
+    ln -s '$image' 'input.ome.tiff' &&
+    ln -s '$primary_mask' 'primary_mask.tiff' &&
+    #if $supp_mask
+        ln -s '$supp_mask' 'supp_mask.tiff' &&
     #end if

-    mkdir ./tool_out;
+    mkdir './tool_out' &&

     @CMD_BEGIN@

-    python \$QUANT_PATH
-    --masks
-    primary_mask.tiff
-    #if $supp_masks
-    supp_mask.tiff
+    --masks 'primary_mask.tiff'
+    #if $supp_mask
+        'supp_mask.tiff'
     #end if

-    --image input.ome.tiff
-    --output ./tool_out
+    --image 'input.ome.tiff'
+    --output './tool_out'

-    #if $mask_props
-    --mask_props $mask_props
+    #if str($mask_props).strip()
+        --mask_props '$mask_props'
     #end if
-    #if $intensity_props
-    --intensity_props $intensity_props
+    #if str($intensity_props).strip()
+        --intensity_props '$intensity_props'
     #end if

-    --channel_names '$channel_names';
+    --channel_names '$channel_names' &&

-    cp tool_out/*primary_mask.csv primary_mask.csv
+    #if $supp_mask
+    mv tool_out/*supp_mask.csv supp_mask.csv &&
+    #end if
+
+    mv tool_out/*primary_mask.csv primary_mask.csv
     ]]></command>

     <inputs>
         <param name="image" type="data" format="tiff" label="Registered TIFF"/>
-        <param name="primary_mask" type="data" format="tiff" label="Primary Cell Mask"/>
-        <param name="supp_masks" type="data" optional="true" format="tiff" label="Additional Cell Masks"/>
+        <param name="primary_mask" type="data" format="tiff" label="Primary Mask"/>
+        <param name="supp_mask" type="data" optional="true" format="tiff" label="Additional Mask"/>
         <param name="channel_names" type="data" format="csv" label="Marker Channels"/>
         <param name="mask_props" type="text" label="Mask Metrics" help="Space separated list of additional metrics to be calculated for every mask."/>
         <param name="intensity_props" type="text" label="Intensity Metrics" help="Space separated list of additional metrics to be calculated for every marker separately."/>
     </inputs>

     <outputs>
-        <data format="csv" name="cellmask" from_work_dir="primary_mask.csv" label="CellMaskQuant"/>
-        <collection type="list" name="quantification" label="${tool.name} on ${on_string}">
-          <discover_datasets pattern="__designation_and_ext__" format="csv" directory="tool_out/" visible="false"/>
-        </collection>
+        <data format="csv" name="cellmask" from_work_dir="primary_mask.csv" label="Primary Mask Quantification"/>
+        <data format="csv" name="suppmask" from_work_dir="supp_mask.csv" label="Supplemental Mask Quantification">
+            <filter>supp_mask</filter>
+        </data>
      </outputs>
+     <tests>
+        <test>
+            <param name="image" value="test.tiff" />
+            <param name="primary_mask" value="mask.tiff" />
+            <param name="supp_mask" value="supp_mask.tiff" />
+            <param name="channel_names" value="channels.csv" />
+            <output name="cellmask" ftype="csv">
+                <assert_contents>
+                    <has_n_columns n="11" sep="," delta="1" />
+                </assert_contents>
+            </output>
+            <output name="suppmask" ftype="csv">
+                <assert_contents>
+                    <has_n_columns n="11" sep="," delta="1" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
     <help><![CDATA[
-# Single cell quantification
-Module for single-cell data extraction given a segmentation mask and multi-channel image. The CSV structure is aligned with histoCAT output.
-
-**CommandSingleCellExtraction.py**:
-
-* `--masks` Paths to where masks are stored (Ex: ./segmentation/cellMask.tif) -> If multiple masks are selected the first mask will be used for spatial feature extraction but all will be quantified
+
+--------
+MCQUANT
+--------
+**MCQUANT** is a module for single-cell quantification given a segmentation mask and a multi-channel image. The structure of the output CSV is aligned with histoCAT output.

-* `--image` Path to image(s) for quantification.  (Ex: ./registration/*.h5) -> works with .h(df)5 or .tif(f)
-
-* `--output` Path to output directory. (Ex: ./feature_extraction)
-
-* `--channel_names` csv file containing the channel names for the z-stack (Ex: ./my_channels.csv)
+**Inputs**
+1. A fully stitched and registered image in .ome.tif format. Nextflow will use images in the registration/ and dearray/ subfolders as appropriate.
+2. One or more segmentation masks in .tif format. Nextflow will use files in the segmentation/ subfolder within the project.
+3. A .csv file containing a marker_name column specifying names of individual channels. Nextflow will look for this file in the project directory.

-# Run script
-`python CommandSingleCellExtraction.py --masks ./segmentation/cellMask.tif ./segmentation/membraneMask.tif --image ./registration/Exemplar_001.h5  --output ./feature_extraction --channel_names ./my_channels.csv`
+**Outputs**
+One cell-by-feature CSV table per mask, mapping Cell IDs to marker expression and morphological features (including x,y coordinates).

-# Main developer
-Denis Schapiro (https://github.com/DenisSch)
-
-Joshua Hess (https://github.com/JoshuaHess12)
-
-Jeremy Muhlich (https://github.com/jmuhlich)
     ]]></help>
     <expand macro="citations" />
 </tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/channels.csv	Tue Sep 06 23:18:12 2022 +0000
@@ -0,0 +1,2 @@
+channel_number,cycle_number,marker_name,Filter,excitation_wavelength,emission_wavelength
+1,1,DNA_1,DAPI,395,431
Binary file test-data/mask.tiff has changed
Binary file test-data/supp_mask.tiff has changed
Binary file test-data/test.tiff has changed