decoupler_pathway_inference: decoupler

comparison decoupler_pseudobulk.py @ 7:2c5686d627c0 draft

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9

author	ebi-gxa
date	Sun, 27 Oct 2024 20:39:33 +0000
parents	1d140c4a5875
children	db14ac3f6b43

comparison

equal deleted inserted replaced

-:1d140c4a5875
+:2c5686d627c0
 >>> import pandas as pd
 >>> import numpy as np
 >>> import os
 >>> from io import StringIO
 >>> contrast_file = StringIO(f"contrast{os.linesep}condition1-\
-condition2{os.linesep}")
+condition2{os.linesep}\
+2*(condition1)-condition2{os.linesep}")
 >>> min_perc_cells_expression = 30.0
 >>> data = {
 ...     'obs': pd.DataFrame({'condition': ['condition1', 'condition1',
 ...                          'condition2', 'condition2']}),
 ...     'X': np.array([[1, 0, 0, 0, 0], [0, 0, 2, 2, 0],
 >>> df = identify_genes_to_filter_per_contrast(
 ...     contrast_file, min_perc_cells_expression, adata, 'condition'
 ... ) # doctest:+ELLIPSIS
 Identifying genes to filter using ...
 >>> df.head() # doctest:+ELLIPSIS
 contrast gene
-0  condition1-condition2    ...
+0      condition1-condition2...
-1  condition1-condition2    ...
+1      condition1-condition2...
+2  2*(condition1)-condition2...
+3  2*(condition1)-condition2...
 """
 import re
 # Implement the logic to identify genes to filter per contrast
 # This is a placeholder implementation
 contrasts = pd.read_csv(contrast_file, sep="\t")
 # Iterate over each line in the contrast file
 genes_filter_for_contrast = dict()
 for contrast in contrasts.iloc[:, 0]:
 conditions = set(sides_regex.split(contrast))
+selected_conditions = []
+failed_conditions = []
+for condition in conditions:
+# remove any starting or trailing whitespaces from condition
+condition = condition.strip()
+if len(condition) == 0:
+continue
+# check if the condition is simply a number, then skip it
+if condition.isnumeric():
+continue
+if condition not in adata.obs[obs_field].unique():
+# add condition to failed_conditions
+failed_conditions.append(condition)
+continue
+# append to selected_conditions
+selected_conditions.append(condition)
+if len(failed_conditions) > 0:
+raise ValueError(
+f"Condition(s) '{failed_conditions}' "
+f"from contrast {contrast} "
+f"is/are not present in the "
+f"obs_field '{obs_field}' from the AnnData object."
+f"Possible values are: "
+f"{', '.join(adata.obs[obs_field].unique())}.")
 # we want to find the genes that are below the threshold
 # of % of cells expressed for ALL the conditions in the
 # contrast. It is enough for one of the conditions
 # of the contrast to have the genes expressed above
 # the threshold of % of cells to be of interest.
-for condition in conditions:
+for condition in selected_conditions:
-# remove any starting or trailing whitespaces from condition
-condition = condition.strip()
-if condition not in adata.obs[obs_field].unique():
-raise ValueError(
-f"Condition '{condition}' from contrast {contrast}"
-f" is not present in the "
-f"obs_field '{obs_field}' from the AnnData object."
-f"Possible values are: "
-f"{', '.join(adata.obs[obs_field].unique())}."
-)
 # check the percentage of cells that express each gene
 # Filter the AnnData object based on the obs_field value
 adata_filtered = adata[adata.obs[obs_field] == condition]
 # Calculate the percentage of cells expressing each gene
 gene_expression = (adata_filtered.X > 0).mean(axis=0) * 100

Mercurial > repos > ebi-gxa > decoupler_pathway_inference

comparison decoupler_pseudobulk.py @ 7:2c5686d627c0 draft