annotate decoupler_pseudobulk.py @ 16:508a93e34599 draft default tip

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
author ebi-gxa
date Wed, 19 Feb 2025 16:55:58 +0000
parents a559be56720c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
1 import argparse
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
2
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
3 import anndata
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
4 import decoupler
16
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
5 import numpy as np
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
6 import pandas as pd
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
7
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
8
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
def get_pseudobulk(
    adata,
    sample_col,
    groups_col,
    layer=None,
    mode="sum",
    min_cells=10,
    min_counts=1000,
    use_raw=False,
):
    """
    Build a pseudobulk profile by delegating to ``decoupler.get_pseudobulk``.

    This is a thin wrapper: every argument is forwarded unchanged, by
    keyword, to the decoupler function of the same name, and its return
    value is returned as-is. See the decoupler documentation for the exact
    meaning of each option.

    >>> import scanpy as sc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> adata.X = abs(adata.X).astype(int)
    >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
    """
    # Collect the forwarded options in one place so the delegation below
    # stays a single, easy-to-read call.
    forwarded_options = {
        "sample_col": sample_col,
        "groups_col": groups_col,
        "layer": layer,
        "mode": mode,
        "use_raw": use_raw,
        "min_cells": min_cells,
        "min_counts": min_counts,
    }
    return decoupler.get_pseudobulk(adata, **forwarded_options)
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
36
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
37
16
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
def create_pseudo_replicates(adata, sample_key, num_replicates, seed=None):
    """
    Create pseudo replicates for each sample in the sample_key groups.

    The cells of every sample are shuffled and split into
    ``num_replicates`` chunks that are as evenly sized as possible. Each
    cell is labelled ``<sample>_rep<i>`` in a new ``<sample_key>_pseudo``
    column of ``adata.obs``. ``adata`` is modified in place and returned.

    Parameters
    ----------
    adata : anndata.AnnData
        The AnnData object.
    sample_key : str
        The column in adata.obs that defines the samples.
    num_replicates : int
        Number of pseudo replicates to create per sample. Must be >= 1.
    seed : int, optional
        If given, passed to ``np.random.seed`` before shuffling so the
        assignment is reproducible. Note that this seeds NumPy's global
        random state.

    Returns
    -------
    anndata.AnnData
        The AnnData object with pseudo replicates.

    Raises
    ------
    ValueError
        If ``num_replicates`` is smaller than 1.

    Examples
    --------
    >>> import anndata
    >>> import pandas as pd
    >>> import numpy as np
    >>> data = {
    ...     'obs': pd.DataFrame({'sample': ['A', 'A', 'B', 'B']}),
    ...     'X': np.array([[1, 0], [0, 1], [1, 1], [0, 0]])
    ... }
    >>> adata = anndata.AnnData(X=data['X'], obs=data['obs'])
    >>> adata = create_pseudo_replicates(adata, 'sample', 2, seed=0)
    >>> sorted(adata.obs['sample_pseudo'].tolist())
    ['A_rep1', 'A_rep2', 'B_rep1', 'B_rep2']
    """
    if num_replicates < 1:
        raise ValueError(
            f"num_replicates must be >= 1, got {num_replicates}"
        )

    if seed is not None:
        np.random.seed(seed)

    new_sample_key = f"{sample_key}_pseudo"
    adata.obs[new_sample_key] = adata.obs[sample_key].astype(str)

    for sample in adata.obs[sample_key].unique():
        # Request an explicit copy: the array is shuffled in place and
        # must not alias (possibly read-only) pandas-owned memory.
        sample_indices = adata.obs[
            adata.obs[sample_key] == sample
        ].index.to_numpy(copy=True)
        np.random.shuffle(sample_indices)  # Shuffle the indices to randomize

        # array_split spreads any remainder over the first chunks, so every
        # cell lands in a replicate instead of keeping the bare sample name.
        replicate_chunks = np.array_split(sample_indices, num_replicates)
        for rep_number, replicate_indices in enumerate(
            replicate_chunks, start=1
        ):
            if replicate_indices.size == 0:
                # More replicates requested than cells available.
                continue
            adata.obs.loc[replicate_indices, new_sample_key] = (
                adata.obs.loc[replicate_indices, new_sample_key]
                + f"_rep{rep_number}"
            )

    return adata
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
90
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
91
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
def prepend_c_to_index(index_value):
    """
    Prefix ``index_value`` with "C" when its first character is a digit.

    Falsy values (e.g. an empty string or None) and values that do not
    start with a digit are returned unchanged.

    >>> prepend_c_to_index("1_sample")
    'C1_sample'
    >>> prepend_c_to_index("sample_1")
    'sample_1'
    """
    starts_with_digit = bool(index_value) and index_value[0].isdigit()
    return "C" + index_value if starts_with_digit else index_value
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
96
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
97
8
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
98 def genes_to_ignore_per_contrast_field(
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
99 count_matrix_df,
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
100 samples_metadata,
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
101 sample_metadata_col_contrasts,
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
102 min_counts_per_sample=5,
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
103 use_cpms=False,
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
104 ):
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
105 """
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
106 # This function calculates the genes to ignore per contrast field
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
107 # (e.g., bulk_labels, louvain).
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
108 # It does this by first getting the count matrix for each group,
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
109 # then identifying genes with a count below a specified threshold.
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
110 # The genes to ignore are those that are present in more than a specified
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
111 # number of groups.
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
112
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
113 >>> import pandas as pd
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
114 >>> samples_metadata = pd.DataFrame({'sample':
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
115 ... ['S1', 'S2', 'S3',
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
116 ... 'S4', 'S5', 'S6'],
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
117 ... 'contrast_field':
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
118 ... ['A', 'A', 'A', 'B', 'B', 'B']})
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
119 >>> count_matrix_df = pd.DataFrame(
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
120 ... {'S1':
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
121 ... [30, 1, 40, 50, 30],
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
122 ... 'S2':
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
123 ... [40, 2, 60, 50, 80],
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
124 ... 'S3':
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
125 ... [80, 1, 60, 50, 50],
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
126 ... 'S4': [1, 50, 50, 50, 2],
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
127 ... 'S5': [3, 40, 40, 40, 2],
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
128 ... 'S6': [0, 50, 50, 50, 1]})
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
129 >>> count_matrix_df.index = ['Gene1', 'Gene2', 'Gene3', 'Gene4', 'Gene5']
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
130 >>> df = genes_to_ignore_per_contrast_field(count_matrix_df,
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
131 ... samples_metadata, min_counts_per_sample=5,
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
132 ... sample_metadata_col_contrasts='contrast_field')
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
133 >>> df[df['contrast_field'] == 'A'].genes_to_ignore.tolist()[0]
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
134 'Gene2'
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
135 >>> df[df['contrast_field'] == 'B'].genes_to_ignore.tolist()[0]
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
136 'Gene1'
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
137 >>> df[df['contrast_field'] == 'B'].genes_to_ignore.tolist()[1]
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
138 'Gene5'
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
139 """
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
140
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
141 # Initialize a dictionary to store the genes to ignore per contrast field
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
142 contrast_fields = []
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
143 genes_to_ignore = []
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
144
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
145 # Iterate over the contrast fields
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
146 for contrast_field in samples_metadata[
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
147 sample_metadata_col_contrasts
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
148 ].unique():
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
149 # Get the count matrix for the current contrast field
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
150 count_matrix_field = count_matrix_df.loc[
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
151 :,
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
152 (
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
153 samples_metadata[sample_metadata_col_contrasts]
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
154 == contrast_field
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
155 ).tolist(),
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
156 ]
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
157
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
158 # We derive min_counts from the number of samples with that
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
159 # contrast_field value
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
160 min_counts = count_matrix_field.shape[1] * min_counts_per_sample
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
161
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
162 if use_cpms:
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
163 # Convert counts to counts per million (CPM)
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
164 count_matrix_field = (
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
165 count_matrix_field.div(count_matrix_field.sum(axis=1), axis=0)
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
166 * 1e6
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
167 )
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
168 min_counts = 1 # use 1 CPM
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
169
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
170 # Calculate the total number of cells in the current contrast field
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
171 # (this produces a vector of counts per gene)
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
172 total_counts_per_gene = count_matrix_field.sum(axis=1)
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
173
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
174 # Identify genes with a count below the specified threshold
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
175 genes = total_counts_per_gene[
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
176 total_counts_per_gene < min_counts
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
177 ].index.tolist()
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
178 if len(genes) > 0:
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
179 # genes_to_ignore[contrast_field] = " ".join(genes)
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
180 for gene in genes:
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
181 genes_to_ignore.append(gene)
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
182 contrast_fields.append(contrast_field)
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
183 # transform gene_to_ignore to a DataFrame
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
184 # genes_to_ignore_df = pd.DataFrame(genes_to_ignore.items(),
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
185 # columns=["contrast_field", "genes_to_ignore"])
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
186 genes_to_ignore_df = pd.DataFrame(
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
187 {"contrast_field": contrast_fields, "genes_to_ignore": genes_to_ignore}
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
188 )
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
189 return genes_to_ignore_df
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
190
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
191
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
192 # write results for loading into DESeq2
8
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
def write_DESeq2_inputs(
    pdata,
    layer=None,
    output_dir="",
    factor_fields=None,
    min_counts_per_sample_marking=20,
):
    """
    Write the tab-separated tables derived from a pseudobulk object.

    Files are written in this order, each prefixed with ``output_dir``:
    ``col_metadata.tsv`` (from ``pdata.obs``), ``gene_metadata.tsv`` (from
    ``pdata.var``), ``counts_matrix.tsv`` (genes as rows, samples as
    columns) and, only when ``factor_fields`` is given,
    ``genes_to_ignore_per_contrast_field.tsv``.

    :param pdata: object exposing ``obs``, ``var``, ``X`` and ``layers``
        (presumably the AnnData returned by ``get_pseudobulk``, as in the
        example below -- confirm against callers).
    :param layer: key of ``pdata.layers`` to write as the counts matrix;
        ``None`` means ``pdata.X`` is used.
    :param output_dir: directory prefix for every output file; a trailing
        "/" is appended when it is non-empty and lacks one.
    :param factor_fields: columns of ``obs`` to keep (in that order) in the
        column metadata file; its first element is passed on as the
        contrast column to ``genes_to_ignore_per_contrast_field``.
    :param min_counts_per_sample_marking: forwarded as
        ``min_counts_per_sample`` to ``genes_to_ignore_per_contrast_field``.

    >>> import scanpy as sc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> adata.X = abs(adata.X).astype(int)
    >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
    >>> write_DESeq2_inputs(pseudobulk)
    """
    # Make sure a non-empty output_dir can be used as a plain path prefix.
    if not (output_dir == "" or output_dir.endswith("/")):
        output_dir += "/"

    # Work on a copy so the sample names in pdata.obs are left untouched.
    sample_meta = pdata.obs.copy()
    # Index labels are passed through prepend_c_to_index (labels starting
    # with digits get a leading C instead).
    sample_meta = sample_meta.rename(index=prepend_c_to_index)
    # Dashes and spaces are swapped for underscores so the sample names
    # are not altered when R reads them as column names.
    sample_meta.index = sample_meta.index.str.replace("-", "_").str.replace(
        " ", "_"
    )

    # Column (sample) metadata: restricted to factor_fields, in the order
    # given, when those were requested; the full table otherwise.
    col_metadata_file = f"{output_dir}col_metadata.tsv"
    meta_to_write = sample_meta[factor_fields] if factor_fields else sample_meta
    meta_to_write.to_csv(col_metadata_file, sep="\t", index=True)

    # Gene metadata comes straight from var.
    pdata.var.to_csv(f"{output_dir}gene_metadata.tsv", sep="\t", index=True)

    # Counts matrix, transposed so genes are rows and samples are columns;
    # taken from the requested layer, or from X when no layer is named.
    matrix = pdata.X if layer is None else pdata.layers[layer]
    counts_df = pd.DataFrame(
        matrix.T,
        index=pdata.var.index,
        columns=sample_meta.index,
    )
    counts_df.to_csv(f"{output_dir}counts_matrix.tsv", sep="\t", index_label="")

    # The per-contrast list of genes to ignore needs a contrast column,
    # so there is nothing more to write without factor fields.
    if not factor_fields:
        return

    ignored_genes = genes_to_ignore_per_contrast_field(
        count_matrix_df=counts_df,
        samples_metadata=sample_meta,
        sample_metadata_col_contrasts=factor_fields[0],
        min_counts_per_sample=min_counts_per_sample_marking,
    )
    ignored_genes.to_csv(
        f"{output_dir}genes_to_ignore_per_contrast_field.tsv", sep="\t"
    )
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
251
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
252
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
def plot_pseudobulk_samples(
    pseudobulk_data,
    groupby,
    figsize=(10, 10),
    save_path=None,
):
    """
    Draw the decoupler pseudobulk samples plot and either save or show it.

    :param pseudobulk_data: pseudobulk object handed unchanged to
        ``decoupler.plot_psbulk_samples``.
    :param groupby: grouping passed through as ``groupby``.
    :param figsize: figure size passed through as ``figsize``.
    :param save_path: when truthy, the figure is saved as
        ``pseudobulk_samples.png`` under this path; otherwise the figure
        is shown instead.

    >>> import scanpy as sc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> adata.X = abs(adata.X).astype(int)
    >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
    >>> plot_pseudobulk_samples(pseudobulk,
    ...     groupby=["bulk_labels", "louvain"],
    ...     figsize=(10, 10))
    """
    # return_fig=True hands back the figure so it can be saved or shown here.
    figure = decoupler.plot_psbulk_samples(
        pseudobulk_data,
        groupby=groupby,
        figsize=figsize,
        return_fig=True,
    )

    # Without a destination there is nothing to save: just display it.
    if not save_path:
        figure.show()
        return

    target_png = f"{save_path}/pseudobulk_samples.png"
    figure.savefig(target_png)
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
275
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
276
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
def plot_filter_by_expr(
    pseudobulk_data,
    group,
    min_count=None,
    min_total_count=None,
    save_path=None,
):
    """
    Draw decoupler's filter-by-expression plot and either save or show it.

    The arguments ``group``, ``min_count`` and ``min_total_count`` are
    forwarded unchanged to ``decoupler.plot_filter_by_expr``, which is asked
    to return the figure. When ``save_path`` is truthy the figure is written
    to ``<save_path>/filter_by_expr.png``; otherwise ``show()`` is called on
    it. Nothing is returned.

    >>> import scanpy as sc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> adata.X = abs(adata.X).astype(int)
    >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
    >>> plot_filter_by_expr(pseudobulk, group="bulk_labels",
    ...     min_count=10, min_total_count=200)
    """
    plot_kwargs = {
        "group": group,
        "min_count": min_count,
        "min_total_count": min_total_count,
        "return_fig": True,
    }
    figure = decoupler.plot_filter_by_expr(pseudobulk_data, **plot_kwargs)

    # No destination given: display the figure instead of writing a file.
    if not save_path:
        figure.show()
        return

    figure.savefig(f"{save_path}/filter_by_expr.png")
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
303
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
304
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
def filter_by_expr(pdata, min_count=None, min_total_count=None):
    """
    Return a copy of ``pdata`` restricted to the genes picked by decoupler.

    ``min_count`` and ``min_total_count`` are forwarded unchanged to
    ``decoupler.filter_by_expr``; its result is used as the column (second
    axis) selector on ``pdata``. The input object itself is not modified
    by this function; the selection is copied before being returned.

    >>> import scanpy as sc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> adata.X = abs(adata.X).astype(int)
    >>> pseudobulk = get_pseudobulk(adata, "bulk_labels", "louvain")
    >>> pdata_filt = filter_by_expr(pseudobulk,
    ...     min_count=10, min_total_count=200)
    """
    thresholds = {"min_count": min_count, "min_total_count": min_total_count}
    kept_genes = decoupler.filter_by_expr(pdata, **thresholds)

    # Slice first, then copy, so the caller gets an independent object.
    subset = pdata[:, kept_genes]
    return subset.copy()
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
318
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
319
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
def check_fields(fields, adata, obs=True, context=None):
    """
    Validate that every requested field is a column of the AnnData table.

    Parameters
    ----------
    fields
        Iterable of column names that must be present.
    adata
        Object exposing ``.obs.columns`` and ``.var.columns``.
    obs
        When true (default) the names are looked up in ``adata.obs``,
        otherwise in ``adata.var``.
    context
        Optional free-text description of where the fields came from; it is
        included in the error message when given.

    Returns
    -------
    None when all fields are present.

    Raises
    ------
    ValueError
        If at least one field is not a column of the selected table. The
        message lists the requested fields, the missing ones, and the
        available columns (sorted, so the text is reproducible).

    >>> import scanpy as sc
    >>> adata = sc.datasets.pbmc68k_reduced()
    >>> check_fields(["bulk_labels", "louvain"], adata, obs=True)
    """
    slot = "obs" if obs else "var"
    available = set(getattr(adata, slot).columns)
    missing = [field for field in fields if field not in available]
    if not missing:
        return

    # Strip the context so callers passing padded text do not produce
    # doubled spaces in the message.
    legend = f", passed in {str(context).strip()}," if context else ""

    # Adjacent f-strings are used instead of backslash continuations inside
    # one literal, which would embed the source indentation in the message.
    raise ValueError(
        f"Some of the following fields{legend} are not present "
        f"in adata.{slot}: {fields}. "
        f"Missing fields: {missing}. "
        f"Possible fields are: {sorted(available, key=str)}"
    )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
344
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
345
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
346 def main(args):
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
347 # Load AnnData object from file
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
348 adata = anndata.read_h5ad(args.adata_file)
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
349
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
350 # Merge adata.obs fields specified in args.adata_obs_fields_to_merge
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
351 if args.adata_obs_fields_to_merge:
2
130e25d3ce92 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 85a3118a571246d88aaad26733d0c62009cb736b
ebi-gxa
parents: 1
diff changeset
352 # first split potential groups by ":" and iterate over them
130e25d3ce92 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 85a3118a571246d88aaad26733d0c62009cb736b
ebi-gxa
parents: 1
diff changeset
353 for group in args.adata_obs_fields_to_merge.split(":"):
130e25d3ce92 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 85a3118a571246d88aaad26733d0c62009cb736b
ebi-gxa
parents: 1
diff changeset
354 fields = group.split(",")
130e25d3ce92 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 85a3118a571246d88aaad26733d0c62009cb736b
ebi-gxa
parents: 1
diff changeset
355 check_fields(fields, adata)
10
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
356 merge_adata_obs_fields(fields, adata)
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
357
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
358 check_fields([args.groupby, args.sample_key], adata)
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
359
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
360 factor_fields = None
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
361 if args.factor_fields:
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
362 factor_fields = args.factor_fields.split(",")
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
363 check_fields(factor_fields, adata)
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
364
16
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
365 # Create pseudo replicates if specified
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
366 if args.num_pseudo_replicates:
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
367 adata = create_pseudo_replicates(
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
368 adata, args.sample_key, args.num_pseudo_replicates, seed=args.seed
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
369 )
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
370 args.sample_key = f"{args.sample_key}_pseudo"
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
371
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
372 print(f"Using mode: {args.mode}")
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
373 # Perform pseudobulk analysis
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
374 pseudobulk_data = get_pseudobulk(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
375 adata,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
376 sample_col=args.sample_key,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
377 groups_col=args.groupby,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
378 layer=args.layer,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
379 mode=args.mode,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
380 use_raw=args.use_raw,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
381 min_cells=args.min_cells,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
382 min_counts=args.min_counts,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
383 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
384
9
bd4b54b75888 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents: 8
diff changeset
385 print("Created pseudo-bulk AnnData, checking if fields still make sense.")
bd4b54b75888 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents: 8
diff changeset
386 print(
10
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
387 "If this fails this check, it might mean that you asked for factors \
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
388 that are not compatible with you sample identifiers (ie. asked for \
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
389 phase in the factors, but each sample contains more than one phase,\
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
390 try joining fields)."
9
bd4b54b75888 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents: 8
diff changeset
391 )
bd4b54b75888 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents: 8
diff changeset
392 if factor_fields:
bd4b54b75888 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents: 8
diff changeset
393 check_fields(
bd4b54b75888 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents: 8
diff changeset
394 factor_fields,
bd4b54b75888 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents: 8
diff changeset
395 pseudobulk_data,
bd4b54b75888 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents: 8
diff changeset
396 context=" after creation of pseudo-bulk AnnData",
bd4b54b75888 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents: 8
diff changeset
397 )
bd4b54b75888 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents: 8
diff changeset
398 print("Factors requested are adequate for the pseudo-bulked AnnData!")
bd4b54b75888 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 54818daabaf1251642a267e5766f13741cb7faeb
ebi-gxa
parents: 8
diff changeset
399
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
400 # Plot pseudobulk samples
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
401 plot_pseudobulk_samples(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
402 pseudobulk_data,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
403 args.groupby,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
404 save_path=args.save_path,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
405 figsize=args.plot_samples_figsize,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
406 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
407
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
408 plot_filter_by_expr(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
409 pseudobulk_data,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
410 group=args.groupby,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
411 min_count=args.min_counts,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
412 min_total_count=args.min_total_counts,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
413 save_path=args.save_path,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
414 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
415
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
416 # Filter by expression if enabled
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
417 if args.filter_expr:
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
418 filtered_adata = filter_by_expr(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
419 pseudobulk_data,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
420 min_count=args.min_counts,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
421 min_total_count=args.min_total_counts,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
422 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
423
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
424 pseudobulk_data = filtered_adata
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
425
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
426 # Save the pseudobulk data
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
427 if args.anndata_output_path:
8
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
428 pseudobulk_data.write_h5ad(
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
429 args.anndata_output_path, compression="gzip"
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
430 )
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
431
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
432 write_DESeq2_inputs(
8
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
433 pseudobulk_data,
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
434 output_dir=args.deseq2_output_path,
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
435 factor_fields=factor_fields,
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
436 min_counts_per_sample_marking=args.min_counts_per_sample_marking,
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
437 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
438
10
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
439 # if contrasts file is provided, produce a file with genes that should be
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
440 # filtered for each contrasts
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
441 if args.contrasts_file:
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
442 contrast_genes_df = identify_genes_to_filter_per_contrast(
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
443 contrast_file=args.contrasts_file,
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
444 min_perc_cells_expression=args.min_gene_exp_perc_per_cell,
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
445 adata=adata,
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
446 obs_field=args.groupby
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
447 )
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
448 contrast_genes_df.to_csv(
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
449 f"{args.save_path}/genes_to_filter_by_contrast.tsv",
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
450 sep="\t",
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
451 index=False,
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
452 )
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
453
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
454
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
def merge_adata_obs_fields(obs_fields_to_merge, adata):
    """
    Merge several adata.obs columns into a single underscore-joined column.

    The merged column is named by joining the field names with "_" and
    holds, for every observation, the string values of those fields joined
    with "_" in the order given. ``adata.obs`` is modified in place.

    The merged column is always rebuilt from the source fields, so calling
    this function again with the same fields gives the same result instead
    of appending to the values written by the previous call.

    Parameters
    ----------
    obs_fields_to_merge : list of str, or str
        Names of the adata.obs columns to merge. A single string is split
        on commas (e.g. "bulk_labels,louvain").
    adata : anndata.AnnData
        The AnnData object whose ``obs`` receives the merged column.

    Returns
    -------
    None
        The AnnData object is updated in place.

    Raises
    ------
    ValueError
        If any requested field is not a column of adata.obs. All fields are
        checked before adata.obs is touched.

    docstring tests:
    >>> import scanpy as sc
    >>> ad = sc.datasets.pbmc68k_reduced()
    >>> merge_adata_obs_fields(["bulk_labels","louvain"], ad)
    >>> ad.obs.columns
    Index(['bulk_labels', 'n_genes', 'percent_mito', 'n_counts', 'S_score',
           'G2M_score', 'phase', 'louvain', 'bulk_labels_louvain'],
          dtype='object')
    """
    if isinstance(obs_fields_to_merge, str):
        # Honour the documented "comma separated" form; iterating a bare
        # string would otherwise walk it character by character.
        obs_fields_to_merge = obs_fields_to_merge.split(",")
    fields = list(obs_fields_to_merge)

    # Validate every field before mutating anything, so a bad name cannot
    # leave a half-built merged column behind in adata.obs.
    for field in fields:
        if field not in adata.obs.columns:
            raise ValueError(
                f"The '{field}' column is not present in adata.obs."
            )

    if not fields:
        # Nothing to merge; leave adata.obs untouched.
        return

    field_name = "_".join(fields)

    # Build the merged values from scratch rather than extending whatever
    # may already be stored under field_name (a previous call, or the source
    # column itself when only one field is given).
    merged = adata.obs[fields[0]].astype(str)
    for field in fields[1:]:
        merged = merged + "_" + adata.obs[field].astype(str)
    adata.obs[field_name] = merged
10
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
492
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
493
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
494 def identify_genes_to_filter_per_contrast(
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
495 contrast_file, min_perc_cells_expression, adata, obs_field
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
496 ):
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
497 """
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
498 Identify genes to filter per contrast based on expression percentage.
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
499 We need those genes to be under the threshold for all conditions
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
500 in a contrast to be identified for further filtering. If
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
501 one condition has the gene expressed above the threshold, the gene
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
502 becomes of interest (it can be highly up or down regulated).
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
503
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
504 Parameters
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
505 ----------
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
506 contrast_file : str
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
507 Path to the contrasts file.
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
508 min_perc_cells_expression : float
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
509 Minimum percentage of cells that should express a gene.
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
510 adata: adata
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
511 Original AnnData file
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
512 obs_field: str
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
513 Field in the AnnData observations where the contrasts are defined.
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
514
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
515 Returns
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
516 -------
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
517 None
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
518
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
519 Examples
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
520 --------
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
521 >>> import anndata
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
522 >>> import pandas as pd
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
523 >>> import numpy as np
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
524 >>> import os
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
525 >>> from io import StringIO
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
526 >>> contrast_file = StringIO(f"contrast{os.linesep}condition1-\
12
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
527 condition2{os.linesep}\
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
528 2*(condition1)-condition2{os.linesep}")
10
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
529 >>> min_perc_cells_expression = 30.0
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
530 >>> data = {
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
531 ... 'obs': pd.DataFrame({'condition': ['condition1', 'condition1',
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
532 ... 'condition2', 'condition2']}),
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
533 ... 'X': np.array([[1, 0, 0, 0, 0], [0, 0, 2, 2, 0],
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
534 ... [0, 0, 1, 1, 0], [0, 0, 0, 2, 0]]),
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
535 ... }
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
536 >>> adata = anndata.AnnData(X=data['X'], obs=data['obs'])
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
537 >>> df = identify_genes_to_filter_per_contrast(
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
538 ... contrast_file, min_perc_cells_expression, adata, 'condition'
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
539 ... ) # doctest:+ELLIPSIS
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
540 Identifying genes to filter using ...
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
541 >>> df.head() # doctest:+ELLIPSIS
12
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
542 contrast gene
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
543 0 condition1-condition2...
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
544 1 condition1-condition2...
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
545 2 2*(condition1)-condition2...
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
546 3 2*(condition1)-condition2...
10
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
547 """
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
548 import re
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
549
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
550 # Implement the logic to identify genes to filter per contrast
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
551 # This is a placeholder implementation
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
552 print(
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
553 f"Identifying genes to filter using {contrast_file} "
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
554 f"with min expression {min_perc_cells_expression}%"
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
555 )
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
556 sides_regex = re.compile(r"[\+\-\*\/\(\)\^]+")
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
557
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
558 contrasts = pd.read_csv(contrast_file, sep="\t")
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
559 # Iterate over each line in the contrast file
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
560 genes_filter_for_contrast = dict()
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
561 for contrast in contrasts.iloc[:, 0]:
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
562 conditions = set(sides_regex.split(contrast))
12
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
563
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
564 selected_conditions = []
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
565 failed_conditions = []
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
566 for condition in conditions:
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
567 # remove any leading or trailing whitespace from condition
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
568 condition = condition.strip()
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
569 if len(condition) == 0:
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
570 continue
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
571 # check if the condition is simply a number, then skip it
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
572 if condition.isnumeric():
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
573 continue
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
574 if condition not in adata.obs[obs_field].unique():
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
575 # add condition to failed_conditions
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
576 failed_conditions.append(condition)
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
577 continue
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
578 # append to selected_conditions
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
579 selected_conditions.append(condition)
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
580
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
581 if len(failed_conditions) > 0:
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
582 raise ValueError(
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
583 f"Condition(s) '{failed_conditions}' "
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
584 f"from contrast {contrast} "
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
585 f"is/are not present in the "
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
586 f"obs_field '{obs_field}' from the AnnData object."
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
587 f"Possible values are: "
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
588 f"{', '.join(adata.obs[obs_field].unique())}.")
10
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
589 # we want to find the genes that are below the threshold
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
590 # of % of cells expressed for ALL the conditions in the
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
591 # contrast. It is enough for one of the conditions
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
592 # of the contrast to have the genes expressed above
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
593 # the threshold of % of cells to be of interest.
12
a559be56720c planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 1efa285536ea940b459fd07f452a6eeb0cf0ffb9
ebi-gxa
parents: 11
diff changeset
594 for condition in selected_conditions:
10
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
595 # check the percentage of cells that express each gene
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
596 # Filter the AnnData object based on the obs_field value
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
597 adata_filtered = adata[adata.obs[obs_field] == condition]
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
598 # Calculate the percentage of cells expressing each gene
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
599 gene_expression = (adata_filtered.X > 0).mean(axis=0) * 100
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
600 genes_to_filter = set(adata_filtered.var[
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
601 gene_expression.transpose() < min_perc_cells_expression
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
602 ].index.tolist())
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
603 # Update the genes_filter_for_contrast dictionary
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
604 if contrast in genes_filter_for_contrast.keys():
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
605 genes_filter_for_contrast[contrast].intersection_update(
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
606 genes_to_filter
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
607 )
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
608 else:
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
609 genes_filter_for_contrast[contrast] = genes_to_filter
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
610
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
611 # write the genes_filter_for_contrast to pandas dataframe of two columns:
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
612 # contrast and gene
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
613
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
614 # Initialize an empty list to store the expanded pairs
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
615 expanded_pairs = []
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
616
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
617 # Iterate over the dictionary
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
618 for contrast, genes in genes_filter_for_contrast.items():
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
619 for gene in genes:
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
620 expanded_pairs.append((contrast, gene))
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
621
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
622 # Create the DataFrame
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
623 contrast_genes_df = pd.DataFrame(
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
624 expanded_pairs, columns=["contrast", "gene"]
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
625 )
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
626
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
627 return contrast_genes_df
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
628
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
629
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
630 if __name__ == "__main__":
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
631 # Create argument parser
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
632 parser = argparse.ArgumentParser(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
633 description="Perform pseudobulk analysis on an AnnData object"
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
634 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
635
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
636 # Add arguments
8
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
637 parser.add_argument(
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
638 "adata_file", type=str, help="Path to the AnnData file"
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
639 )
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
640 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
641 "-m",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
642 "--adata_obs_fields_to_merge",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
643 type=str,
8
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
644 help="Fields in adata.obs to merge, comma separated. \
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
645 You can have more than one set of fields, \
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
646 separated by semi-colon ;",
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
647 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
648 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
649 "--groupby",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
650 type=str,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
651 required=True,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
652 help="The column in adata.obs that defines the groups",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
653 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
654 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
655 "--sample_key",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
656 required=True,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
657 type=str,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
658 help="The column in adata.obs that defines the samples",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
659 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
660 # add argument for layer
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
661 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
662 "--layer",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
663 type=str,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
664 default=None,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
665 help="The name of the layer of the AnnData object to use",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
666 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
667 # add argument for mode
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
668 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
669 "--mode",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
670 type=str,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
671 default="sum",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
672 help="The mode for Decoupler pseudobulk analysis",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
673 choices=["sum", "mean", "median"],
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
674 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
675 # add boolean argument for use_raw
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
676 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
677 "--use_raw",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
678 action="store_true",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
679 default=False,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
680 help="Whether to use the raw part of the AnnData object",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
681 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
682 # add argument for min_cells
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
683 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
684 "--min_cells",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
685 type=int,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
686 default=10,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
687 help="Minimum number of cells for pseudobulk analysis",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
688 )
10
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
689 # add argument for min percentage of cells that should express a gene
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
690 parser.add_argument(
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
691 "--min_gene_exp_perc_per_cell",
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
692 type=float,
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
693 default=50,
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
694 help="If all the conditions of one side of a contrast express a \
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
695 gene in less than this percentage of cells, then the genes \
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
696 will be added to a list of genes to ignore for that contrast.\
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
697 Requires the contrast file to be provided.",
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
698 )
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
699 parser.add_argument(
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
700 "--contrasts_file",
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
701 type=str,
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
702 required=False,
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
703 help="Contrasts file, a one column tsv with a header, each line \
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
704 represents a contrast as a combination of conditions at each \
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
705 side of a subtraction.",
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
706 )
f6040492b499 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit dea8a066ccf04e241457719bf5162f9d39fe6c48
ebi-gxa
parents: 9
diff changeset
707
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
708 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
709 "--save_path", type=str, help="Path to save the plot (optional)"
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
710 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
711 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
712 "--min_counts",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
713 type=int,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
714 help="Minimum count threshold for filtering by expression",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
715 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
716 parser.add_argument(
8
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
717 "--min_counts_per_sample_marking",
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
718 type=int,
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
719 default=20,
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
720 help="Minimum count threshold per sample for \
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
721 marking genes to be ignored after DE",
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
722 )
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
723 parser.add_argument(
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
724 "--min_total_counts",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
725 type=int,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
726 help="Minimum total count threshold for filtering by expression",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
727 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
728 parser.add_argument(
16
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
729 "--num_pseudo_replicates",
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
730 type=int,
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
731 choices=range(3, 1000),
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
732 help="Number of pseudo replicates to create per sample (at least 3)",
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
733 required=False
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
734 )
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
735 parser.add_argument(
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
736 "--seed",
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
737 type=int,
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
738 default=None,
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
739 help="Random seed for pseudo replicate sampling",
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
740 )
508a93e34599 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 487508282bda9dbb68138d5c7091f46ef54fe52a
ebi-gxa
parents: 12
diff changeset
741 parser.add_argument(
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
742 "--anndata_output_path",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
743 type=str,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
744 help="Path to save the filtered AnnData object or pseudobulk data",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
745 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
746 parser.add_argument(
8
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
747 "--filter_expr",
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
748 action="store_true",
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
749 help="Enable filtering by expression",
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
750 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
751 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
752 "--factor_fields",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
753 type=str,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
754 help="Comma separated list of fields for the factors",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
755 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
756 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
757 "--deseq2_output_path",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
758 type=str,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
759 help="Path to save the DESeq2 inputs",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
760 required=True,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
761 )
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
762 parser.add_argument(
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
763 "--plot_samples_figsize",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
764 type=int,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
765 default=[10, 10],
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
766 nargs=2,
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
767 help="Size of the samples plot as a tuple (two arguments)",
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
768 )
8
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
769 parser.add_argument(
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
770 "--plot_filtering_figsize", type=int, default=[10, 10], nargs=2
93f61ea19336 planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit eea5c13f9e6e070a2359c59400773b01f9cd7567
ebi-gxa
parents: 2
diff changeset
771 )
0
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
772
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
773 # Parse the command line arguments
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
774 args = parser.parse_args()
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
775
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
776 # Call the main function
59a7f3f83aec planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 20f4a739092bd05106d5de170523ad61d66e41fc
ebi-gxa
parents:
diff changeset
777 main(args)