# HG changeset patch # User ebi-gxa # Date 1689410686 0 # Node ID a0274bc43b7e6f9072da11e575db1cfa8d09ec52 # Parent 7ebc22f77d86bc791956c014c80de0ce8227b97e planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/tertiary-analysis/scanpy commit d01fa18235ac692874cd3cfddef33696c2df8ac1-dirty diff -r 7ebc22f77d86 -r a0274bc43b7e anndata_operations.xml --- a/anndata_operations.xml Fri Apr 14 13:12:01 2023 +0000 +++ b/anndata_operations.xml Sat Jul 15 08:44:46 2023 +0000 @@ -1,5 +1,5 @@ - + modifies metadata and flags genes scanpy_macros2.xml @@ -50,6 +50,7 @@ import gc +from os import makedirs import scanpy as sc import anndata from numpy import all @@ -61,9 +62,9 @@ appendents = (suffix + df.groupby(field).cumcount().astype(str).replace('0','')).replace(suffix, '') df[new_field] = df[field].astype(str) + appendents.astype(str) return df - + adata = sc.read('input.h5') - + #if $add_cell_metadata.default: import pandas as pd @@ -96,7 +97,7 @@ print(f"Changing {col} from {merged_obs[col].dtype} to {prev_dtype}") merged_obs[col] = merged_obs[col].astype(prev_dtype) return merged_obs - + adata.obs = add_cell_metadata(adata) #end if @@ -104,6 +105,14 @@ adata.raw = adata #end if +#if $swap_layer_to_x.default: +#if $swap_layer_to_x.new_name_x: +adata.layers['${swap_layer_to_x.new_name_x}'] = adata.X +#end if +adata.X = adata.layers['${swap_layer_to_x.layer}'] +del adata.layers['${swap_layer_to_x.layer}'] +#end if + gene_name = '${gene_symbols_field}' qc_vars = list() @@ -286,13 +295,37 @@ if 'n_counts' not in adata.var.columns: sc.pp.filter_genes(adata, min_counts=0) +#if not $split_on_obs.default or $split_on_obs.output_main: adata.write('output.h5', compression='gzip') +#end if + +#if $split_on_obs.default: +s = 0 +res_dir = "output_split" +makedirs(res_dir, exist_ok=True) +for field_value in adata.obs["${split_on_obs.key}"].unique(): + ad_s = adata[adata.obs.${split_on_obs.key} == field_value] + ad_s.write(f"{res_dir}/${split_on_obs.key}_{s}.h5", compression='gzip') + if s > 0: + gc.collect() + s += 1 +#end if + + + + + + + + + + @@ -387,10 +420,27 @@ + + + + + + + + - + + output_format == 'anndata_h5ad' and (('output_main' in split_on_obs and split_on_obs['output_main']) or (not split_on_obs['default'])) + + + output_format == 'anndata' and (('output_main' in split_on_obs and split_on_obs['output_main']) or (not split_on_obs['default'])) + + + + split_on_obs['default'] + @@ -399,6 +449,19 @@ + + + + + + + + + + + + + @@ -484,6 +547,26 @@ + + + + + + + + + + + + + + + + + + + +