Mercurial > repos > ebi-gxa > anndata_ops
comparison anndata_operations.xml @ 28:a0274bc43b7e draft
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/tertiary-analysis/scanpy commit d01fa18235ac692874cd3cfddef33696c2df8ac1-dirty
author | ebi-gxa |
---|---|
date | Sat, 15 Jul 2023 08:44:46 +0000 |
parents | 7ebc22f77d86 |
children | 2e8022f1923c |
comparison
equal
deleted
inserted
replaced
27:7ebc22f77d86 | 28:a0274bc43b7e |
---|---|
1 <?xml version="1.0" encoding="utf-8"?> | 1 <?xml version="1.0" encoding="utf-8"?> |
2 <tool id="anndata_ops" name="AnnData Operations" version="@TOOL_VERSION@+galaxy91" profile="@PROFILE@"> | 2 <tool id="anndata_ops" name="AnnData Operations" version="@TOOL_VERSION@+galaxy92" profile="@PROFILE@"> |
3 <description>modifies metadata and flags genes</description> | 3 <description>modifies metadata and flags genes</description> |
4 <macros> | 4 <macros> |
5 <import>scanpy_macros2.xml</import> | 5 <import>scanpy_macros2.xml</import> |
6 </macros> | 6 </macros> |
7 <expand macro="requirements"/> | 7 <expand macro="requirements"/> |
48 python $operations | 48 python $operations |
49 ]]></command> | 49 ]]></command> |
50 <configfiles> | 50 <configfiles> |
51 <configfile name="operations"> | 51 <configfile name="operations"> |
52 import gc | 52 import gc |
53 from os import makedirs | |
53 import scanpy as sc | 54 import scanpy as sc |
54 import anndata | 55 import anndata |
55 from numpy import all | 56 from numpy import all |
56 import logging | 57 import logging |
57 | 58 |
59 if new_field is None: | 60 if new_field is None: |
60 new_field = f"{field}_u" | 61 new_field = f"{field}_u" |
61 appendents = (suffix + df.groupby(field).cumcount().astype(str).replace('0','')).replace(suffix, '') | 62 appendents = (suffix + df.groupby(field).cumcount().astype(str).replace('0','')).replace(suffix, '') |
62 df[new_field] = df[field].astype(str) + appendents.astype(str) | 63 df[new_field] = df[field].astype(str) + appendents.astype(str) |
63 return df | 64 return df |
65 | |
66 adata = sc.read('input.h5') | |
64 | 67 |
65 adata = sc.read('input.h5') | |
66 | |
67 #if $add_cell_metadata.default: | 68 #if $add_cell_metadata.default: |
68 import pandas as pd | 69 import pandas as pd |
69 | 70 |
70 def add_cell_metadata(ad, metadata_file="cell_metadata.tsv", drop_duplicates=True): | 71 def add_cell_metadata(ad, metadata_file="cell_metadata.tsv", drop_duplicates=True): |
71 metadata_df = pd.read_csv(metadata_file, sep="\t", index_col=0) | 72 metadata_df = pd.read_csv(metadata_file, sep="\t", index_col=0) |
94 if prev_dtype == str or prev_dtype == object: | 95 if prev_dtype == str or prev_dtype == object: |
95 prev_dtype = "category" | 96 prev_dtype = "category" |
96 print(f"Changing {col} from {merged_obs[col].dtype} to {prev_dtype}") | 97 print(f"Changing {col} from {merged_obs[col].dtype} to {prev_dtype}") |
97 merged_obs[col] = merged_obs[col].astype(prev_dtype) | 98 merged_obs[col] = merged_obs[col].astype(prev_dtype) |
98 return merged_obs | 99 return merged_obs |
99 | 100 |
100 adata.obs = add_cell_metadata(adata) | 101 adata.obs = add_cell_metadata(adata) |
101 #end if | 102 #end if |
102 | 103 |
103 #if $copy_adata_to_raw: | 104 #if $copy_adata_to_raw: |
104 adata.raw = adata | 105 adata.raw = adata |
106 #end if | |
107 | |
108 #if $swap_layer_to_x.default: | |
109 #if $swap_layer_to_x.new_name_x: | |
110 adata.layers['${swap_layer_to_x.new_name_x}'] = adata.X | |
111 #end if | |
112 adata.X = adata.layers['${swap_layer_to_x.layer}'] | |
113 del adata.layers['${swap_layer_to_x.layer}'] | |
105 #end if | 114 #end if |
106 | 115 |
107 gene_name = '${gene_symbols_field}' | 116 gene_name = '${gene_symbols_field}' |
108 qc_vars = list() | 117 qc_vars = list() |
109 | 118 |
284 if 'n_cells' not in adata.var.columns: | 293 if 'n_cells' not in adata.var.columns: |
285 sc.pp.filter_genes(adata, min_cells=0) | 294 sc.pp.filter_genes(adata, min_cells=0) |
286 if 'n_counts' not in adata.var.columns: | 295 if 'n_counts' not in adata.var.columns: |
287 sc.pp.filter_genes(adata, min_counts=0) | 296 sc.pp.filter_genes(adata, min_counts=0) |
288 | 297 |
298 #if not $split_on_obs.default or $split_on_obs.output_main: | |
289 adata.write('output.h5', compression='gzip') | 299 adata.write('output.h5', compression='gzip') |
300 #end if | |
301 | |
302 #if $split_on_obs.default: | |
303 s = 0 | |
304 res_dir = "output_split" | |
305 makedirs(res_dir, exist_ok=True) | |
306 for field_value in adata.obs["${split_on_obs.key}"].unique(): | |
307 ad_s = adata[adata.obs.${split_on_obs.key} == field_value] | |
308 ad_s.write(f"{res_dir}/${split_on_obs.key}_{s}.h5", compression='gzip') | |
309 if s > 0: | |
310 gc.collect() | |
311 s += 1 | |
312 #end if | |
313 | |
314 | |
290 </configfile> | 315 </configfile> |
291 </configfiles> | 316 </configfiles> |
292 | 317 |
293 <inputs> | 318 <inputs> |
294 <param name="input_obj_file" argument="input-object-file" type="data" format="h5,h5ad" label="Input object in hdf5 AnnData format"/> | 319 <param name="input_obj_file" argument="input-object-file" type="data" format="h5,h5ad" label="Input object in hdf5 AnnData format"/> |
295 <expand macro="output_object_params_no_loom"/> | 320 <expand macro="output_object_params_no_loom"/> |
321 <conditional name="swap_layer_to_x"> | |
322 <param name="default" type="boolean" checked="false" label="Swap layer to X"/> | |
323 <when value="true"> | |
324 <param name="layer" type="text" value="" label="Name of layer to swap to X" help="This layer name needs to exist within ad.layers or this will fail."/> | |
325 <param name="new_name_x" type="text" value="old_X" label="Name of the new slot for X within layers" help="Leave empty and the old X will be lost."/> | |
326 </when> | |
327 <when value="false"/> | |
328 </conditional> | |
296 <conditional name="add_cell_metadata"> | 329 <conditional name="add_cell_metadata"> |
297 <param name="default" type="boolean" checked="false" label="Merge additional cell metadata"/> | 330 <param name="default" type="boolean" checked="false" label="Merge additional cell metadata"/> |
298 <when value="true"> | 331 <when value="true"> |
299 <param name="file" type="data" label="Cell metadata with headers" help="A tabular file with headers, where the first column contains cell barcodes. Will be merged via a left join, so not all cells in the obs need to be in the metadata. Currently duplicated column headers will be ignored and the originals in the AnnData will be kept." format="tsv,tabular"/> | 332 <param name="file" type="data" label="Cell metadata with headers" help="A tabular file with headers, where the first column contains cell barcodes. Will be merged via a left join, so not all cells in the obs need to be in the metadata. Currently duplicated column headers will be ignored and the originals in the AnnData will be kept." format="tsv,tabular"/> |
300 </when> | 333 </when> |
385 </repeat> | 418 </repeat> |
386 <param name="uns_sources" type="data" label="AnnData objects with uns to copy" help="Extracts uns (such as ranked_genes_groups) from these AnnData objects and merges them into the main input. Make sure to use AnnData objects that are compatible in terms of genes, cells and expression data." format="h5,h5ad" multiple="true"/> | 419 <param name="uns_sources" type="data" label="AnnData objects with uns to copy" help="Extracts uns (such as ranked_genes_groups) from these AnnData objects and merges them into the main input. Make sure to use AnnData objects that are compatible in terms of genes, cells and expression data." format="h5,h5ad" multiple="true"/> |
387 </when> | 420 </when> |
388 </conditional> | 421 </conditional> |
389 <param name="sanitize_varm" type="boolean" checked="false" label="Sanitise any null raw.varm objects if any" help="This might be relevant for interfacing with newer versions of AnnData, that might complain if .raw includes a varm null object."/> | 422 <param name="sanitize_varm" type="boolean" checked="false" label="Sanitise any null raw.varm objects if any" help="This might be relevant for interfacing with newer versions of AnnData, that might complain if .raw includes a varm null object."/> |
423 <conditional name="split_on_obs"> | |
424 <param name="default" type="boolean" checked="false" label="Split on obs" help="Split the AnnData object into multiple AnnData objects based on the values of a given obs key. This is useful for example to split a dataset based on a cluster annotation."/> | |
425 <when value="true"> | |
426 <param name="key" type="text" label="Obs key to split on" help="The obs key to split on. For example, if you want to split on cluster annotation, you can use the key 'louvain'."/> | |
427 <param name="output_main" type="boolean" checked="true" label="Output main AnnData object" help="If checked, the main AnnData object will be outputted as well."/> | |
428 </when> | |
429 <when value="false"/> | |
430 </conditional> | |
390 </inputs> | 431 </inputs> |
391 | 432 |
392 <outputs> | 433 <outputs> |
393 <expand macro="output_data_obj_no_loom" description="metadata changes on"/> | 434 <data name="output_h5ad" format="h5ad" from_work_dir="output.h5" label="${tool.name} on ${on_string}: @DESCRIPTION@ AnnData"> |
435 <filter>output_format == 'anndata_h5ad' and (('output_main' in split_on_obs and split_on_obs['output_main']) or (not split_on_obs['default']))</filter> | |
436 </data> | |
437 <data name="output_h5" format="h5" from_work_dir="output.h5" label="${tool.name} on ${on_string}: @DESCRIPTION@ AnnData (h5)"> | |
438 <filter>output_format == 'anndata' and (('output_main' in split_on_obs and split_on_obs['output_main']) or (not split_on_obs['default']))</filter> | |
439 </data> | |
440 <collection name="output_h5ad_split" type="list" label="${tool.name} on ${on_string}: @DESCRIPTION@ AnnData split"> | |
441 <discover_datasets pattern="(?P<designation>.+)\.h5" directory="output_split" format="h5ad" visible="true"/> | |
442 <filter>split_on_obs['default']</filter> | |
443 </collection> | |
394 </outputs> | 444 </outputs> |
395 | 445 |
396 <tests> | 446 <tests> |
397 <test> | 447 <test> |
398 <param name="input_obj_file" value="find_cluster.h5"/> | 448 <param name="input_obj_file" value="find_cluster.h5"/> |
399 <output name="output_h5ad" file="anndata_ops.h5" ftype="h5ad" compare="sim_size"/> | 449 <output name="output_h5ad" file="anndata_ops.h5" ftype="h5ad" compare="sim_size"/> |
450 </test> | |
451 <test> | |
452 <param name="input_obj_file" value="mnn.h5"/> | |
453 <conditional name="swap_layer_to_x"> | |
454 <param name="default" value="true"/> | |
455 <param name="layer" value="mnn"/> | |
456 <param name="new_name_x" value="X_old"/> | |
457 </conditional> | |
458 <output name="output_h5ad" ftype="h5ad"> | |
459 <assert_contents> | |
460 <has_h5_keys keys="layers/X_old" /> | |
461 </assert_contents> | |
462 </output> | |
400 </test> | 463 </test> |
401 <test> | 464 <test> |
402 <param name="input_obj_file" value="anndata_ops.h5"/> | 465 <param name="input_obj_file" value="anndata_ops.h5"/> |
403 <param name="from_var" value = "gene_symbols" /> | 466 <param name="from_var" value = "gene_symbols" /> |
404 <param name="to_var" value = "hello_all" /> | 467 <param name="to_var" value = "hello_all" /> |
481 <output name="output_h5ad" file="anndata_ops_layer.h5" ftype="h5ad" compare="sim_size"> | 544 <output name="output_h5ad" file="anndata_ops_layer.h5" ftype="h5ad" compare="sim_size"> |
482 <assert_contents> | 545 <assert_contents> |
483 <has_h5_keys keys="layers/filtered" /> | 546 <has_h5_keys keys="layers/filtered" /> |
484 </assert_contents> | 547 </assert_contents> |
485 </output> | 548 </output> |
549 </test> | |
550 <test> | |
551 <param name="input_obj_file" value="find_cluster.h5"/> | |
552 <conditional name="split_on_obs"> | |
553 <param name="default" value="true"/> | |
554 <param name="key" value="louvain"/> | |
555 <param name="output_main" value="true"/> | |
556 </conditional> | |
557 <output name="output_h5ad" ftype="h5ad"> | |
558 <assert_contents> | |
559 <has_h5_keys keys="obs/louvain" /> | |
560 </assert_contents> | |
561 </output> | |
562 <output_collection name="output_h5ad_split" type="list" count="5"> | |
563 <element name="louvain_0" ftype="h5ad"> | |
564 <assert_contents> | |
565 <has_h5_keys keys="obs/louvain" /> | |
566 </assert_contents> | |
567 </element> | |
568 </output_collection> | |
486 </test> | 569 </test> |
487 </tests> | 570 </tests> |
488 | 571 |
489 <help><![CDATA[ | 572 <help><![CDATA[ |
490 ============================= | 573 ============================= |