Mercurial > repos > ebi-gxa > anndata_ops
comparison anndata_operations.xml @ 27:7ebc22f77d86 draft
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/tertiary-analysis/scanpy commit 2db372e91d658f2c139ff282ffb493ea56f581f8-dirty
author | ebi-gxa |
---|---|
date | Fri, 14 Apr 2023 13:12:01 +0000 |
parents | 825dfd66e3fb |
children | a0274bc43b7e |
comparison
equal
deleted
inserted
replaced
26:825dfd66e3fb | 27:7ebc22f77d86 |
---|---|
1 <?xml version="1.0" encoding="utf-8"?> | 1 <?xml version="1.0" encoding="utf-8"?> |
2 <tool id="anndata_ops" name="AnnData Operations" version="@TOOL_VERSION@+galaxy9" profile="@PROFILE@"> | 2 <tool id="anndata_ops" name="AnnData Operations" version="@TOOL_VERSION@+galaxy91" profile="@PROFILE@"> |
3 <description>modifies metadata and flags genes</description> | 3 <description>modifies metadata and flags genes</description> |
4 <macros> | 4 <macros> |
5 <import>scanpy_macros2.xml</import> | 5 <import>scanpy_macros2.xml</import> |
6 </macros> | 6 </macros> |
7 <expand macro="requirements"/> | 7 <expand macro="requirements"/> |
38 | 38 |
39 #if $copy_u.default: | 39 #if $copy_u.default: |
40 #for $i, $us in enumerate($copy_u.uns_sources): | 40 #for $i, $us in enumerate($copy_u.uns_sources): |
41 ln -s '${us}' uns_source_${i}.h5 && | 41 ln -s '${us}' uns_source_${i}.h5 && |
42 #end for | 42 #end for |
43 #end if | |
44 | |
## Expose the optional cell metadata dataset under the fixed name that the
## operations script reads. The path is single-quoted, like the uns sources
## above, so the shell never word-splits or glob-expands it.
#if $add_cell_metadata.default:
    ln -s '${add_cell_metadata.file}' cell_metadata.tsv &&
#end if
44 python $operations | 48 python $operations |
45 ]]></command> | 49 ]]></command> |
46 <configfiles> | 50 <configfiles> |
47 <configfile name="operations"> | 51 <configfile name="operations"> |
55 if new_field is None: | 59 if new_field is None: |
56 new_field = f"{field}_u" | 60 new_field = f"{field}_u" |
57 appendents = (suffix + df.groupby(field).cumcount().astype(str).replace('0','')).replace(suffix, '') | 61 appendents = (suffix + df.groupby(field).cumcount().astype(str).replace('0','')).replace(suffix, '') |
58 df[new_field] = df[field].astype(str) + appendents.astype(str) | 62 df[new_field] = df[field].astype(str) + appendents.astype(str) |
59 return df | 63 return df |
60 | 64 |
61 adata = sc.read('input.h5') | 65 adata = sc.read('input.h5') |
66 | |
67 #if $add_cell_metadata.default: | |
68 import pandas as pd | |
69 | |
def add_cell_metadata(ad, metadata_file="cell_metadata.tsv", drop_duplicates=True):
    """Left-join a tab-separated cell metadata table onto the obs of an AnnData.

    The first column of the file is used as the index and is matched against
    the index of ``ad.obs``. Every row of ``ad.obs`` is kept, in its original
    order; cells without a metadata row get missing values in the new columns.

    Args:
        ad: object exposing an ``obs`` pandas DataFrame (an AnnData here).
        metadata_file: path to a tabular file with a header line.
        drop_duplicates: what to do with metadata columns whose name already
            exists in ``ad.obs``. When true they are dropped and the obs
            column is kept; otherwise they are added under a new name made by
            appending "_x" until the name is unused.

    Returns:
        A new DataFrame holding the columns of ``ad.obs`` followed by the
        metadata columns. ``ad`` itself is not modified.
    """
    metadata_df = pd.read_csv(metadata_file, sep="\t", index_col=0)

    # A repeated cell identifier would multiply obs rows in the join, and obs
    # must keep exactly one row per cell.
    if metadata_df.index.has_duplicates:
        print("Duplicated cell identifiers in metadata, keeping the first row of each")
        metadata_df = metadata_df[~metadata_df.index.duplicated(keep="first")]

    obs_columns = set(ad.obs.columns)
    clashing = [col for col in metadata_df.columns if col in obs_columns]
    if drop_duplicates:
        for col in clashing:
            print(f"Dropping {col} from metadata, keeping the one in obs")
        metadata_df = metadata_df.drop(columns=clashing)
    else:
        # Pick names that are free on both sides, so the join never has to add
        # its own suffixes (which would also rename existing obs columns).
        taken = obs_columns | set(metadata_df.columns)
        renames = {}
        for col in clashing:
            new_name = col + "_x"
            while new_name in taken:
                new_name += "_x"
            taken.add(new_name)
            renames[col] = new_name
            print(f"Renaming {col} to {new_name}")
        metadata_df = metadata_df.rename(columns=renames)

    merged_obs = ad.obs.merge(
        metadata_df, left_index=True, right_index=True, how="left"
    )

    # No names overlap at this point, so each metadata column is present in
    # the merged frame under its own name.
    for col in metadata_df.columns:
        dtype = merged_obs[col].dtype
        # Text columns, and columns that were upcast to object because of
        # unmatched cells (for instance booleans), are stored as categories.
        # Casting such a column back to bool would turn missing values into
        # True, while a category keeps them missing.
        if pd.api.types.is_object_dtype(dtype) or pd.api.types.is_string_dtype(dtype):
            print(f"Changing {col} from {dtype} to category")
            merged_obs[col] = merged_obs[col].astype("category")
    return merged_obs
99 | |
100 adata.obs = add_cell_metadata(adata) | |
101 #end if | |
62 | 102 |
63 #if $copy_adata_to_raw: | 103 #if $copy_adata_to_raw: |
64 adata.raw = adata | 104 adata.raw = adata |
65 #end if | 105 #end if |
66 | 106 |
251 </configfiles> | 291 </configfiles> |
252 | 292 |
253 <inputs> | 293 <inputs> |
254 <param name="input_obj_file" argument="input-object-file" type="data" format="h5,h5ad" label="Input object in hdf5 AnnData format"/> | 294 <param name="input_obj_file" argument="input-object-file" type="data" format="h5,h5ad" label="Input object in hdf5 AnnData format"/> |
255 <expand macro="output_object_params_no_loom"/> | 295 <expand macro="output_object_params_no_loom"/> |
296 <conditional name="add_cell_metadata"> | |
297 <param name="default" type="boolean" checked="false" label="Merge additional cell metadata"/> | |
298 <when value="true"> | |
299 <param name="file" type="data" label="Cell metadata with headers" help="A tabular file with headers, where the first column contains cell barcodes. Will be merged via a left join, so not all cells in the obs need to be in the metadata. Currently duplicated column headers will be ignored and the originals in the AnnData will be kept." format="tsv,tabular"/> | |
300 </when> | |
301 <when value="false"/> | |
302 </conditional> | |
256 <param name="copy_adata_to_raw" type="boolean" label="Copy AnnData to .raw" help="If activated, it will do 'adata.raw = adata'" checked="false"/> | 303 <param name="copy_adata_to_raw" type="boolean" label="Copy AnnData to .raw" help="If activated, it will do 'adata.raw = adata'" checked="false"/> |
257 <repeat name="modifications" title="Change field names in AnnData observations" min="0"> | 304 <repeat name="modifications" title="Change field names in AnnData observations" min="0"> |
258 <param name="from_obs" type="text" label="Original name" help="Name in observations that you want to change"> | 305 <param name="from_obs" type="text" label="Original name" help="Name in observations that you want to change"> |
259 <sanitizer> | 306 <sanitizer> |
260 <valid initial="string.printable"/> | 307 <valid initial="string.printable"/> |
361 </assert_contents> | 408 </assert_contents> |
362 </output> | 409 </output> |
363 </test> | 410 </test> |
364 <test> | 411 <test> |
365 <param name="input_obj_file" value="anndata_ops.h5"/> | 412 <param name="input_obj_file" value="anndata_ops.h5"/> |
413 <conditional name="add_cell_metadata"> | |
414 <param name="default" value="true"/> | |
415 <param name="file" value="test_incomplete_metadata.tsv"/> | |
416 </conditional> | |
417 <output name="output_h5ad" ftype="h5ad"> | |
418 <assert_contents> | |
419 <has_h5_keys keys="obs/cell_type"/> | |
420 </assert_contents> | |
421 </output> | |
422 </test> | |
423 <test> | |
424 <param name="input_obj_file" value="anndata_ops.h5"/> | |
366 <repeat name="var_modifications" > | 425 <repeat name="var_modifications" > |
367 <param name="from_var" value = "gene_symbols" /> | 426 <param name="from_var" value = "gene_symbols" /> |
368 <param name="to_var" value = "gene_symbols_unique" /> | 427 <param name="to_var" value = "gene_symbols_unique" /> |
369 <param name="make_unique" value = "True" /> | 428 <param name="make_unique" value = "True" /> |
370 </repeat> | 429 </repeat> |