<!-- Mercurial > repos > iuc > anndata_manipulate -->

<tool id="anndata_manipulate" name="Manipulate AnnData" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>object</description>
    <macros>
        <!-- Shared definitions (tokens and macros expanded elsewhere in this tool) come from macros.xml. -->
        <import>macros.xml</import>
        <!-- Select of separator characters; expanded by the obs_names_make_unique and
             var_names_make_unique cases, where the script passes it as the join argument. -->
        <xml name="param_join">
            <param name="join" type="select" label="The connecting string between name and integer">
                <option value="-">-</option>
                <option value="_">_</option>
                <option value=" "> </option>
                <option value="/">/</option>
            </param>
        </xml>
    </macros>
    <!-- bio_tools, requirements and version_command are macros defined outside this file (see macros.xml). -->
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <!-- The command line is supplied entirely by the CMD token; presumably it runs the script_file
         configfile below and echoes it to stdout (the tests assert on script text) - confirm in macros.xml. -->
    <command detect_errors="exit_code"><![CDATA[
@CMD@
      ]]></command>
    <configfiles>
        <!-- Cheetah template rendered into a Python script. Directive lines (starting with a hash and a
             keyword) select which Python statements are emitted; every other line is written verbatim, so
             generated Python must stay at the column where it appears here. Parameters that live inside the
             "manipulate" conditional must be referenced through their full path (manipulate.NAME, or
             manipulate.update_key.NAME for the nested conditional). The name "ad" is presumably bound to
             the anndata module by the imports token from macros.xml - confirm there. -->
        <configfile name="script_file"><![CDATA[
@CMD_imports@

adata = ad.read_h5ad('$input')

#if $manipulate.function == 'concatenate'
## Each additional dataset is loaded into its own numbered variable and then passed positionally.
    #for i, filepath in enumerate($manipulate.other_adatas)
adata_$i = ad.read_h5ad('$filepath')
    #end for
adata = adata.concatenate(
    #for i, filepath in enumerate($manipulate.other_adatas)
    adata_$i,
    #end for
    join='$manipulate.join',
    #if str($manipulate.index_unique) != ''
    index_unique='$manipulate.index_unique',
    #else
    index_unique=None,
    #end if
    #if str($manipulate.uns_merge) != 'None'
    uns_merge='$manipulate.uns_merge',
    #else
    uns_merge=None,
    #end if
    batch_key='$manipulate.batch_key')

#else if $manipulate.function == 'var_names_make_unique'
adata.var_names_make_unique(join='$manipulate.join')

#else if $manipulate.function == 'obs_names_make_unique'
adata.obs_names_make_unique(join='$manipulate.join')

#else if $manipulate.function == 'rename_categories'
    #set $categories = [x.strip() for x in str($manipulate.categories).split(',')]
    #if $manipulate.update_key.new_key == 'no':
adata.rename_categories(
    key='$manipulate.key',
    categories=$categories)
    #else
## The annotation is copied to the new key first, then only the copy is renamed.
## key_name belongs to the nested update_key conditional, hence the longer path.
if '$manipulate.key' in adata.obs:
    print("changing key in obs")
    adata.obs['$manipulate.update_key.key_name'] = adata.obs['$manipulate.key']
    adata.rename_categories(
        key='$manipulate.update_key.key_name',
        categories=$categories)
elif '$manipulate.key' in adata.var:
    print("changing key in var")
    adata.var['$manipulate.update_key.key_name'] = adata.var['$manipulate.key']
    adata.rename_categories(
        key='$manipulate.update_key.key_name',
        categories=$categories)
else:
    print("changing key in uns")
    adata.uns['$manipulate.update_key.key_name'] = adata.uns['$manipulate.key']
    adata.rename_categories(
        key='$manipulate.update_key.key_name',
        categories=$categories)
    #end if

#else if $manipulate.function == 'remove_keys'
    #if $manipulate.obs_keys
        #set $keys = [x.strip() for x in str($manipulate.obs_keys).split(',')]
adata.obs = adata.obs.drop(columns=$keys)
    #end if

    #if $manipulate.var_keys
        #set $keys = [x.strip() for x in str($manipulate.var_keys).split(',')]
## The variable annotation attribute is "var"; there is no "vars" attribute.
adata.var = adata.var.drop(columns=$keys)
    #end if

#else if $manipulate.function == 'flag_genes'
## adapted from anndata operations
    #for $flag in $manipulate.gene_flags
        #if str($flag.col_in) != '':
k_cat = adata.var['${flag.col_in}'].str.startswith('${flag.startswith}')
        #else:
k_cat = adata.var_names.str.startswith('${flag.startswith}')
        #end if
if k_cat.sum() > 0:
    adata.var['${flag.col_out}'] = k_cat
else:
## Plain string on purpose: nesting the same quote type inside an f-string is a syntax error
## before Python 3.12 and would make the whole generated script unparsable.
    print('No genes starting with ${flag.startswith} found.')
    #end for

#else if $manipulate.function == 'rename_obs':
adata.obs['${manipulate.to_obs}'] = adata.obs['${manipulate.from_obs}']
    #if not $manipulate.keep_original:
del adata.obs['${manipulate.from_obs}']
    #end if

#else if $manipulate.function == 'rename_var':
adata.var['${manipulate.to_var}'] = adata.var['${manipulate.from_var}']
    #if not $manipulate.keep_original:
del adata.var['${manipulate.from_var}']
    #end if

#else if $manipulate.function == 'strings_to_categoricals'
adata.strings_to_categoricals()

#else if $manipulate.function == 'transpose'
adata = adata.transpose()

#else if $manipulate.function == 'add_annotation'
import pandas as pd
extra_annot_t = pd.read_csv('$manipulate.new_annot', sep='\t').reset_index(drop=True)
## The new table is joined by row position, so the original index is dropped for the concat
## and restored afterwards.
    #if $manipulate.var_obs == 'var'
var_index = adata.var_names
var = pd.concat([adata.var.reset_index(drop=True), extra_annot_t], axis=1)
var.index = var_index
adata.var = var
    #else if $manipulate.var_obs == 'obs'
obs_index = adata.obs.index
obs = pd.concat([adata.obs.reset_index(drop=True), extra_annot_t], axis=1)
obs.index = obs_index
adata.obs = obs
    #end if

#else if $manipulate.function == 'split_on_obs'
import os
res_dir = "output_split"
os.makedirs(res_dir, exist_ok=True)
## One file per distinct value of the obs key; files are numbered in order of appearance.
## Bracket indexing is used so keys that are not valid Python identifiers also work.
for s,field_value in enumerate(adata.obs["${manipulate.key}"].unique()):
    ad_s = adata[adata.obs["${manipulate.key}"] == field_value]
    ad_s.write(f"{res_dir}/${manipulate.key}_{s}.h5ad", compression='gzip')

#else if $manipulate.function == 'copy_obs'
source_adata = ad.read_h5ad('$manipulate.source_adata')
    #for $key in $manipulate.keys
if '$key.source_key' in source_adata.obs:
        #if str($key.target_key) == '':
    adata.obs['$key.source_key'] = source_adata.obs['$key.source_key']
        #else
    adata.obs['$key.target_key'] = source_adata.obs['$key.source_key']
        #end if
else:
    print(f"Obs column {'{$key.source_key}'} not found in source AnnData.")
    #end for

#else if $manipulate.function == 'copy_uns'
source_adata = ad.read_h5ad('$manipulate.source_adata')
    #for $key in $manipulate.keys
if '$key.source_key' in source_adata.uns:
        #if str($key.target_key) == '':
    adata.uns['$key.source_key'] = source_adata.uns['$key.source_key']
        #else
    adata.uns['$key.target_key'] = source_adata.uns['$key.source_key']
        #end if
else:
    print(f"Uns key {'{$key.source_key}'} not found in source AnnData.")
    #end for

#else if $manipulate.function == 'copy_embed'
source_adata = ad.read_h5ad('$manipulate.source_adata')
    #for $key in $manipulate.keys
if '$key.source_key' in source_adata.obsm:
## An unset text parameter is a wrapper object, never None, so emptiness is tested on its
## string form (same check as the copy_obs and copy_uns branches).
        #if str($key.target_key) == '':
    adata.obsm['$key.source_key'] = source_adata.obsm['$key.source_key']
        #else
    adata.obsm['$key.target_key'] = source_adata.obsm['$key.source_key']
        #end if
else:
    print(f"Embedding key {'{$key.source_key}'} not found in source AnnData.")
    #end for

#else if $manipulate.function == 'copy_layers'
source_adata = ad.read_h5ad('$manipulate.source_adata')
    #for $key in $manipulate.keys
if '$key.source_key' in source_adata.layers:
        #if str($key.target_key) == '':
    adata.layers['$key.source_key'] = source_adata.layers['$key.source_key']
        #else
    adata.layers['$key.target_key'] = source_adata.layers['$key.source_key']
        #end if
else:
    print(f"Layer {'{$key.source_key}'} not found in source AnnData.")
    #end for

#else if $manipulate.function == 'copy_X'
source_adata = ad.read_h5ad('$manipulate.source_adata')
## Empty target key: overwrite the data matrix; otherwise store the source matrix as a layer.
    #if str($manipulate.target_key) == '':
adata.X = source_adata.X
    #else
adata.layers['$manipulate.target_key'] = source_adata.X
    #end if

#else if $manipulate.function == 'save_raw'
adata.raw = adata

#end if

## split_on_obs writes its own files into output_split, so no combined object is written for it.
#if $manipulate.function != 'split_on_obs'
adata.write('anndata.h5ad', compression='gzip')
print(adata)
#end if

]]></configfile>
    </configfiles>
    <inputs>
        <!-- The AnnData object every function operates on; read by the script as "adata". -->
        <param name="input" type="data" format="h5ad" label="Annotated data matrix"/>
        <!-- One case per manipulation; the option values below are the strings the script template
             compares against, so they must not be renamed independently of it. -->
        <conditional name="manipulate">
            <param name="function" type="select" label="Function to manipulate the object">
                <option value="concatenate">Concatenate along the observations axis</option>
                <option value="obs_names_make_unique">Makes the obs index unique by appending '1', '2', etc</option>
                <option value="var_names_make_unique">Makes the var index unique by appending '1', '2', etc</option>
                <option value="rename_categories">Rename categories of annotation</option>
                <option value="remove_keys">Remove keys from obs or var annotations</option>
                <option value="flag_genes">Flag genes that start with a pattern</option><!--adapted from EBI anndata operations tool -->
                <option value="rename_obs">Rename fields in AnnData observations</option><!--adapted from EBI anndata operations tool -->
                <option value="rename_var">Rename fields in AnnData variables</option><!--adapted from EBI anndata operations tool -->
                <option value="strings_to_categoricals">Transform string annotations to categoricals</option>
                <option value="transpose">Transpose the data matrix, leaving observations and variables interchanged</option>
                <option value="add_annotation">Add new annotation(s) for observations or variables</option>
                <option value="split_on_obs">Split the AnnData object into multiple AnnData objects based on the values of a given obs key</option><!--adapted from EBI anndata operations tool-->
                <option value="copy_obs">Copy observation keys from a different anndata object</option>
                <option value="copy_uns">Copy uns keys from a different anndata object</option>
                <option value="copy_embed">Copy embeddings from a different anndata object</option>
                <option value="copy_layers">Copy layers from a different anndata object</option>
                <option value="copy_X">Copy data matrix (.X) from a different anndata object</option>
                <option value="save_raw">Freeze the current state into the 'raw' attribute</option>
            </param>
            <when value="concatenate">
                <param name="other_adatas" type="data" format="h5ad" multiple="true" label="Annotated data matrix to add"/>
                <param name="join" type="select" label="Join method">
                    <option value="inner">Intersection of variables</option>
                    <option value="outer">Union of variables</option>
                </param>
                <param name="batch_key" type="text" value="batch" label="Key to add the batch annotation to obs"/>
                <param name="uns_merge" type="select" label="Strategy to use for merging entries of uns" help="These strategies are applied recursively.">
                    <option value="None" selected="true">The default. The concatenated object will just have an empty dict for uns</option>
                    <option value="same">Only entries which have the same value in all AnnData objects are kept</option>
                    <option value="unique">Only entries which have one unique value in all AnnData objects are kept</option>
                    <option value="first">The first non-missing value is used</option>
                    <option value="only">A value is included if only one of the AnnData objects has a value at this path</option>
                </param>
                <!-- NOTE(review): this select is not optional and has no empty choice, so a separator is
                     always passed to the script; the help text states that instead of promising an
                     "empty" setting that cannot be chosen. -->
                <param name="index_unique" type="select" label="Separator to join the existing index names with the batch category" help="The index names are always made unique with the selected separator.">
                    <option value="-">-</option>
                    <option value="_">_</option>
                    <option value=" "> </option>
                    <option value="/">/</option>
                </param>
            </when>
            <when value="obs_names_make_unique">
                <expand macro="param_join"/>
            </when>
            <when value="var_names_make_unique">
                <expand macro="param_join"/>
            </when>
            <when value="rename_categories">
                <param name="key" type="text" value="" label="Key for observations or variables annotation" help="Annotation key in obs or var"/>
                <param name="categories" type="text" value="" label="Comma-separated list of new categories" help="It should be the same number as the old categories"/>
                <conditional name="update_key">
                    <param name="new_key" type="select" label="Add categories to a new key?" help="If Yes, a new key will be created with the new categories, otherwise the old key will be updated">
                        <option value="yes">Yes</option>
                        <option value="no" selected="true">No</option>
                    </param>
                    <when value="yes">
                        <param name="key_name" type="text" value="" optional="false" label="Key name">
                            <expand macro="sanitize_query"/>
                        </param>
                    </when>
                    <when value="no"></when>
                </conditional>
            </when>
            <when value="remove_keys">
                <!-- Both fields are split on commas by the script; an empty field skips that axis. -->
                <param name="obs_keys" type="text" value="" optional="true" label="Keys/fields to remove from observations (obs)">
                    <expand macro="sanitize_query"/>
                </param>
                <param name="var_keys" type="text" value="" optional="true" label="Keys/fields to remove from variables (var)">
                    <expand macro="sanitize_query"/>
                </param>
            </when>
            <when value="flag_genes">
                <repeat name="gene_flags" title="Flag genes that start with these names">
                    <param name="startswith" type="text" label="Text that you expect the genes to be flagged to start with" help="For example, 'MT-' for mito genes">
                        <!-- The apostrophe is removed because the value is placed inside a single-quoted
                             Python string in the generated script. -->
                        <sanitizer invalid_char="">
                            <valid initial="string.ascii_letters,string.digits,string.punctuation">
                                <remove value="&apos;" />
                            </valid>
                        </sanitizer>
                    </param>
                    <param name="col_in" value='' optional="true" type="text" label="Column in .var to use" help="By default it uses the var_names (normally gene symbols)">
                        <expand macro="sanitize_query"/>
                    </param>
                    <param name="col_out" type="text" label="Name of the column in .var where this boolean flag is stored" help="For example, name this column as 'mito' for mitochondrial genes.">
                        <expand macro="sanitize_query"/>
                    </param>
                </repeat>
            </when>
            <when value="rename_obs">
                <param name="from_obs" type="text" label="Name of the observations field that you want to change">
                    <expand macro="sanitize_query"/>
                </param>
                <param name="to_obs" type="text" label="New name of the field in the observations">
                    <expand macro="sanitize_query"/>
                </param>
                <param name="keep_original" type="boolean" checked="false" label="Keep original" help="If activated, it will also keep the original column"/>
            </when>
            <when value="rename_var">
                <param name="from_var" type="text" label="Name of the variables field that you want to change">
                    <expand macro="sanitize_query"/>
                </param>
                <param name="to_var" type="text" label="New name of the field in the variables">
                    <expand macro="sanitize_query"/>
                </param>
                <param name="keep_original" type="boolean" checked="false" label="Keep original" help="If activated, it will also keep the original column"/>
            </when>
            <when value="strings_to_categoricals" ></when>
            <when value="transpose" ></when>
            <when value="add_annotation">
                <param name="var_obs" type="select" label="What to annotate?">
                    <option value="var">Variables (var)</option>
                    <option value="obs">Observations (obs)</option>
                </param>
                <param name="new_annot" type="data" format="tabular" label="Table with new annotations"
                    help="The new table should have the same number of rows and the same order as obs or var. The key names should be in the header (1st line)"/>
            </when>
            <when value="split_on_obs">
                <param name="key" type="text" label="The obs key to split on" help="For example, if you want to split on cluster annotation, you can use the key 'louvain'. The output will be a collection of anndata objects">
                    <sanitizer invalid_char="">
                        <valid initial="string.ascii_letters,string.digits,string.punctuation">
                            <remove value="&apos;" />
                        </valid>
                    </sanitizer>
                </param>
            </when>
            <when value="save_raw"></when>
            <!-- The copy_* cases share one layout: a source AnnData plus source/target key pairs.
                 An empty target key means "copy to the same key" in the script. -->
            <when value="copy_obs">
                <param name="source_adata" type="data" format="h5ad" label="Source anndata object" help="Ideally the source AnnData object should contain the same set of genes and cells."/>
                <repeat name="keys" title="Keys from obs to copy" min="1">
                    <param name="source_key" type="text" optional="true" label="Column to be copied from the source anndata" help="louvain, batch, etc. Provide one key at a time.">
                        <expand macro="sanitize_query"/>
                    </param>
                    <param name="target_key" type="text" optional="true" label="Target column name" help="Warning! Provide a new key name to avoid rewriting. Leave empty to copy to the same key.">
                        <expand macro="sanitize_query"/>
                    </param>
                </repeat>
            </when>
            <when value="copy_uns">
                <param name="source_adata" type="data" format="h5ad" label="Source anndata object" help="Ideally the source AnnData object should contain the same set of genes and cells."/>
                <repeat name="keys" title="Keys from uns to copy" min="1">
                    <param name="source_key" type="text" optional="true" label="Uns key to be copied from the source anndata" help="hvg, neighbors, etc. Provide one key at a time.">
                        <expand macro="sanitize_query"/>
                    </param>
                    <param name="target_key" type="text" optional="true" label="Target key name" help="Warning! Provide a new key name to avoid rewriting. Leave empty to copy to the same key.">
                        <expand macro="sanitize_query"/>
                    </param>
                </repeat>
            </when>
            <when value="copy_embed">
                <param name="source_adata" type="data" format="h5ad" label="Source anndata object" help="Ideally the source AnnData object should contain the same set of genes and cells."/>
                <repeat name="keys" title="Keys from embeddings to copy" min="1">
                    <param name="source_key" type="text" label="Key to be copied from the source anndata" help="tSNE, UMAP, etc. Provide one key at a time.">
                        <expand macro="sanitize_query"/>
                    </param>
                    <param name="target_key" type="text" optional="true" label="Target key name" help="Warning! Provide a new key name to avoid rewriting. Leave empty to copy to the same key.">
                        <expand macro="sanitize_query"/>
                    </param>
                </repeat>
            </when>
            <when value="copy_layers">
                <param name="source_adata" type="data" format="h5ad" label="Source anndata object" help="Ideally the source AnnData object should contain the same set of genes and cells."/>
                <repeat name="keys" title="Layers to copy" min="1">
                    <param name="source_key" type="text" label="Layer to be copied from the source anndata">
                        <expand macro="sanitize_query"/>
                    </param>
                    <param name="target_key" type="text" optional="true" label="Target layer name" help="Warning! give a new key name to avoid rewriting. Leave empty to copy to the same key.">
                        <expand macro="sanitize_query"/>
                    </param>
                </repeat>
            </when>
            <when value="copy_X">
                <param name="source_adata" type="data" format="h5ad" label="Source anndata object" help="Ideally the source AnnData object should contain the same set of genes and cells."/>
                <param name="target_key" type="text" optional="true" label="Give a target layer name or leave empty to overwrite .X" help="Warning! give a new key name to avoid rewriting. Leave empty to copy to the .X of the current anndata">
                    <expand macro="sanitize_query"/>
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Single-object result: the script writes anndata.h5ad for every function except split_on_obs. -->
        <data name="anndata" format="h5ad" from_work_dir="anndata.h5ad" label="${tool.name} (${manipulate.function}) on ${on_string}">
            <filter>manipulate['function'] != 'split_on_obs'</filter>
        </data>
        <!-- split_on_obs result: one list element per file the script writes into output_split;
             the element identifier is the part of the file name matched before ".h5". -->
        <collection name="output_h5ad_split" type="list" label="${tool.name} (${manipulate.function}) on ${on_string} Collection">
            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.h5" directory="output_split" format="h5ad" visible="true"/>
            <filter>manipulate['function'] == 'split_on_obs'</filter>
        </collection>
    </outputs>
    <tests>
        <test expect_num_outputs="1">
            <!-- test 1: concatenate the input with itself; stdout must show the generated concatenate
                 call with its arguments and a 6 × 2 result, and the output must contain obs/batch -->
            <param name="input" value="import.csv.h5ad"/>
            <conditional name="manipulate">
                <param name="function" value="concatenate"/>
                <param name="other_adatas" value="import.csv.h5ad"/>
                <param name="join" value="inner"/>
                <param name="batch_key" value="batch"/>
                <param name="index_unique" value="-"/>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="adata_0"/>
                <has_text_matching expression="adata.concatenate"/>
                <has_text_matching expression="join='inner'"/>
                <has_text_matching expression="index_unique='-'"/>
                <has_text_matching expression="batch_key='batch'"/>
                <has_text_matching expression="6 × 2"/>
            </assert_stdout>
            <output name="anndata" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/batch"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 2: obs_names_make_unique with "-" as separator; dimensions and existing keys are kept -->
            <param name="input" value="krumsiek11.h5ad"/>
            <conditional name="manipulate">
                <param name="function" value="obs_names_make_unique"/>
                <param name="join" value="-"/>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="adata.obs_names_make_unique\(join='-'\)"/>
                <has_text_matching expression="500 × 11"/>
            </assert_stdout>
            <output name="anndata" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/cell_type"/>
                    <has_h5_keys keys="uns/highlights"/>
                    <has_h5_keys keys="uns/iroot"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 3: var_names_make_unique with "-" as separator; dimensions and existing keys are kept -->
            <param name="input" value="krumsiek11.h5ad"/>
            <conditional name="manipulate">
                <param name="function" value="var_names_make_unique"/>
                <param name="join" value="-"/>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="adata.var_names_make_unique\(join='-'\)"/>
                <has_text_matching expression="500 × 11"/>
            </assert_stdout>
            <output name="anndata" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/cell_type"/>
                    <has_h5_keys keys="uns/highlights"/>
                    <has_h5_keys keys="uns/iroot"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 4: rename_categories in place (new_key = no); the comma-separated input must reach
                 the script as a stripped Python list -->
            <param name="input" value="krumsiek11.h5ad"/>
            <conditional name="manipulate">
                <param name="function" value="rename_categories"/>
                <param name="key" value="cell_type"/>
                <param name="categories" value="ery, mk, mo, progenitor"/>
                <conditional name="update_key">
                    <param name="new_key" value="no"/>
                </conditional>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="adata.rename_categories"/>
                <has_text_matching expression="key='cell_type'"/>
                <has_text_matching expression="categories=\['ery', 'mk', 'mo', 'progenitor'\]"/>
                <has_text_matching expression="500 × 11"/>
            </assert_stdout>
            <output name="anndata" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/cell_type"/>
                    <has_h5_keys keys="uns/highlights"/>
                    <has_h5_keys keys="uns/iroot"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 5: strings_to_categoricals; dimensions and existing keys are kept -->
            <param name="input" value="krumsiek11.h5ad"/>
            <conditional name="manipulate">
                <param name="function" value="strings_to_categoricals"/>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="adata.strings_to_categoricals"/>
                <has_text_matching expression="500 × 11"/>
            </assert_stdout>
            <output name="anndata" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/cell_type"/>
                    <has_h5_keys keys="uns/highlights"/>
                    <has_h5_keys keys="uns/iroot"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 6: transpose; dimensions flip to 11 × 500 and cell_type moves from obs to var -->
            <param name="input" value="krumsiek11.h5ad"/>
            <conditional name="manipulate">
                <param name="function" value="transpose"/>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="adata.transpose"/>
                <has_text_matching expression="11 × 500"/>
            </assert_stdout>
            <output name="anndata" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="var/cell_type"/>
                    <has_h5_keys keys="uns/highlights"/>
                    <has_h5_keys keys="uns/iroot"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 7: add_annotation on var; the columns annot1 and annot2 from the table must appear in var -->
            <param name="input" value="krumsiek11.h5ad"/>
            <conditional name="manipulate">
                <param name="function" value="add_annotation"/>
                <param name="var_obs" value="var"/>
                <param name="new_annot" value="var_add_annotation.tabular"/>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="500 × 11"/>
            </assert_stdout>
            <output name="anndata" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/cell_type"/>
                    <has_h5_keys keys="var/annot1"/>
                    <has_h5_keys keys="var/annot2"/>
                    <has_h5_keys keys="uns/highlights"/>
                    <has_h5_keys keys="uns/iroot"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 8: add_annotation on obs; the columns annot1 and annot2 from the table must appear in obs -->
            <param name="input" value="krumsiek11.h5ad"/>
            <conditional name="manipulate">
                <param name="function" value="add_annotation"/>
                <param name="var_obs" value="obs"/>
                <param name="new_annot" value="obs_add_annotation.tabular"/>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="500 × 11"/>
            </assert_stdout>
            <output name="anndata" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/cell_type"/>
                    <has_h5_keys keys="obs/annot1"/>
                    <has_h5_keys keys="obs/annot2"/>
                    <has_h5_keys keys="uns/highlights"/>
                    <has_h5_keys keys="uns/iroot"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 9: save_raw; only checks that dimensions and existing keys survive.
                 NOTE(review): nothing here asserts that a raw group is present in the output. -->
            <param name="input" value="krumsiek11.h5ad"/>
            <conditional name="manipulate">
                <param name="function" value="save_raw"/>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="500 × 11"/>
            </assert_stdout>
            <output name="anndata" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/cell_type"/>
                    <has_h5_keys keys="uns/highlights"/>
                    <has_h5_keys keys="uns/iroot"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 10 remove_keys: drops cell_type from obs.
                 NOTE(review): var_keys is never set, so the var branch of the script is not exercised,
                 and the absence of obs/cell_type in the output is not asserted. -->
            <param name="input" value="krumsiek11.h5ad"/>
            <conditional name="manipulate">
                <param name="function" value="remove_keys"/>
                <param name="obs_keys" value="cell_type"/>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="500 × 11"/>
            </assert_stdout>
            <output name="anndata" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="uns/highlights"/>
                    <has_h5_keys keys="uns/iroot"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 11 flag_genes: two prefixes matched against var_names (col_in left unset);
                 each must produce its own boolean column in var -->
            <param name="input" value="krumsiek11.h5ad"/>
            <conditional name="manipulate">
                <param name="function" value="flag_genes"/>
                <repeat name="gene_flags">
                    <param name="startswith" value="Gata"/>
                    <param name="col_out" value="Gata_TF"/>
                </repeat>
                <repeat name="gene_flags">
                    <param name="startswith" value="Gf"/>
                    <param name="col_out" value="GF"/>
                </repeat>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="500 × 11"/>
            </assert_stdout>
            <output name="anndata" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="var/Gata_TF"/>
                    <has_h5_keys keys="var/GF"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 12 split_on_obs: splitting on cell_type must yield the collection elements
                 cell_type_0 to cell_type_3, each still carrying the obs and uns keys of the input -->
            <param name="input" value="krumsiek11.h5ad"/>
            <conditional name="manipulate">
                <param name="function" value="split_on_obs"/>
                <param name="key" value="cell_type"/>
            </conditional>
            <output_collection name="output_h5ad_split" type="list">
                <element name="cell_type_0">
                    <assert_contents>
                        <has_h5_keys keys="obs/cell_type"/>
                        <has_h5_keys keys="uns/highlights"/>
                        <has_h5_keys keys="uns/iroot"/>
                    </assert_contents>
                </element>
                <element name="cell_type_1">
                    <assert_contents>
                        <has_h5_keys keys="obs/cell_type"/>
                        <has_h5_keys keys="uns/highlights"/>
                        <has_h5_keys keys="uns/iroot"/>
                    </assert_contents>
                </element>
                <element name="cell_type_2">
                    <assert_contents>
                        <has_h5_keys keys="obs/cell_type"/>
                        <has_h5_keys keys="uns/highlights"/>
                        <has_h5_keys keys="uns/iroot"/>
                    </assert_contents>
                </element>
                <element name="cell_type_3">
                    <assert_contents>
                        <has_h5_keys keys="obs/cell_type"/>
                        <has_h5_keys keys="uns/highlights"/>
                        <has_h5_keys keys="uns/iroot"/>
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <test expect_num_outputs="1">
            <!-- test 13: rename_categories on cell_type, writing the renamed categories to the
                 new obs key new_cell_type (update_key.new_key = yes) -->
            <param name="input" value="krumsiek11.h5ad"/>
            <conditional name="manipulate">
                <param name="function" value="rename_categories"/>
                <param name="key" value="cell_type"/>
                <param name="categories" value="ery, mk, mo, progenitor"/>
                <conditional name="update_key">
                    <param name="new_key" value="yes"/>
                    <param name="key_name" value="new_cell_type"/>
                </conditional>
            </conditional>
            <assert_stdout>
                <!-- Literal substring checks: every expected string is fixed text, and as a
                     regex the unescaped '.' in "adata.rename_categories" matched any character -->
                <has_text text="adata.rename_categories"/>
                <has_text text="key='new_cell_type'"/>
                <has_text text="categories=['ery', 'mk', 'mo', 'progenitor']"/>
                <has_text text="changing key in obs"/>
                <has_text text="500 × 11"/>
            </assert_stdout>
            <output name="anndata" ftype="h5ad">
                <assert_contents>
                    <!-- both the original and the new obs key must be present -->
                    <has_h5_keys keys="obs/cell_type"/>
                    <has_h5_keys keys="obs/new_cell_type"/>
                    <has_h5_keys keys="uns/highlights"/>
                    <has_h5_keys keys="uns/iroot"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 14: flag_genes with col_in set (startswith=ENSG, col_in=gene_ids, col_out=ensembl) -->
            <param name="input" value="flag_new_key.h5ad"/>
            <conditional name="manipulate">
                <param name="function" value="flag_genes"/>
                <repeat name="gene_flags">
                    <param name="startswith" value="ENSG"/>
                    <param name="col_in" value="gene_ids"/>
                    <param name="col_out" value="ensembl"/>
                </repeat>
            </conditional>
            <assert_stdout>
                <!-- plain substring check; the expected text contains no regex metacharacters -->
                <has_text text="199 × 199"/>
            </assert_stdout>
            <output name="anndata" ftype="h5ad">
                <assert_contents>
                    <!-- the flag column named by col_out must exist in var -->
                    <has_h5_keys keys="var/ensembl"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 15: copy_obs, copying obs key cell_type from the source AnnData into
                 new_cell_type (the same file serves as input and source) -->
            <param name="input" value="krumsiek11.h5ad"/>
            <conditional name="manipulate">
                <param name="function" value="copy_obs"/>
                <param name="source_adata" value="krumsiek11.h5ad"/>
                <repeat name="keys">
                    <param name="source_key" value="cell_type"/>
                    <param name="target_key" value="new_cell_type"/>
                </repeat>
            </conditional>
            <assert_stdout>
                <!-- Literal substring check: as a regex, the unescaped '.' characters matched
                     any character, so the assertion was looser than the statement it names -->
                <has_text text="adata.obs['new_cell_type'] = source_adata.obs['cell_type']"/>
                <has_text text="500 × 11"/>
            </assert_stdout>
            <output name="anndata" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/cell_type"/>
                    <has_h5_keys keys="obs/new_cell_type"/>
                    <has_h5_keys keys="uns/highlights"/>
                    <has_h5_keys keys="uns/iroot"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 16: copy_uns, copying uns key iroot from the source AnnData into
                 new_iroot (the same file serves as input and source) -->
            <param name="input" value="krumsiek11.h5ad"/>
            <conditional name="manipulate">
                <param name="function" value="copy_uns"/>
                <param name="source_adata" value="krumsiek11.h5ad"/>
                <repeat name="keys">
                    <param name="source_key" value="iroot"/>
                    <param name="target_key" value="new_iroot"/>
                </repeat>
            </conditional>
            <assert_stdout>
                <!-- Literal substring check: as a regex, the unescaped '.' characters matched
                     any character, so the assertion was looser than the statement it names -->
                <has_text text="adata.uns['new_iroot'] = source_adata.uns['iroot']"/>
                <has_text text="500 × 11"/>
            </assert_stdout>
            <output name="anndata" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/cell_type"/>
                    <has_h5_keys keys="uns/highlights"/>
                    <has_h5_keys keys="uns/iroot"/>
                    <has_h5_keys keys="uns/new_iroot"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 17: copy_embed, copying obsm key X_pca from the source AnnData into
                 new_X_pca (the same file serves as input and source) -->
            <param name="input" value="tl.umap.h5ad"/>
            <conditional name="manipulate">
                <param name="function" value="copy_embed"/>
                <param name="source_adata" value="tl.umap.h5ad"/>
                <repeat name="keys">
                    <param name="source_key" value="X_pca"/>
                    <param name="target_key" value="new_X_pca"/>
                </repeat>
            </conditional>
            <assert_stdout>
                <!-- Literal substring check: as a regex, the unescaped '.' characters matched
                     any character, so the assertion was looser than the statement it names -->
                <has_text text="adata.obsm['new_X_pca'] = source_adata.obsm['X_pca']"/>
                <has_text text="100 × 800"/>
            </assert_stdout>
            <output name="anndata" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/paul15_clusters"/>
                    <has_h5_keys keys="uns/neighbors"/>
                    <has_h5_keys keys="uns/iroot"/>
                    <has_h5_keys keys="obsm/X_pca"/>
                    <has_h5_keys keys="obsm/new_X_pca"/>
                    <has_h5_keys keys="obsm/X_umap"/>
                    <has_h5_keys keys="layers/count"/>
                    <has_h5_keys keys="obsp/connectivities"/>
                    <has_h5_keys keys="obsp/distances"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 18: copy_layers, copying layer count from the source AnnData into
                 new_count (the same file serves as input and source) -->
            <param name="input" value="tl.umap.h5ad"/>
            <conditional name="manipulate">
                <param name="function" value="copy_layers"/>
                <param name="source_adata" value="tl.umap.h5ad"/>
                <repeat name="keys">
                    <param name="source_key" value="count"/>
                    <param name="target_key" value="new_count"/>
                </repeat>
            </conditional>
            <assert_stdout>
                <!-- Literal substring check: as a regex, the unescaped '.' characters matched
                     any character, so the assertion was looser than the statement it names -->
                <has_text text="adata.layers['new_count'] = source_adata.layers['count']"/>
                <has_text text="100 × 800"/>
            </assert_stdout>
            <output name="anndata" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/paul15_clusters"/>
                    <has_h5_keys keys="uns/neighbors"/>
                    <has_h5_keys keys="uns/iroot"/>
                    <has_h5_keys keys="obsm/X_pca"/>
                    <has_h5_keys keys="obsm/X_umap"/>
                    <has_h5_keys keys="layers/count"/>
                    <has_h5_keys keys="layers/new_count"/>
                    <has_h5_keys keys="obsp/connectivities"/>
                    <has_h5_keys keys="obsp/distances"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 19: copy_X, storing the source AnnData's X as layer new_X
                 (the same file serves as input and source) -->
            <param name="input" value="tl.umap.h5ad"/>
            <conditional name="manipulate">
                <param name="function" value="copy_X"/>
                <param name="source_adata" value="tl.umap.h5ad"/>
                <param name="target_key" value="new_X"/>
            </conditional>
            <assert_stdout>
                <!-- Literal substring check: as a regex, the unescaped '.' characters matched
                     any character, so the assertion was looser than the statement it names -->
                <has_text text="adata.layers['new_X'] = source_adata.X"/>
                <has_text text="100 × 800"/>
            </assert_stdout>
            <output name="anndata" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/paul15_clusters"/>
                    <has_h5_keys keys="uns/neighbors"/>
                    <has_h5_keys keys="uns/iroot"/>
                    <has_h5_keys keys="obsm/X_pca"/>
                    <has_h5_keys keys="obsm/X_umap"/>
                    <has_h5_keys keys="layers/count"/>
                    <has_h5_keys keys="layers/new_X"/>
                    <has_h5_keys keys="obsp/connectivities"/>
                    <has_h5_keys keys="obsp/distances"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 20: save_raw, freezing the current state into the raw attribute -->
            <param name="input" value="tl.umap.h5ad"/>
            <conditional name="manipulate">
                <param name="function" value="save_raw"/>
            </conditional>
            <assert_stdout>
                <!-- Literal substring check: as a regex, the unescaped '.' matched any character -->
                <has_text text="adata.raw = adata"/>
                <has_text text="100 × 800"/>
            </assert_stdout>
            <output name="anndata" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/paul15_clusters"/>
                    <has_h5_keys keys="uns/neighbors"/>
                    <has_h5_keys keys="uns/iroot"/>
                    <has_h5_keys keys="obsm/X_pca"/>
                    <has_h5_keys keys="obsm/X_umap"/>
                    <has_h5_keys keys="layers/count"/>
                    <has_h5_keys keys="obsp/connectivities"/>
                    <has_h5_keys keys="obsp/distances"/>
                    <!-- Without this the test never inspects the effect of save_raw on the file.
                         NOTE(review): assumes anndata stores the raw matrix under raw/X in the
                         h5ad layout; confirm against the pinned anndata version -->
                    <has_h5_keys keys="raw/X"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 21: rename_obs from cell_type to new_cell_type with keep_original=false -->
            <param name="input" value="krumsiek11.h5ad"/>
            <conditional name="manipulate">
                <param name="function" value="rename_obs"/>
                <param name="from_obs" value="cell_type"/>
                <param name="to_obs" value="new_cell_type"/>
                <param name="keep_original" value="false"/>
            </conditional>
            <assert_stdout>
                <!-- Literal substring checks: as regexes, the unescaped '.' characters matched
                     any character, so the assertions were looser than the statements they name -->
                <has_text text="adata.obs['new_cell_type'] = adata.obs['cell_type']"/>
                <!-- the del statement is the stdout evidence that the original key is dropped -->
                <has_text text="del adata.obs['cell_type']"/>
                <has_text text="500 × 11"/>
            </assert_stdout>
            <output name="anndata" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/new_cell_type"/>
                    <has_h5_keys keys="uns/highlights"/>
                    <has_h5_keys keys="uns/iroot"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 22: rename_var from gene_ids to new_id with keep_original=false -->
            <param name="input" value="flag_new_key.h5ad"/>
            <conditional name="manipulate">
                <param name="function" value="rename_var"/>
                <param name="from_var" value="gene_ids"/>
                <param name="to_var" value="new_id"/>
                <param name="keep_original" value="false"/>
            </conditional>
            <assert_stdout>
                <!-- Literal substring checks: as regexes, the unescaped '.' characters matched
                     any character, so the assertions were looser than the statements they name -->
                <has_text text="adata.var['new_id'] = adata.var['gene_ids']"/>
                <!-- the del statement is the stdout evidence that the original key is dropped -->
                <has_text text="del adata.var['gene_ids']"/>
                <has_text text="199 × 199"/>
            </assert_stdout>
            <output name="anndata" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="var/feature_types"/>
                    <has_h5_keys keys="var/test"/>
                    <has_h5_keys keys="var/new_id"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

This tool takes an AnnData dataset, manipulates it and returns it.

The possible manipulations are:

- Concatenate along the observations axis (`concatenate method <https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.concatenate.html>`__)

    The `uns`, `varm` and `obsm` attributes are ignored.

    If you use `join='outer'` this fills 0s for sparse data when variables are absent in a batch. Use this with care. Dense data is filled with `NaN`

- Makes the obs index unique by appending '1', '2', etc (`obs_names_make_unique method <https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.obs_names_make_unique.html>`__)

    The first occurrence of a non-unique value is ignored.

- Makes the var index unique by appending '1', '2', etc (`var_names_make_unique method <https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.var_names_make_unique.html>`__)

    The first occurrence of a non-unique value is ignored.

- Rename categories of annotation `key` in `obs`, `var` and `uns` (`rename_categories method <https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.rename_categories.html>`__)

    Besides calling `self.obs[key].cat.categories = categories` - similar for `var` - this also renames categories in unstructured annotation that uses the categorical annotation `key`

- Remove keys from obs or var annotations

    Helps in cleaning up an AnnData object with many annotations. For example, helps in removing QC metrics calculated during preprocessing or already existing cluster annotations.

- Flag genes that start with a pattern

    Useful for flagging the mitochondrial or ribosomal protein genes

- Transform string annotations to categoricals (`strings_to_categoricals method <https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.strings_to_categoricals.html>`__)

    Only affects string annotations that lead to fewer categories than the total number of observations.

- Transpose the data matrix, leaving observations and variables interchanged (`transpose method <https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.transpose.html>`__)

    Data matrix is transposed, observations and variables are interchanged.

- Add annotation for variables or observations

- Split the AnnData object into multiple AnnData objects based on the values of a given obs key

    For example, helps in splitting an AnnData object based on cluster annotation. This function generates a collection with a number of elements equal to the number of categories in the input obs key.

- Filter data variables or observations, by index or key

- Freeze the current state into the 'raw' attribute

@HELP@
    ]]></help>
    <expand macro="citations"/>
</tool>
author	iuc
date	Sat, 11 Jan 2025 21:10:19 +0000
parents	c4209ea387d4
children