view inspect.xml @ 13:14c576ea6148 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/anndata/ commit c1d84c1850c53deccc384de3960d2cec37bb2869
author iuc
date Fri, 08 Nov 2024 22:00:02 +0000
parents 0bf7fddf90c7
children 33c0081e16c6
line wrap: on
line source

<tool id="anndata_inspect" name="Inspect AnnData" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>object</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <!-- The whole command line comes from the @CMD@ token, which is defined in macros.xml
         (not visible in this file). A non-zero exit code marks the job as failed. -->
    <command detect_errors="exit_code"><![CDATA[
@CMD@
      ]]></command>
    <!-- Cheetah-templated Python script that loads the h5ad input and exports one part of it.
         Only the statements of the branch matching inspect.info (and, where present,
         uns_info / obsm_info / varm_info) end up in the rendered script:
           general  : print(adata) into the txt output
           X        : adata.to_df() as tab-separated text, index kept
           obs, var : the annotation data frame as tab-separated text, index kept
           chunk_X  : adata.chunk_X(...) with either a random size/replace pair or an explicit
                      list of integer indices parsed from the comma-separated "list" text
           uns      : neighbors and paga are written as Matrix Market files into the working
                      directory (the neighbors matrices are read from adata.obsp, not adata.uns);
                      pca and rank_genes_groups are written as tabular files with index = False
           obsm     : one tabular file with index = False; for X_draw_graph every obsm key that
                      starts with X_draw_graph is written to a file named after the key
           varm     : the PCs array as a tabular file with index = False
         @CMD_imports@ is a macros.xml token; presumably it imports anndata as "ad", since the
         script calls ad.read_h5ad (TODO confirm in macros.xml). -->
    <configfiles>
        <configfile name="script_file"><![CDATA[
@CMD_imports@
import pandas as pd
from scipy import io

pd.options.display.precision = 15

adata = ad.read_h5ad('$input')

#if $inspect.info == 'general'
with open('$general', 'w', encoding="utf-8") as f:
    print(adata, file=f)

#else if $inspect.info == 'X'
adata.to_df().to_csv('$X', sep='\t')

#else if $inspect.info == 'obs'
adata.obs.to_csv('$obs', sep='\t')

#else if $inspect.info == 'var'
adata.var.to_csv('$var', sep='\t')

#else if $inspect.info == 'chunk_X'
    #if $inspect.chunk.info == 'random'
X = adata.chunk_X(select=$inspect.chunk.size, replace=$inspect.chunk.replace)
    #else
        #set $select = [int(x.strip()) for x in str($inspect.chunk.list).split(',')]
X = adata.chunk_X(select=$select)
    #end if
pd.DataFrame(X).to_csv('$chunk_X', sep='\t')

#else if $inspect.info == 'uns'
    #if $inspect.uns_info == 'neighbors'
io.mmwrite('uns_neighbors_connectivities.mtx', adata.obsp['connectivities'])
io.mmwrite('uns_neighbors_distances.mtx', adata.obsp['distances'])
    #else if $inspect.uns_info == 'paga'
io.mmwrite('uns_paga_connectivities.mtx', adata.uns['paga']['connectivities'])
io.mmwrite('uns_paga_connectivities_tree.mtx', adata.uns['paga']['connectivities_tree'])
    #else if $inspect.uns_info == 'pca'
pd.DataFrame(adata.uns['pca']['variance']).to_csv("$uns_pca_variance", sep="\t", index = False)
pd.DataFrame(adata.uns['pca']['variance_ratio']).to_csv("$uns_pca_variance_ratio", sep="\t", index = False)
    #else if $inspect.uns_info == 'rank_genes_groups'
pd.DataFrame(adata.uns['rank_genes_groups']['logfoldchanges']).to_csv("$uns_rank_genes_groups_logfoldchanges", sep="\t", index = False)
pd.DataFrame(adata.uns['rank_genes_groups']['names']).to_csv("$uns_rank_genes_groups_names", sep="\t", index = False)
pd.DataFrame(adata.uns['rank_genes_groups']['pvals']).to_csv("$uns_rank_genes_groups_pvals", sep="\t", index = False)
pd.DataFrame(adata.uns['rank_genes_groups']['pvals_adj']).to_csv("$uns_rank_genes_groups_pvals_adj", sep="\t", index = False)
pd.DataFrame(adata.uns['rank_genes_groups']['scores']).to_csv("$uns_rank_genes_groups_scores", sep="\t", index = False)
    #end if

#else if $inspect.info == 'obsm'
    #if $inspect.obsm_info == 'X_pca'
pd.DataFrame(adata.obsm['X_pca']).to_csv("$obsm_X_pca", sep="\t", index = False)
    #else if $inspect.obsm_info == 'X_umap'
pd.DataFrame(adata.obsm['X_umap']).to_csv("$obsm_X_umap", sep="\t", index = False)
    #else if $inspect.obsm_info == 'X_tsne'
pd.DataFrame(adata.obsm['X_tsne']).to_csv("$obsm_X_tsne", sep="\t", index = False)
    #else if $inspect.obsm_info == 'X_draw_graph'
for key in adata.obsm.keys():
    if key.startswith('X_draw_graph'):
        pd.DataFrame(adata.obsm[key]).to_csv(key, sep="\t", index = False)
    #else if $inspect.obsm_info == 'X_diffmap'
pd.DataFrame(adata.obsm['X_diffmap']).to_csv("$obsm_X_diffmap", sep="\t", index = False)
    #end if

#else if $inspect.info == 'varm'
    #if $inspect.varm_info == 'PCs'
pd.DataFrame(adata.varm['PCs']).to_csv("$varm_PCs", sep="\t", index = False)
    #end if

#end if
]]></configfile>
    </configfiles>
    <!-- Tool form: one h5ad dataset plus the "inspect" conditional that chooses what to export.
         Every option value of inspect.info has a matching branch in the script_file template
         and matching filter expressions in the outputs section. -->
    <inputs>
        <param name="input" type="data" format="h5ad" label="Annotated data matrix"/>
        <conditional name="inspect">
            <param name="info" type="select" label="What to inspect?">
                <option value="general">General information about the object</option>
                <option value="X">The full data matrix</option>
                <option value="chunk_X">A chunk of the data matrix</option>
                <option value="obs">Key-indexed observations annotation (obs)</option>
                <option value="var">Key-indexed annotation of variables/features (var)</option>
                <option value="uns">Unstructured annotation (uns)</option>
                <option value="obsm">Multi-dimensional observations annotation (obsm)</option>
                <option value="varm">Multi-dimensional variables annotation (varm)</option>
            </param>
            <when value="general"/>
            <when value="X"/>
            <when value="chunk_X">
                <!-- Defined in macros.xml (not visible here); the script reads chunk.info,
                     chunk.size, chunk.replace and chunk.list from it. -->
                <expand macro="params_chunk_X"/>
            </when>
            <when value="obs"/>
            <when value="var"/>
            <when value="uns">
                <!-- Selects which entry of the unstructured annotation is exported. -->
                <param name="uns_info" type="select" label="What to inspect in uns?">
                    <option value="neighbors">Neighbors</option>
                    <option value="paga">PAGA</option>
                    <option value="pca">PCA</option>
                    <option value="rank_genes_groups">Rank gene groups (rank_genes_groups)</option>
                </param>
            </when>
            <when value="obsm">
                <!-- Selects which obsm array (or, for X_draw_graph, which key prefix) is exported. -->
                <param name="obsm_info" type="select" label="Which annotation to inspect for the observations?">
                    <option value="X_pca">PCA coordinates (X_pca)</option>
                    <option value="X_umap">UMAP coordinates (X_umap)</option>
                    <option value="X_tsne">tSNE coordinates (X_tsne)</option>
                    <option value="X_draw_graph">Coordinates of graph layout (X_draw_graph)</option>
                    <option value="X_diffmap">Diffusion map representation of data (X_diffmap)</option>
                </param>
            </when>
            <when value="varm">
                <!-- Only the PCs array is offered for varm. -->
                <param name="varm_info" type="select" label="Which annotation to inspect for the variables?">
                    <option value="PCs">Principal components containing the loadings</option>
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Text file the script fills with print(adata); created only for the "general" choice. -->
        <data name="general" format="txt" label="${tool.name} on ${on_string}: General information">
            <filter>inspect['info'] == 'general'</filter>
        </data>
        <!-- Full data matrix written by adata.to_df().to_csv; the label mirrors the wording of the
             matching "X" option in the inspect.info select. -->
        <data name="X" format="tabular" label="${tool.name} on ${on_string}: The full data matrix (X)">
            <filter>inspect['info'] == 'X'</filter>
        </data>
        <!-- Annotation data frames (adata.obs / adata.var) written as tab-separated text with their index. -->
        <data name="obs" format="tabular" label="${tool.name} on ${on_string}: Key-indexed observations annotation (obs)">
            <filter>inspect['info'] == 'obs'</filter>
        </data>
        <data name="var" format="tabular" label="${tool.name} on ${on_string}: Key-indexed annotation of variables/features (var)">
            <filter>inspect['info'] == 'var'</filter>
        </data>
        <!-- Result of adata.chunk_X wrapped in a DataFrame; the label mirrors the wording of the
             matching "chunk_X" option in the inspect.info select. -->
        <data name="chunk_X" format="tabular" label="${tool.name} on ${on_string}: A chunk of the data matrix (chunk_X)">
            <filter>inspect['info'] == 'chunk_X'</filter>
        </data>
        <!-- Matrix Market outputs. The script writes these files into the job working directory
             with io.mmwrite; from_work_dir must match the file names used there. -->
        <data name="uns_neighbors_connectivities" format="mtx" from_work_dir="uns_neighbors_connectivities.mtx" label="${tool.name} on ${on_string}: Weighted adjacency matrix of the neighborhood graph of data points">
            <filter>inspect['info'] == 'uns' and inspect['uns_info'] == 'neighbors'</filter>
        </data>
        <data name="uns_neighbors_distances" format="mtx" from_work_dir="uns_neighbors_distances.mtx" label="${tool.name} on ${on_string}: Distances for each pair of neighbors">
            <filter>inspect['info'] == 'uns' and inspect['uns_info'] == 'neighbors'</filter>
        </data>
        <data name="uns_paga_connectivities" format="mtx" from_work_dir="uns_paga_connectivities.mtx" label="${tool.name} on ${on_string}: Full adjacency matrix of the abstracted graph">
            <filter>inspect['info'] == 'uns' and inspect['uns_info'] == 'paga'</filter>
        </data>
        <data name="uns_paga_connectivities_tree" format="mtx" from_work_dir="uns_paga_connectivities_tree.mtx" label="${tool.name} on ${on_string}: Adjacency matrix of the tree-like subgraph">
            <filter>inspect['info'] == 'uns' and inspect['uns_info'] == 'paga'</filter>
        </data>
        <!-- PCA entries of adata.uns['pca']: "variance" holds the explained variance (the
             eigenvalues of the covariance matrix) and "variance_ratio" holds the ratio of
             explained variance, so each label has to describe the key its file is written from. -->
        <data name="uns_pca_variance" format="tabular" label="${tool.name} on ${on_string}: Explained variance, equivalent to the eigenvalues of the covariance matrix, for PCA">
            <filter>inspect['info'] == 'uns' and inspect['uns_info'] == 'pca'</filter>
        </data>
        <data name="uns_pca_variance_ratio" format="tabular" label="${tool.name} on ${on_string}: Ratio of explained variance for PCA">
            <filter>inspect['info'] == 'uns' and inspect['uns_info'] == 'pca'</filter>
        </data>
        <!-- One tabular file per field of adata.uns['rank_genes_groups'] exported by the script
             (names, scores, logfoldchanges, pvals, pvals_adj); all five share the same filter. -->
        <data name="uns_rank_genes_groups_names" format="tabular" label="${tool.name} on ${on_string}: Names for rank genes">
            <filter>inspect['info'] == 'uns' and inspect['uns_info'] == 'rank_genes_groups'</filter>
        </data>
        <data name="uns_rank_genes_groups_scores" format="tabular" label="${tool.name} on ${on_string}: Z-scores for rank genes">
            <filter>inspect['info'] == 'uns' and inspect['uns_info'] == 'rank_genes_groups'</filter>
        </data>
        <data name="uns_rank_genes_groups_logfoldchanges" format="tabular" label="${tool.name} on ${on_string}: Log2 fold changes for rank genes">
            <filter>inspect['info'] == 'uns' and inspect['uns_info'] == 'rank_genes_groups'</filter>
        </data>
        <data name="uns_rank_genes_groups_pvals" format="tabular" label="${tool.name} on ${on_string}: P-values for rank genes">
            <filter>inspect['info'] == 'uns' and inspect['uns_info'] == 'rank_genes_groups'</filter>
        </data>
        <data name="uns_rank_genes_groups_pvals_adj" format="tabular" label="${tool.name} on ${on_string}: Adjusted p-values for rank genes">
            <filter>inspect['info'] == 'uns' and inspect['uns_info'] == 'rank_genes_groups'</filter>
        </data>
        <!-- Multi-dimensional annotations, each written as a tabular file with index = False. -->
        <data name="obsm_X_pca" format="tabular" label="${tool.name} on ${on_string}: PCA coordinates for observations">
            <filter>inspect['info'] == 'obsm' and inspect['obsm_info'] == 'X_pca'</filter>
        </data>
        <data name="obsm_X_umap" format="tabular" label="${tool.name} on ${on_string}: UMAP coordinates for observations">
            <filter>inspect['info'] == 'obsm' and inspect['obsm_info'] == 'X_umap'</filter>
        </data>
        <data name="obsm_X_tsne" format="tabular" label="${tool.name} on ${on_string}: tSNE coordinates for observations">
            <filter>inspect['info'] == 'obsm' and inspect['obsm_info'] == 'X_tsne'</filter>
        </data>
        <!-- The script writes one file per obsm key starting with X_draw_graph, named after the key;
             the part after "X_draw_graph_" becomes the element identifier in the list collection. -->
        <collection name="obsm_X_draw_graph" type="list" label="${tool.name} on ${on_string}: Coordinates of graph layout">
            <discover_datasets pattern="X_draw_graph_(?P&lt;designation&gt;.*)" format="tabular"/>
            <filter>inspect['info'] == 'obsm' and inspect['obsm_info'] == 'X_draw_graph'</filter>
        </collection>
        <data name="obsm_X_diffmap" format="tabular" label="${tool.name} on ${on_string}: Diffusion map representation for observations">
            <filter>inspect['info'] == 'obsm' and inspect['obsm_info'] == 'X_diffmap'</filter>
        </data>
        <data name="varm_PCs" format="tabular" label="${tool.name} on ${on_string}: Principal components containing the loadings for variables">
            <filter>inspect['info'] == 'varm' and inspect['varm_info'] == 'PCs'</filter>
        </data>
    </outputs>
    <tests>
        <test expect_num_outputs="1">
            <!-- test 1: general info; the text printed from the AnnData object is compared with inspect.general.txt -->
            <param name="input" value="krumsiek11.h5ad"/>
            <conditional name="inspect">
                <param name="info" value="general"/>
            </conditional>
            <output name="general" value="inspect.general.txt"/>
        </test>
        <test expect_num_outputs="1">
            <!-- test 2: X; stdout has to contain the rendered to_csv call (the script text is presumably
                 echoed by @CMD@, TODO confirm in macros.xml) and the table has to equal inspect.X.tabular -->
            <param name="input" value="krumsiek11.h5ad"/>
            <conditional name="inspect">
                <param name="info" value="X"/>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="adata.to_df\(\).to_csv"/>
            </assert_stdout>
            <output name="X" value="inspect.X.tabular" ftype="tabular"/>
        </test>
        <test expect_num_outputs="1">
            <!-- test 3: obs; same pattern as test 2 for the observations annotation -->
            <param name="input" value="krumsiek11.h5ad"/>
            <conditional name="inspect">
                <param name="info" value="obs"/>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="adata.obs.to_csv"/>
            </assert_stdout>
            <output name="obs" value="inspect.obs.tabular" ftype="tabular"/>
        </test>
        <test expect_num_outputs="1">
            <!-- test 4: var; same pattern as test 2 for the variables annotation -->
            <param name="input" value="krumsiek11.h5ad"/>
            <conditional name="inspect">
                <param name="info" value="var"/>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="adata.var.to_csv"/>
            </assert_stdout>
            <output name="var" value="inspect.var.tabular" ftype="tabular"/>
        </test>
        <test expect_num_outputs="1">
            <!-- test 5: chunk_X, specified; the comma-separated text "3,5,8" has to be rendered
                 as the Python list [3, 5, 8] in the chunk_X call -->
            <param name="input" value="krumsiek11.h5ad"/>
            <conditional name="inspect">
                <param name="info" value="chunk_X"/>
                <conditional name="chunk">
                    <param name="info" value="specified"/>
                    <param name="list" value="3,5,8"/>
                </conditional>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="adata.chunk_X"/>
                <has_text_matching expression="select=\[3, 5, 8\]"/>
            </assert_stdout>
            <output name="chunk_X" value="inspect.chunk_X.specified.tabular" ftype="tabular"/>
        </test>
        <test expect_num_outputs="1">
            <!-- test 6: chunk_X, random; the chunk size is set through "size", the parameter the
                 script reads as $inspect.chunk.size in the random branch ("list" is only read in
                 the specified branch). The rows are random, so only the header line of the
                 output is checked. -->
            <param name="input" value="krumsiek11.h5ad"/>
            <conditional name="inspect">
                <param name="info" value="chunk_X"/>
                <conditional name="chunk">
                    <param name="info" value="random"/>
                    <param name="size" value="10"/>
                    <param name="replace" value="true"/>
                </conditional>
            </conditional>
            <assert_stdout>
                <has_text_matching expression="adata.chunk_X"/>
                <has_text_matching expression="select=10"/>
                <has_text_matching expression="replace=True"/>
            </assert_stdout>
            <output name="chunk_X">
                <assert_contents>
                    <has_text_matching expression="0\t1\t2\t3\t4\t5\t6\t7\t8\t9\t10"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="2">
            <!-- test 7: uns, neighbors; both Matrix Market files are checked for their size line
                 and a few stored values. The size-line expressions use plain spaces: a literal tab
                 inside an attribute value is turned into a space by the XML parser anyway, so the
                 space is what the expression really matches. -->
            <param name="input" value="pp.neighbors_umap_euclidean.recipe_weinreb17.paul15_subsample.h5ad"/>
            <conditional name="inspect">
                <param name="info" value="uns"/>
                <param name="uns_info" value="neighbors"/>
            </conditional>
            <output name="uns_neighbors_connectivities" ftype="mtx">
                <assert_contents>
                    <has_text_matching expression="100 100 2496" />
                    <has_text_matching expression="4.880" />
                </assert_contents>
            </output>
            <output name="uns_neighbors_distances" ftype="mtx">
                <assert_contents>
                    <has_text_matching expression="100 100 1400" />
                    <has_text_matching expression="4.973" />
                    <has_text_matching expression="4.877" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="2">
            <!-- test 8: uns, paga; both Matrix Market files are checked for their size line and a
                 few entries. The size-line expressions use plain spaces: a literal tab inside an
                 attribute value is turned into a space by the XML parser anyway, so the space is
                 what the expression really matches. -->
            <param name="input" value="tl.paga.neighbors_gauss_braycurtis.recipe_weinreb17.paul15_subsample.h5ad"/>
            <conditional name="inspect">
                <param name="info" value="uns"/>
                <param name="uns_info" value="paga"/>
            </conditional>
            <output name="uns_paga_connectivities" ftype="mtx">
                <assert_contents>
                    <has_text_matching expression="16 16 97" />
                    <has_text_matching expression="2 1 1" />
                    <has_text_matching expression="8.839" />
                </assert_contents>
            </output>
            <output name="uns_paga_connectivities_tree" ftype="mtx">
                <assert_contents>
                    <has_text_matching expression="16 16 15" />
                    <has_text_matching expression="1 2 1" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="2">
            <!-- test 9: uns, pca; both single-column tables are checked for two expected values each -->
            <param name="input" value="pp.pca.krumsiek11.h5ad"/>
            <conditional name="inspect">
                <param name="info" value="uns"/>
                <param name="uns_info" value="pca"/>
            </conditional>
            <output name="uns_pca_variance">
                <assert_contents>
                    <has_text_matching expression="0.75409\d{2}" />
                    <has_text_matching expression="3.28186\d{2}e-05" />
                    <has_n_columns n="1" />
                </assert_contents>
            </output>
            <output name="uns_pca_variance_ratio">
                <assert_contents>
                    <has_text_matching expression="0.039053\d{2}" />
                    <has_text_matching expression="0.00013167" />
                    <has_n_columns n="1" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="5">
            <!-- test 10: uns, rank_gene_groups; all five tables need five columns (one per group).
                 NOTE(review): several value assertions below are commented out; the reason is not
                 recorded in this file. -->
            <param name="input" value="tl.rank_genes_groups.krumsiek11.h5ad"/>
            <conditional name="inspect">
                <param name="info" value="uns"/>
                <param name="uns_info" value="rank_genes_groups"/>
            </conditional>
            <output name="uns_rank_genes_groups_names">
                <assert_contents>
                    <has_n_columns n="5" />
                    <has_text_matching expression="Ery\tMk\tMo\tNeu\tprogenitor"/>
                    <has_text_matching expression="Gata1\tFog1\tPu.1\tCebpa\tEgrNab"/>
                    <has_text_matching expression="EgrNab\tEgrNab\tSCL\tSCL\tGfi1"/>
                </assert_contents>
            </output>
            <output name="uns_rank_genes_groups_scores">
                <assert_contents>
                    <has_n_columns n="5" />
                    <has_text_matching expression="Ery\tMk\tMo\tNeu\tprogenitor"/>
<!--                    <has_text_matching expression="18.8\d{4}"/>-->
                    <has_text_matching expression="17.85673"/>
<!--                    <has_text_matching expression="-2.637\d{4}"/>-->
<!--                    <has_text_matching expression="-2.980\d{4}"/>-->
                    <has_text_matching expression="-6.46\d{4}"/>
                </assert_contents>
            </output>
            <output name="uns_rank_genes_groups_logfoldchanges">
                <assert_contents>
                    <has_n_columns n="5" />
                </assert_contents>
            </output>
            <output name="uns_rank_genes_groups_pvals">
                <assert_contents>
                    <has_n_columns n="5" />
<!--                    <has_text_matching expression="1.8009"/>-->
                </assert_contents>
            </output>
            <output name="uns_rank_genes_groups_pvals_adj">
                <assert_contents>
                    <has_n_columns n="5" />
<!--                    <has_text_matching expression="1.97952"/>-->
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 11: obsm, X_pca; spot-checks three values and expects 10 columns -->
            <param name="input" value="pp.pca.krumsiek11.h5ad"/>
            <conditional name="inspect">
                <param name="info" value="obsm"/>
                <param name="obsm_info" value="X_pca"/>
            </conditional>
            <output name="obsm_X_pca">
                <assert_contents>
                    <has_text_matching expression="0.0045348783" />
                    <has_text_matching expression="3.4109413" />
                    <has_text_matching expression="-0.6401007" />
                    <has_n_columns n="10" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 12: obsm_info, X_umap; spot-checks four value prefixes and expects 2 columns -->
            <param name="input" value="tl.umap.h5ad"/>
            <conditional name="inspect">
                <param name="info" value="obsm"/>
                <param name="obsm_info" value="X_umap"/>
            </conditional>
            <output name="obsm_X_umap">
                <assert_contents>
                    <has_text text="1.664" />
                    <has_text text="5.425" />
                    <has_text text="-1.748" />
                    <has_text text="-9.714" />
                    <has_n_columns n="2" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 13: obsm_info, X_tsne; spot-checks two values and expects 2 columns -->
            <param name="input" value="tl.tsne.h5ad"/>
            <conditional name="inspect">
                <param name="info" value="obsm"/>
                <param name="obsm_info" value="X_tsne"/>
            </conditional>
            <output name="obsm_X_tsne">
                <assert_contents>
                    <has_text text="14.301989" />
                    <has_text text="-19.447426" />
                    <has_n_columns n="2" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 14: obsm_info, X_draw_graph; the collection has to contain an element "fr",
                 i.e. the file written for the obsm key X_draw_graph_fr -->
            <param name="input" value="tl.draw_graph.h5ad"/>
            <conditional name="inspect">
                <param name="info" value="obsm"/>
                <param name="obsm_info" value="X_draw_graph"/>
            </conditional>
            <output_collection name="obsm_X_draw_graph">
                <element name="fr">
                    <assert_contents>
                        <has_text text="-39.77" />
                        <has_text text="-24.83" />
                        <has_text text="-34.34" />
                        <has_text text="-22.34" />
                        <has_n_columns n="2" />
                    </assert_contents>
                </element>
            </output_collection>
        </test>
        <test expect_num_outputs="1">
            <!-- test 15: obsm_info, X_diffmap; spot-checks two value prefixes and expects 15 columns -->
            <param name="input" value="tl.diffmap.h5ad"/>
            <conditional name="inspect">
                <param name="info" value="obsm"/>
                <param name="obsm_info" value="X_diffmap"/>
            </conditional>
            <output name="obsm_X_diffmap">
                <assert_contents>
                    <has_text text="0.1006" />
                    <has_text text="-0.0619" />
                    <has_n_columns n="15" />
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test 16: varm_info, PCs; spot-checks two value prefixes and expects 10 columns -->
            <param name="input" value="pp.pca.krumsiek11.h5ad"/>
            <conditional name="inspect">
                <param name="info" value="varm"/>
                <param name="varm_info" value="PCs"/>
            </conditional>
            <output name="varm_PCs">
                <assert_contents>
                    <has_text_matching expression="0.2352" />
                    <has_text_matching expression="-0.0492" />
                    <has_n_columns n="10" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

This tool inspects an AnnData dataset and returns one of the following, depending on the selected option:

- General information about the object as text
- The full data matrix (`X`) as a Tabular
- A chunk of the data matrix as a Tabular, using the `chunk_X method <https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.chunk_X.html>`__
- Key-indexed observations annotation (`obs`) as a Tabular
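- Key-indexed annotation of variables/features (`var`) as a Tabular
- Unstructured annotation (`uns`): the neighbors and PAGA matrices in Matrix Market (MTX) format, the PCA variances and the `rank_genes_groups` results as Tabular
- Multi-dimensional observations annotation (`obsm`): PCA, UMAP, tSNE, graph layout or diffusion map coordinates as Tabular
- Multi-dimensional variables annotation (`varm`): the principal component loadings as a Tabular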

@HELP@
    ]]></help>
    <expand macro="citations"/>
</tool>