Mercurial > repos > iuc > celltypist
view celltypist.xml @ 1:7518638a7b75 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/celltypist commit b7038e7d88c5b3661a6539b25b6faf95f3c7112a
| author | iuc |
|---|---|
| date | Tue, 10 Mar 2026 21:40:01 +0000 |
| parents | 8722e08a96f4 |
| children |
line wrap: on
line source
<tool id="celltypist" name="CellTypist" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Automated cell type annotation for scRNA-seq datasets</description>
    <!--
        Galaxy wrapper around CellTypist. A Cheetah-templated Python script
        (see <configfiles>) loads the input AnnData, obtains a model (cached
        data table entry, model file from the history, or a model trained on
        the fly), runs celltypist.annotate and writes the annotated AnnData.
        Optionally a dotplot comparing a reference .obs column with the
        predictions is produced.
    -->
    <macros>
        <token name="@TOOL_VERSION@">1.7.1</token>
        <token name="@VERSION_SUFFIX@">0</token>
        <token name="@PROFILE@">25.0</token>
    </macros>
    <xrefs>
        <xref type="bio.tools">celltypist</xref>
    </xrefs>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">celltypist</requirement>
    </requirements>
    <creator>
        <organization name="European Galaxy Team" url="https://galaxyproject.org/eu/people/"/>
    </creator>
    <!-- The rendered script is echoed to stdout first so that the exact code that ran is kept in the job log. -->
    <command><![CDATA[
cat '$script_file' &&
python '$script_file'
    ]]></command>
    <configfiles>
        <configfile name="script_file"><![CDATA[
import scanpy as sc
import celltypist
from celltypist import models

adata = sc.read_h5ad('$adata')

## Model selection: cached data table entry, model file from the history, or train a new model.
#if $model_source.source == "cached"
model = models.Model.load(model='$model_source.cached_model.fields.path')
#else if $model_source.source == "history"
#if $model_source.history_model_source.history_model_select == "select_model"
model = models.Model.load(model='$model_source.history_model_source.history_model')
#else if $model_source.history_model_source.history_model_select == "train_model"
train_adata = sc.read_h5ad('$model_source.history_model_source.train_anndata')
model = celltypist.train(
    X=train_adata,
    labels='$model_source.history_model_source.labels',
    batch_number=$model_source.history_model_source.batch_number,
    batch_size=$model_source.history_model_source.batch_size,
    epochs=$model_source.history_model_source.epochs,
    feature_selection=$model_source.history_model_source.feature_selection,
    top_genes=$model_source.history_model_source.top_genes)
#end if
#end if

## Optional keyword arguments are only emitted when the corresponding switch is enabled.
predictions = celltypist.annotate(
    adata,
    model=model,
#if $majority_voting
    majority_voting=True,
#end if
#if $transpose_input
    transpose_input=$transpose_input,
#end if
    mode='$mode',
    p_thres=$p_thres,
    min_prop=$min_prop)

adata = predictions.to_adata()
adata.write_h5ad('$anndata_out', compression='gzip')

## The plot is written below figures/ in the working directory and collected via from_work_dir.
#if $dotplot.generate == "yes"
celltypist.dotplot(
    predictions,
    use_as_reference='$dotplot.reference',
    use_as_prediction='$dotplot.prediction',
    save='.$dotplot.format',
    show=None)
#end if
        ]]></configfile>
    </configfiles>
    <inputs>
        <param name="adata" type="data" format="h5ad" label="Input AnnData file"/>
        <conditional name="model_source">
            <param name="source" type="select" label="Select model from">
                <option value="cached" selected="true">Cached</option>
                <option value="history">History</option>
            </param>
            <when value="cached">
                <param name="cached_model" type="select" label="Choose CellTypist model">
                    <options from_data_table="celltypist_models">
                    </options>
                </param>
            </when>
            <when value="history">
                <conditional name="history_model_source">
                    <param name="history_model_select" type="select" label="Select a model or train a model from history">
                        <option value="select_model" selected="true">Select a compatible model</option>
                        <option value="train_model">Train a model on an existing AnnData and use it</option>
                    </param>
                    <when value="select_model">
                        <param name="history_model" type="data" format="binary" label="Select a compatible model from history"/>
                    </when>
                    <when value="train_model">
                        <param name="train_anndata" type="data" format="h5ad" label="Select an AnnData file from history"/>
                        <param name="labels" type="text" optional="false" label="The column name in the .obs attribute of the training AnnData file that contains the cell type labels">
                            <sanitizer invalid_char="">
                                <valid initial="string.letters,string.digits">
                                    <add value="_"/>
                                </valid>
                            </sanitizer>
                            <!-- Anchored on both ends so that a value containing any other character is rejected up front instead of being silently altered by the sanitizer. -->
                            <validator type="regex" message="Only letters, digits and underscores are allowed">^[0-9a-zA-Z_]+$</validator>
                        </param>
                        <!--
                            NOTE(review): use_SGD / mini_batch are not passed to celltypist.train in the
                            script above, so according to their own help texts batch_number, batch_size
                            and epochs appear to have no effect in this wrapper. Confirm, then either
                            expose those switches or drop these three parameters.
                        -->
                        <param name="batch_number" type="integer" min="0" value="100" label="Batch number per epoch" help="The number of batches used for training in each epoch; only relevant when mini-batch SGD training is used (use_SGD = True and mini_batch = True)."/>
                        <param name="batch_size" type="integer" min="1" value="1000" label="Cells per batch" help="The number of cells within each batch; only relevant when mini-batch SGD training is used (use_SGD = True and mini_batch = True)."/>
                        <param name="epochs" type="integer" min="1" value="10" label="Epochs for mini-batch training" help="The number of epochs for the mini-batch training procedure; only relevant when mini-batch SGD training is used (use_SGD = True and mini_batch = True)."/>
                        <param name="feature_selection" type="boolean" checked="false" truevalue="True" falsevalue="False" label="Enable two-pass feature selection" help="If true, performs two-pass training where the first round selects important genes using SGD learning; increases training time."/>
                        <param name="top_genes" type="integer" min="1" value="300" label="Top genes per class" help="Number of top genes per class/cell-type based on absolute regression coefficients; the final feature set is the union across classes."/>
                    </when>
                </conditional>
            </when>
        </conditional>
        <param name="majority_voting" type="boolean" checked="false" truevalue="True" falsevalue="False" label="Refine the predicted labels by running the majority voting classifier after over-clustering"/>
        <param name="transpose_input" type="boolean" checked="false" truevalue="True" falsevalue="False" label="Transpose the input matrix if it is provided in the gene-by-cell format" help="CellTypist requires the cell-by-gene format."/>
        <param name="mode" type="select" label="Annotation mode">
            <option value="best match">Choose the cell type with the largest score/probability as the final prediction</option>
            <option value="prob match">Enable a multi-label classification utilising a probability threshold</option>
        </param>
        <param name="p_thres" type="float" value="0.5" min="0" max="1" label="Probability threshold for the multi-label classification" help="Ignored if mode is best match."/>
        <param name="min_prop" type="float" value="0" min="0" max="1" label="The minimum proportion of cells required to support naming of the subcluster by this cell type" help="Ignored if majority_voting is set to False."/>
        <conditional name="dotplot">
            <param name="generate" type="select" label="Generate a dotplot of the predicted cell types">
                <option value="no" selected="true">No</option>
                <option value="yes">Yes</option>
            </param>
            <when value="no"/>
            <when value="yes">
                <param name="reference" type="text" value="cell_type" label="Reference column in AnnData.obs for dotplot" help="The value can also be a clustering column, e.g. 'leiden'.">
                    <sanitizer invalid_char="">
                        <valid initial="string.letters,string.digits">
                            <add value="_"/>
                        </valid>
                    </sanitizer>
                    <!-- Anchored on both ends so that a value containing any other character is rejected up front instead of being silently altered by the sanitizer. -->
                    <validator type="regex" message="Only letters, digits and underscores are allowed">^[0-9a-zA-Z_]+$</validator>
                </param>
                <param name="prediction" type="select" label="Prediction label in AnnData.obs for dotplot" help="'majority_voting' is only available when the majority voting option above is enabled; otherwise choose 'predicted_labels'.">
                    <option value="majority_voting" selected="true">majority_voting</option>
                    <option value="predicted_labels">predicted_labels</option>
                </param>
                <param name="format" type="select" label="Dotplot format">
                    <option value="png" selected="true">png</option>
                    <option value="pdf">pdf</option>
                    <option value="svg">svg</option>
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data name="anndata_out" format="h5ad" label="${tool.name} on ${on_string}: AnnData with celltype annotations"/>
        <!-- Exactly one of the three dotplot outputs is created, depending on the selected format. -->
        <data name="out_png" format="png" from_work_dir="figures/*.png" label="${tool.name} on ${on_string}: Dotplot PNG">
            <filter>dotplot['generate'] == 'yes' and dotplot['format'] == 'png'</filter>
        </data>
        <data name="out_pdf" format="pdf" from_work_dir="figures/*.pdf" label="${tool.name} on ${on_string}: Dotplot PDF">
            <filter>dotplot['generate'] == 'yes' and dotplot['format'] == 'pdf'</filter>
        </data>
        <data name="out_svg" format="svg" from_work_dir="figures/*.svg" label="${tool.name} on ${on_string}: Dotplot SVG">
            <filter>dotplot['generate'] == 'yes' and dotplot['format'] == 'svg'</filter>
        </data>
    </outputs>
    <tests>
        <!-- Cached model, majority voting enabled, PNG dotplot. -->
        <test expect_num_outputs="2">
            <param name="adata" location="https://celltypist.cog.sanger.ac.uk/Notebook_demo_data/demo_500_cells.h5ad"/>
            <conditional name="model_source">
                <param name="source" value="cached"/>
                <param name="cached_model" value="Immune_All_High_v1"/>
            </conditional>
            <param name="majority_voting" value="True"/>
            <param name="mode" value="best match"/>
            <param name="p_thres" value="0.5"/>
            <param name="min_prop" value="0.05"/>
            <conditional name="dotplot">
                <param name="generate" value="yes"/>
                <param name="reference" value="cell_type"/>
                <param name="prediction" value="majority_voting"/>
                <param name="format" value="png"/>
            </conditional>
            <output name="anndata_out" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/predicted_labels"/>
                    <has_h5_keys keys="obs/over_clustering"/>
                    <has_h5_keys keys="obs/majority_voting"/>
                    <has_h5_keys keys="obs/conf_score"/>
                </assert_contents>
            </output>
            <output name="out_png" ftype="png" value="majority_voting.png"/>
        </test>
        <!-- Model file supplied from the history, no majority voting, no dotplot. -->
        <test expect_num_outputs="1">
            <param name="adata" location="https://celltypist.cog.sanger.ac.uk/Notebook_demo_data/demo_500_cells.h5ad"/>
            <conditional name="model_source">
                <param name="source" value="history"/>
                <conditional name="history_model_source">
                    <param name="history_model_select" value="select_model"/>
                    <param name="history_model" location="https://celltypist.cog.sanger.ac.uk/models/Pan_Immune_CellTypist/v2/Immune_All_Low.pkl"/>
                </conditional>
            </conditional>
            <param name="majority_voting" value="False"/>
            <param name="mode" value="prob match"/>
            <param name="p_thres" value="0.5"/>
            <param name="min_prop" value="0.05"/>
            <output name="anndata_out" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/predicted_labels"/>
                    <has_h5_keys keys="obs/conf_score"/>
                </assert_contents>
            </output>
        </test>
        <!-- Model trained on the fly from an AnnData in the history, PDF dotplot of predicted_labels. -->
        <test expect_num_outputs="2">
            <param name="adata" location="https://celltypist.cog.sanger.ac.uk/Notebook_demo_data/demo_500_cells.h5ad"/>
            <conditional name="model_source">
                <param name="source" value="history"/>
                <conditional name="history_model_source">
                    <param name="history_model_select" value="train_model"/>
                    <param name="train_anndata" location="https://celltypist.cog.sanger.ac.uk/Notebook_demo_data/demo_500_cells.h5ad"/>
                    <param name="labels" value="cell_type"/>
                    <param name="top_genes" value="100"/>
                </conditional>
            </conditional>
            <param name="majority_voting" value="False"/>
            <param name="mode" value="prob match"/>
            <param name="p_thres" value="0.5"/>
            <param name="min_prop" value="0.05"/>
            <conditional name="dotplot">
                <param name="generate" value="yes"/>
                <param name="reference" value="cell_type"/>
                <param name="prediction" value="predicted_labels"/>
                <param name="format" value="pdf"/>
            </conditional>
            <output name="anndata_out" ftype="h5ad">
                <assert_contents>
                    <has_h5_keys keys="obs/predicted_labels"/>
                    <has_h5_keys keys="obs/conf_score"/>
                </assert_contents>
            </output>
            <output name="out_pdf" ftype="pdf" value="predicted_labels.pdf"/>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

CellTypist_ is an automated cell type annotation tool for scRNA-seq datasets on the basis of logistic regression classifiers optimised by the stochastic gradient descent algorithm. CellTypist allows for cell prediction using either built-in (with a current focus on immune sub-populations) or custom models, in order to assist in the accurate classification of different cell types and subtypes.

.. _CellTypist: https://www.celltypist.org/

------

**Inputs**

An anndata file in h5ad format that usually contains clustering results from single-cell RNA-seq analysis.

------

**Outputs**

An anndata file in h5ad format with predicted cell type annotations added to the .obs attribute.

::

             cell_type     predicted_labels  over_clustering  majority_voting     conf_score
    cell1    Plasma cells  Plasma cells      13               Follicular B cells  0.996313
    cell2    Plasma cells  Plasma cells      6                Plasma cells        0.999478
    cell3    Plasma cells  Plasma cells      12               Plasma cells        0.999957
    cell4    Plasma cells  Plasma cells      6                Plasma cells        0.996070
    cell5    Plasma cells  Plasma cells      6                Plasma cells        0.998888
    ...      ...           ...               ...              ...                 ...
    cell496  Macro_pDC     pDC               9                Macrophages         0.187152
    cell497  Macro_pDC     Macrophages       18               pDC                 0.849831
    cell498  Macro_pDC     Macrophages       9                Macrophages         0.809677
    cell499  Macro_pDC     Macrophages       9                Macrophages         0.937306
    cell500  Macro_pDC     pDC               9                Macrophages         0.612069
    ]]></help>
    <citations>
        <citation type="doi">10.1126/science.abl5197</citation>
    </citations>
</tool>
