Mercurial > repos > ebi-gxa > celltypist_predict
view celltypist.xml @ 1:df005630100e draft default tip
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 4dd95e3e6c059cc9df47f5d08e4f3f8b618830f1-dirty
| author | ebi-gxa |
|---|---|
| date | Wed, 19 Feb 2025 12:10:17 +0000 |
| parents | a7d6985ba791 |
| children |
line wrap: on
line source
<!--
  Galaxy wrapper for CellTypist "predict": runs the bundled celltypist_CLI.py
  with "action predict" against a user-supplied model and writes a tabular
  file of per-cell predictions.
-->
<tool id="celltypist_predict" name="CellTypist: Predict Cell Types" version="1.6.3+galaxy1" profile="20.05" license="MIT">
    <description>Predict cell types using an existing CellTypist model.</description>
    <requirements>
        <requirement type="package" version="1.6.3">celltypist</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Link the inputs to fixed names with the extensions used on the command line below.
        #if $inp.format == 'h5ad':
            ln -s '$inp.data' input.h5ad &&
        #else:
            ln -s '$inp.data' input.mtx &&
        #end if
        ln -s '$model_file' model.pkl &&

        ## The script path is quoted so that install locations containing spaces still work.
        python '$__tool_directory__/celltypist_CLI.py'
            --action predict
            --output_path '$output_predictions'
            --model model.pkl
        #if $inp.format == 'h5ad':
            --input_path input.h5ad
            #if $inp.raw_counts_layer:
                --raw_counts_layer '$inp.raw_counts_layer'
            #end if
            #if $inp.gene_symbols_field:
                --gene_symbols_field '$inp.gene_symbols_field'
            #end if
        #else:
            --input_path input.mtx
            --gene_file '$inp.gene_file'
            --cell_file '$inp.cell_file'
        #end if
            ## Boolean parameters expand to their flag when checked and to nothing otherwise.
            ${normalize}
            ${transpose_input}
            --mode '$mode'
            --p_thres '$p_thres'
            ${majority_voting}
        #if $over_clustering:
            --over_clustering '$over_clustering'
        #end if
    ]]></command>
    <inputs>
        <!-- Input can be a single AnnData file or a Matrix Market matrix plus gene and cell files. -->
        <conditional name="inp" label="Input format">
            <param name="format" type="select" label="Input format" help="Format for the input files, either AnnData or Matrix market format (10x)">
                <option value="h5ad">AnnData</option>
                <option value="mtx">Matrix market</option>
            </param>
            <when value="h5ad">
                <param format="h5ad" type="data" name="data" label="Input data (AnnData)" help="The input data for prediction."/>
                <param type="text" name="raw_counts_layer" label="Raw counts layer" help="The name of the layer that stores the raw counts. Uses default matrix if not present."/>
                <param type="text" name="gene_symbols_field" label="Gene symbols field" help="The field in AnnData where the gene symbols are stored, if not in index."/>
            </when>
            <when value="mtx">
                <param format="mtx,txt" type="data" name="data" label="Input matrix (mtx file)" help="The input data for prediction."/>
                <param format="txt,tabular" type="data" name="gene_file" label="Gene file" help="The file containing one gene per line corresponding to the genes in the input data (required for .mtx data)."/>
                <param format="txt,tabular" type="data" name="cell_file" label="Cell file" help="The file containing one cell per line corresponding to the cells in the input data (required for .mtx data)."/>
            </when>
        </conditional>
        <param format="data" type="data" name="model_file" optional="false" label="Model file" help="A CellTypist model file in .pkl format."/>
        <param type="boolean" argument="--normalize" label="Normalize" truevalue="--normalize" falsevalue="" help="If raw counts are provided in the AnnData object, they need to be normalized."/>
        <param type="boolean" argument="--transpose_input" label="Transpose input" truevalue="--transpose_input" falsevalue="" help="If the provided matrix is in the gene-by-cell format, please transpose the input to cell-by-gene format."/>
        <!-- Majority voting is a separate option below; it is not one of the prediction modes. -->
        <param type="select" name="mode" label="Mode" help="Mode for the prediction: best match or probability match.">
            <option value="best_match" selected="true">Best match</option>
            <option value="prob_match">Probability match</option>
        </param>
        <param type="float" name="p_thres" value="0.5" label="Probability threshold" help="Probability threshold for assigning a cell type in a multiclass problem. Ignored if Mode set to Best match."/>
        <param type="boolean" argument="--majority_voting" label="Majority voting" truevalue="--majority_voting" falsevalue="" help="Refine the predicted labels by running the majority voting classifier after over-clustering."/>
        <param type="text" name="over_clustering" label="Over-clustering" help="If majority voting is set to True, specify the type of over-clustering that is to be performed. This can be specified in the AnnData or an input file specifying the over-clustering per cell. If not present, then the default heuristic over-clustering based on input data will be used."/>
    </inputs>
    <outputs>
        <data format="tabular" name="output_predictions" label="${tool.name} on ${on_string}: cell typing table"/>
    </outputs>
    <tests>
        <!-- AnnData input with normalization, best-match mode and gene symbols read from the SYMBOL field. -->
        <test expect_num_outputs="1">
            <param name="model_file" value="Healthy_COVID19_PBMC.pkl"/>
            <param name="normalize" value="true"/>
            <param name="mode" value="best_match"/>
            <param name="p_thres" value="0.5"/>
            <conditional name="inp">
                <param name="format" value="h5ad"/>
                <param name="data" value="raw_counts_pbmc3k.h5ad"/>
                <param name="gene_symbols_field" value="SYMBOL"/>
            </conditional>
            <output name="output_predictions">
                <assert_contents>
                    <has_text text="MAIT"/>
                    <has_n_lines n="2701"/>
                    <has_n_columns n="2"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. _celltypist_predict_tool:

CellTypist: Predict Cell Types
------------------------------

CellTypist is a tool for automatic cell type annotation using single-cell RNA sequencing data. This Galaxy tool wrapper executes the CellTypist script in "predict" mode, allowing you to predict cell types using an existing CellTypist model.

Input
^^^^^

- **Input format**: Choose the format for the input files, either AnnData or Matrix market format (10x).

  - If AnnData is selected, provide the input data (AnnData file).
  - If Matrix market is selected, provide the input matrix (mtx file), gene file, and cell file.

- **Input data**: The input data for prediction.
- **Gene file**: The file containing one gene per line corresponding to the genes in the input data (required for .mtx data).
- **Cell file**: The file containing one cell per line corresponding to the cells in the input data (required for .mtx data).
- **Model file**: The existing CellTypist model file in .pkl format.
- **Raw counts layer**: The name of the layer that stores the raw counts. Uses the default matrix if not provided.
- **Gene symbols field**: The field in AnnData where the gene symbols are stored, if not in index.
- **Normalize**: If raw counts are provided in the AnnData object, they need to be normalized.
- **Transpose input**: If the provided matrix is in the gene-by-cell format, transpose the input to cell-by-gene format.
- **Mode**: Mode for the prediction: best match or probability match.
- **Probability threshold**: Probability threshold for assigning a cell type in a multiclass problem. Ignored if Mode set to Best match.
- **Majority voting**: Refine the predicted labels by running the majority voting classifier after over-clustering.
- **Over-clustering**: If majority voting is set to True, specify the type of over-clustering that is to be performed. This can be specified in the AnnData or an input file specifying the over-clustering per cell. If not provided, the default heuristic over-clustering based on input data will be used.

Output
^^^^^^

- **Output predictions**: The predicted cell types for the input data.

Citation
^^^^^^^^

- CellTypist: a tool for automatic cell type annotation using single-cell RNA sequencing data doi:10.1126/science.abl5197

History
^^^^^^^

Version 1.6.3+galaxy0: Initial wrapper, Pablo Moreno. CLI written by Alex Proutski.
    ]]></help>
    <citations>
        <citation type="bibtex">@article{dominguez_conde2022,
            title={Cross-tissue immune cell analysis reveals tissue-specific features in humans},
            author={Domínguez Conde, C and Xu, C and Jarvis, LB and Rainbow, DB and Wells, SB and Gomes, T and Howlett, SK and Suchanek, O and Polanski, K and King, HW and Mamanova, L and Huang, N and Szabo, PA and Richardson, L and Bolt, L and Fasouli, ES and Mahbubani, KT and Prete, M and Tuck, L and Richoz, N and Tuong, ZK and Campos, L and Mousa, HS and Needham, EJ and Pritchard, S and Li, T and Elmentaite, R and Park, J and Rahmani, E and Chen, D and Menon, DK and Bayraktar, OA and James, LK and Meyer, KB and Yosef, N and Clatworthy, MR and Sims, PA and Farber, DL and Saeb-Parsy, K and Jones, JL and Teichmann, SA},
            journal={Science},
            volume={376},
            number={6594},
            pages={eabl5197},
            year={2022},
            publisher={American Association for the Advancement of Science},
            doi={10.1126/science.abl5197},
            pmid={35549406},
            pmcid={PMC7612735}
        }</citation>
    </citations>
</tool>
