Mercurial > repos > ebi-gxa > celltypist_predict
diff celltypist.xml @ 0:a7d6985ba791 draft
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 53651c0be9daeebb0921f3b5e542323304dfdc98-dirty
| author | ebi-gxa |
|---|---|
| date | Fri, 14 Feb 2025 11:56:48 +0000 |
| parents | |
| children | df005630100e |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/celltypist.xml Fri Feb 14 11:56:48 2025 +0000 @@ -0,0 +1,164 @@ +<tool id="celltypist_predict" name="CellTypist: Predict Cell Types" version="1.6.3+galaxy0" profile="20.05" license="MIT"> + <description>Predict cell types using an existing CellTypist model.</description> + <requirements> + <requirement type="package" version="1.6.3">celltypist</requirement> + </requirements> + <command detect_errors="exit_code"> + <![CDATA[ + #if $inp.format == 'h5ad': + ln -s "$inp.data" input.h5ad && + #else: + ln -s "$inp.data" input.mtx && + #end if + ln -s "$model_file" model.pkl && + python $__tool_directory__/celltypist_CLI.py --action predict + --output_path "$output_predictions" + --model model.pkl + #if $inp.format == 'h5ad': + --input_path input.h5ad + #if $inp.raw_counts_layer: + --raw_counts_layer "$inp.raw_counts_layer" + #end if + #if $inp.gene_symbols_field: + --gene_symbols_field "$inp.gene_symbols_field" + #end if + #else: + --input_path input.mtx + --gene_file "$inp.gene_file" + --cell_file "$inp.cell_file" + #end if + + ${normalize} + ${transpose_input} + --mode "$mode" + --p_thres "$p_thres" + ${majority_voting} + #if $over_clustering: + --over_clustering "$over_clustering" + #end if + ]]> + </command> + <inputs> + <conditional name="inp" label="Input format"> + <param name="format" type="select" label="Input format" help="Format for the input files, either AnnData or Matrix market format (10x)"> + <option value="h5ad">AnnData</option> + <option value="mtx">Matrix market</option> + </param> + <when value="h5ad"> + <param format="h5ad" type="data" name="data" label="Input data (AnnData)" help="The input data for prediction."/> + <param type="text" name="raw_counts_layer" label="Raw counts layer" help="The name of the layer that stores the raw counts. Uses default matrix if not present."/> + <param type="text" name="gene_symbols_field" label="Gene symbols field" help="The field in AnnData where the gene symbols are stored, if not in index."/> + </when> + <when value="mtx"> + <param format="mtx,txt" type="data" name="data" label="Input matrix (mtx file)" help="The input data for prediction."/> + <param format="txt,tabular" type="data" name="gene_file" label="Gene file" help="The file containing one gene per line corresponding to the genes in the input data (required for .mtx data)."/> + <param format="txt,tabular" type="data" name="cell_file" label="Cell file" help="The file containing one cell per line corresponding to the cells in the input data (required for .mtx data)."/> + </when> + </conditional> + <param format="data" type="data" name="model_file" optional="false" label="Model file" help="The existing CellTypist model file in .pkl format."/> + <param type="boolean" argument="--normalize" label="Normalize" truevalue="--normalize" falsevalue="" help="If raw counts are provided in the AnnData object, they need to be normalized."/> + <param type="boolean" argument="--transpose_input" label="Transpose input" truevalue="--transpose_input" falsevalue="" help="If the provided matrix is in the gene-by-cell format, please transpose the input to cell-by-gene format."/> + <param type="select" name="mode" label="Mode" help="Mode for the prediction (e.g., best match, majority vote)."> + <option value="best_match" selected="true">Best match</option> + <option value="prob_match">Probability match</option> + </param> + <param type="float" name="p_thres" value="0.5" label="Probability threshold" help="Probability threshold for assigning a cell type in a multiclass problem. Ignored if Mode set to Best match."/> + <param type="boolean" argument="--majority_voting" label="Majority voting" truevalue="--majority_voting" falsevalue="" help="Refine the predicted labels by running the majority voting classifier after over-clustering."/> + <param type="text" name="over_clustering" label="Over-clustering" help="If majority voting is set to True, specify the type of over-clustering that is to be performed. This can be specified in the AnnData or an input file specifying the over-clustering per cell. If not present, then the default heuristic over-clustering based on input data will be used."/> + </inputs> + <outputs> + <data format="csv" name="output_predictions" label="${tool.name} on ${on_string}: cell typing table"/> + </outputs> + <tests> + <test expect_num_outputs="1"> + <param name="model_file" value="Healthy_COVID19_PBMC.pkl"/> + <param name="normalize" value="true"/> + <param name="mode" value="best_match"/> + <param name="p_thres" value="0.5"/> + <conditional name="inp"> + <param name="format" value="h5ad"/> + <param name="data" value="raw_counts_pbmc3k.h5ad"/> + <param name="gene_symbols_field" value="SYMBOL"/> + </conditional> + <output name="output_predictions"> + <assert_contents> + <has_text text="MAIT"/> + <has_n_lines n="2701"/> + <!-- <has_line line=""/> --> + <!-- <has_line_matching expression=""/> --> + <has_n_columns n="2"/> + </assert_contents> + </output> + </test> + </tests> + <help><![CDATA[ +.. _celltypist_predict_tool: + +CellTypist: Predict Cell Types +------------------------------ + +CellTypist is a tool for automatic cell type annotation using single-cell RNA sequencing data. This Galaxy tool wrapper executes the CellTypist script in "predict" mode, allowing you to predict cell types using an existing CellTypist model. + +Input +^^^^^ + +- **Input format**: Choose the format for the input files, either AnnData or Matrix market format (10x). + + - If AnnData is selected, provide the input data (AnnData file). + - If Matrix market is selected, provide the input matrix (mtx file), gene file, and cell file. + +- **Input data**: The input data for prediction. + +- **Gene file**: The file containing one gene per line corresponding to the genes in the input data (required for .mtx data). + +- **Cell file**: The file containing one cell per line corresponding to the cells in the input data (required for .mtx data). + +- **Model file**: The existing CellTypist model file in .pkl format. + +- **Raw counts layer**: The name of the layer that stores the raw counts. Uses the default matrix if not provided. + +- **Gene symbols field**: The field in AnnData where the gene symbols are stored, if not in index. + +- **Normalize**: If raw counts are provided in the AnnData object, they need to be normalized. + +- **Transpose input**: If the provided matrix is in the gene-by-cell format, transpose the input to cell-by-gene format. + +- **Mode**: Mode for the prediction (e.g., best match, majority vote). + +- **Probability threshold**: Probability threshold for assigning a cell type in a multiclass problem. Ignored if Mode set to Best match. + +- **Majority voting**: Refine the predicted labels by running the majority voting classifier after over-clustering. + +- **Over-clustering**: If majority voting is set to True, specify the type of over-clustering that is to be performed. This can be specified in the AnnData or an input file specifying the over-clustering per cell. If not provided, the default heuristic over-clustering based on input data will be used. + +Output +^^^^^^ + +- **Output predictions**: The predicted cell types for the input data. + +Citation +^^^^^^^^ + +- CellTypist: a tool for automatic cell type annotation using single-cell RNA sequencing data doi/10.1126/science.abl5197 + +History +^^^^^^^ + +Version 1.6.3+galaxy0: Initial wrapper, Pablo Moreno. CLI written by Alex Proutski. + ]]> + </help> + <citations> + <citation type="bibtex">@article{dominguez_conde2022, + title={Cross-tissue immune cell analysis reveals tissue-specific features in humans}, + author={DomÃnguez Conde, C and Xu, C and Jarvis, LB and Rainbow, DB and Wells, SB and Gomes, T and Howlett, SK and Suchanek, O and Polanski, K and King, HW and Mamanova, L and Huang, N and Szabo, PA and Richardson, L and Bolt, L and Fasouli, ES and Mahbubani, KT and Prete, M and Tuck, L and Richoz, N and Tuong, ZK and Campos, L and Mousa, HS and Needham, EJ and Pritchard, S and Li, T and Elmentaite, R and Park, J and Rahmani, E and Chen, D and Menon, DK and Bayraktar, OA and James, LK and Meyer, KB and Yosef, N and Clatworthy, MR and Sims, PA and Farber, DL and Saeb-Parsy, K and Jones, JL and Teichmann, SA}, + journal={Science}, + volume={376}, + number={6594}, + pages={eabl5197}, + year={2022}, + publisher={American Association for the Advancement of Science}, + pmid={35549406}, + pmcid={PMC7612735} + }</citation> + </citations> +</tool>
