Mercurial > repos > ebi-gxa > scanpy_find_variable_genes
view scanpy-find-variable-genes.xml @ 25:2861ca3bfe4d draft
planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/tertiary-analysis/scanpy commit 6c9d530aa653101e9e21804393ec11f38cddf027-dirty
author | ebi-gxa |
---|---|
date | Thu, 16 Feb 2023 13:28:53 +0000 |
parents | 82bd7bf93454 |
children | 273faaa2bd69 |
line wrap: on
line source
<?xml version="1.0" encoding="utf-8"?>
<!--
  Galaxy tool wrapper around the `scanpy-find-variable-genes` command line
  program. It flags (or, with "subset", keeps only) highly variable genes in
  an input object and writes the resulting object as the tool output.

  The @TOOL_VERSION@, @PROFILE@, @INPUT_OPTS@, @OUTPUT_OPTS@, @HELP@ and
  @VERSION_HISTORY@ tokens, as well as every <expand macro="..."/> used below,
  are not defined in this file; presumably they come from scanpy_macros2.xml
  (verify there before changing parameter names such as input_obj_file).

  NOTE(review): the command template and the default flavor were corrected in
  this revision; consider bumping the "+galaxyN" version suffix on release.
-->
<tool id="scanpy_find_variable_genes" name="Scanpy FindVariableGenes" version="@TOOL_VERSION@+galaxy9" profile="@PROFILE@">
  <description>based on normalised dispersion of expression</description>
  <macros>
    <import>scanpy_macros2.xml</import>
  </macros>
  <expand macro="requirements"/>
  <!--
    Command template (Cheetah).
    * Optional numeric parameters are tested with str(...) != '' rather than by
      truthiness, so that an explicit lower bound of 0 is still passed on.
    * The two-valued limit options are only emitted when BOTH bounds are set,
      otherwise the option would be rendered with a missing argument.
    * batch_key is free text and is therefore single-quoted for the shell.
  -->
  <command detect_errors="exit_code"><![CDATA[
ln -s '${input_obj_file}' input.h5 &&
PYTHONIOENCODING=utf-8 scanpy-find-variable-genes
    --flavor '${flavor}'
#if str($min_mean) != '' and str($max_mean) != ''
    --mean-limits ${min_mean} ${max_mean}
#end if
#if str($min_disp) != '' and str($max_disp) != ''
    --disp-limits ${min_disp} ${max_disp}
#end if
#if $n_top_gene
    --n-top-genes ${n_top_gene}
#end if
#if $span
    --span ${span}
#end if
    --n-bins '${n_bin}'
    ${filter}
#if $batch_key
    --batch-key '${batch_key}'
#end if
    @INPUT_OPTS@
    @OUTPUT_OPTS@
]]></command>

  <inputs>
    <expand macro="input_object_params"/>
    <expand macro="output_object_params"/>
    <!-- Exactly one option may be pre-selected in a single-choice select. -->
    <param name="flavor" argument="--flavor" type="select" label="Flavor of computing normalised dispersion">
      <option value="seurat" selected="true">Seurat</option>
      <option value="cell_ranger">Cell-ranger</option>
      <option value="seurat_v3">Seurat V3</option>
    </param>
    <!-- min_mean/max_mean are passed together as the two values of the mean-limits option. -->
    <param name="min_mean" argument="--min-mean" type="float" min="0" value="0.0125" optional="true"
           label="Min value for normalised mean expression (in log1p scale), ignored if flavor='seurat_v3'"/>
    <param name="max_mean" argument="--max-mean" type="float" min="0" value="3" optional="true"
           label="Max value for normalised mean expression (in log1p scale), ignored if flavor='seurat_v3'"/>
    <!-- min_disp/max_disp are passed together as the two values of the disp-limits option. -->
    <param name="min_disp" argument="--min-disp" type="float" min="0" value="0.5" optional="true"
           label="Min value for dispersion of expression, ignored if flavor='seurat_v3'"/>
    <param name="max_disp" argument="--max-disp" type="float" min="0" value="50" optional="true"
           label="Max value for dispersion of expression, ignored if flavor='seurat_v3'"/>
    <param name="n_top_gene" argument="--n-top-genes" type="integer" value="2000" optional="true"
           label="Number of top variable genes to keep, mandatory if flavor='seurat_v3'"/>
    <param name="span" argument="--span" type="float" min="0" max="1" value="0.3" optional="true"
           label="The fraction of the data (cells) used when estimating the variance in the loess model fit if flavor='seurat_v3'"/>
    <param name="n_bin" argument="--n-bins" type="integer" value="20" label="Number of bins for binning the mean expression"/>
    <!-- Boolean rendered verbatim into the command line: the subset flag when checked, nothing otherwise. -->
    <param name="filter" argument="--subset" type="boolean" truevalue="--subset" falsevalue="" checked="false"
           label="Remove genes not marked as highly variable"
           help="When set, inplace subset to highly-variable genes, otherwise only flag highly-variable genes."/>
    <param name="batch_key" argument="--batch-key" type="text" label="Batch key" optional="true"
           help="If specified, highly-variable genes are selected within each batch separately and merged. This simple process avoids the selection of batch-specific genes and acts as a lightweight batch correction method. For all flavors, genes are first sorted by how many batches they are a HVG. For dispersion-based flavors ties are broken by normalized dispersion. If flavor = 'seurat_v3', ties are broken by the median (across batches) rank based on within-batch normalized variance."/>
  </inputs>

  <outputs>
    <expand macro="output_data_obj" description="Variable genes"/>
  </outputs>

  <tests>
    <!-- Seurat flavor with explicit mean/dispersion limits; the output is compared by size only. -->
    <test>
      <param name="input_obj_file" value="normalise_data.h5"/>
      <param name="input_format" value="anndata"/>
      <param name="output_format" value="anndata"/>
      <param name="flavor" value="seurat"/>
      <param name="n_bin" value="20"/>
      <param name="min_mean" value="0.0125"/>
      <param name="max_mean" value="3"/>
      <param name="min_disp" value="0.5"/>
      <param name="max_disp" value="1e9"/>
      <output name="output_h5" file="find_variable_genes.h5" ftype="h5" compare="sim_size"/>
    </test>
  </tests>

  <help><![CDATA[
==============================================================
Mark highly variable genes (`scanpy.pp.highly_variable_genes`)
==============================================================

Depending on `flavor`, this reproduces the R-implementations of Seurat or
Cell Ranger. The normalized dispersion is obtained by scaling with the mean
and standard deviation of the dispersions for genes falling into a given bin
for mean expression of genes. This means that for each bin of mean
expression, highly variable genes are selected.

@HELP@

@VERSION_HISTORY@
]]></help>

  <expand macro="citations">
    <citation type="doi">10.1038/nbt.3192</citation>
    <citation type="doi">10.1038/ncomms14049</citation>
  </expand>
</tool>