view scanpy-find-variable-genes.xml @ 28:64dd8bb151e1 draft default tip

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/tree/develop/tools/tertiary-analysis/scanpy commit 1161fdc6d874689d9a7a10bbe22b7758b2edc4bf-dirty
author ebi-gxa
date Wed, 15 May 2024 09:39:42 +0000
parents 2bc42a6f092b
children
line wrap: on
line source

<?xml version="1.0" encoding="utf-8"?>
<tool id="scanpy_find_variable_genes" name="Scanpy FindVariableGenes" version="@TOOL_VERSION@+galaxy1" profile="@PROFILE@">
  <description>based on normalised dispersion of expression</description>
  <macros>
    <import>scanpy_macros2.xml</import>
  </macros>
  <expand macro="requirements"/>
  <command detect_errors="exit_code"><![CDATA[
## Expose the input dataset under a fixed file name in the job directory
## (presumably the name the INPUT_OPTS macro token points at; confirm in the macros file).
ln -s '${input_obj_file}' input.h5 &&
PYTHONIOENCODING=utf-8 scanpy-find-variable-genes
    --flavor '${flavor}'
## Optional numeric parameters are compared as strings rather than tested for
## truthiness: 0 is an allowed value but evaluates as false, which would
## silently drop the option. Each limit pair is only passed when both ends are
## set, because the option is rendered with exactly two values.
#if str($min_mean) != '' and str($max_mean) != ''
    --mean-limits ${min_mean} ${max_mean}
#end if
#if str($min_disp) != '' and str($max_disp) != ''
    --disp-limits ${min_disp} ${max_disp}
#end if
#if str($n_top_gene) != ''
    --n-top-genes ${n_top_gene}
#end if
#if str($span) != ''
    --span ${span}
#end if
    --n-bins '${n_bin}'
    ${filter}
## Free-text value: quoted so that keys containing spaces stay a single argument.
#if $batch_key
    --batch-key '${batch_key}'
#end if
#if $never_hvg
    --never-hv-genes-file '${never_hvg}'
#end if
#if $always_hvg
    --always-hv-genes-file '${always_hvg}'
#end if
    @INPUT_OPTS@
    @OUTPUT_OPTS@
]]></command>

  <inputs>
    <!-- Input/output object parameters come from shared macros defined outside this file. -->
    <expand macro="input_object_params"/>
    <expand macro="output_object_params"/>
    <!-- Single-choice select: exactly one option may be marked as the default. -->
    <param name="flavor" argument="--flavor" type="select" label="Flavor of computing normalised dispersion">
      <option value="seurat" selected="true">Seurat</option>
      <option value="cell_ranger">Cell-ranger</option>
      <option value="seurat_v3">Seurat V3</option>
    </param>
    <!-- Mean and dispersion limits are passed to the command line as min/max pairs. -->
    <param name="min_mean" argument="--min-mean" type="float" min="0" value="0.0125" optional="true"
           label="Min value for normalised mean expression (in log1p scale), ignored if flavor='seurat_v3'"/>
    <param name="max_mean" argument="--max-mean" type="float" min="0" value="3" optional="true"
           label="Max value for normalised mean expression (in log1p scale), ignored if flavor='seurat_v3'"/>
    <param name="min_disp" argument="--min-disp" type="float" min="0" value="0.5" optional="true"
           label="Min value for dispersion of expression, ignored if flavor='seurat_v3'"/>
    <param name="max_disp" argument="--max-disp" type="float" min="0" value="50" optional="true"
           label="Max value for dispersion of expression, ignored if flavor='seurat_v3'"/>
    <param name="n_top_gene" argument="--n-top-genes" type="integer" value="2000" optional="true"
           label="Number of top variable genes to keep, mandatory if flavor='seurat_v3'"/>
    <param name="span" argument="--span" type="float" min="0" max="1" value="0.3" optional="true"
           label="The fraction of the data (cells) used when estimating the variance in the loess model fit if flavor='seurat_v3'"/>
    <param name="n_bin" argument="--n-bins" type="integer" value="20" label="Number of bins for binning the mean expression"/>
    <!-- Boolean rendered verbatim into the command line: "-\-subset" when checked, nothing otherwise. -->
    <param name="filter" argument="--subset" type="boolean" truevalue="--subset" falsevalue="" checked="false"
           label="Remove genes not marked as highly variable" help="When set, inplace subset to highly-variable genes, otherwise only flag highly-variable genes."/>
    <param name="batch_key" argument="--batch-key" type="text" label="Batch key" optional="true" help="If specified, highly-variable genes are selected within each batch separately and merged. This simple process avoids the selection of batch-specific genes and acts as a lightweight batch correction method. For all flavors, genes are first sorted by how many batches they are a HVG. For dispersion-based flavors ties are broken by normalized dispersion. If flavor = 'seurat_v3', ties are broken by the median (across batches) rank based on within-batch normalized variance."/>
    <!-- Optional gene lists that force the HVG flag off / on after the computation. -->
    <param name="never_hvg" argument="--never-hv-genes-file" optional="true" type="data" label="Genes that should never be HVGs" help="Provide a file, one gene identifier per line, with genes that should never be active as HVGs. This simply makes sure that the boolean value for HVGs is false for all these genes after computing HVGs"/>
    <param name="always_hvg" argument="--always-hv-genes-file" optional="true" type="data" label="Genes that should always be HVGs" help="Provide a file, one gene identifier per line, with genes that should always be active as HVGs. This simply makes sure that the boolean value for HVGs is true for all these genes after computing HVGs"/>
  </inputs>

  <!-- The output dataset is declared by the shared "output_data_obj" macro (defined
       outside this file); only its description is customised here. The tests in this
       file refer to an output named "output_h5", presumably the one this macro
       declares (confirm in the macros file). -->
  <outputs>
    <expand macro="output_data_obj" description="Variable genes"/>
  </outputs>

  <tests>
    <!-- Seurat flavor with explicit mean/dispersion limits and no gene lists. -->
    <test>
      <param name="input_obj_file" value="normalise_data.h5"/>
      <param name="input_format" value="anndata"/>
      <param name="output_format" value="anndata"/>
      <param name="flavor" value="seurat"/>
      <param name="min_mean" value="0.0125"/>
      <param name="max_mean" value="3"/>
      <param name="min_disp" value="0.5"/>
      <param name="max_disp" value="1e9"/>
      <param name="n_bin" value="20"/>
      <output name="output_h5" ftype="h5">
        <assert_contents>
          <has_h5_keys keys="uns/hvg"/>
        </assert_contents>
      </output>
    </test>
    <!-- Same settings, with both the "never" and the "always" HVG gene lists supplied. -->
    <test>
      <param name="input_obj_file" value="normalise_data.h5"/>
      <param name="input_format" value="anndata"/>
      <param name="output_format" value="anndata"/>
      <param name="flavor" value="seurat"/>
      <param name="min_mean" value="0.0125"/>
      <param name="max_mean" value="3"/>
      <param name="min_disp" value="0.5"/>
      <param name="max_disp" value="1e9"/>
      <param name="n_bin" value="20"/>
      <param name="never_hvg" value="never_hvg.txt"/>
      <param name="always_hvg" value="always_hvg.txt"/>
      <output name="output_h5" ftype="h5">
        <assert_contents>
          <has_h5_keys keys="uns/hvg"/>
        </assert_contents>
      </output>
    </test>
    <!-- Same settings, with only the "never" HVG gene list supplied. -->
    <test>
      <param name="input_obj_file" value="normalise_data.h5"/>
      <param name="input_format" value="anndata"/>
      <param name="output_format" value="anndata"/>
      <param name="flavor" value="seurat"/>
      <param name="min_mean" value="0.0125"/>
      <param name="max_mean" value="3"/>
      <param name="min_disp" value="0.5"/>
      <param name="max_disp" value="1e9"/>
      <param name="n_bin" value="20"/>
      <param name="never_hvg" value="never_hvg.txt"/>
      <output name="output_h5" ftype="h5">
        <assert_contents>
          <has_h5_keys keys="uns/hvg"/>
        </assert_contents>
      </output>
    </test>
  </tests>

  <help><![CDATA[
==============================================================
Mark highly variable genes (`scanpy.pp.highly_variable_genes`)
==============================================================

Depending on `flavor`, this reproduces the R implementations of Seurat, Cell Ranger or Seurat v3.

For the dispersion-based flavors (`seurat` and `cell_ranger`), the normalized
dispersion is obtained by scaling with the mean and standard deviation of the
dispersions for genes falling into a given bin for mean expression of genes.
This means that for each bin of mean expression, highly variable genes are
selected.

@HELP@

@VERSION_HISTORY@
]]></help>
  <!-- Method papers for the selectable flavors, passed into the shared "citations"
       macro: Seurat (Satija et al. 2015), Cell Ranger (Zheng et al. 2017) and
       Seurat v3 (Stuart et al. 2019). -->
  <expand macro="citations">
    <citation type="doi">10.1038/nbt.3192</citation>
    <citation type="doi">10.1038/ncomms14049</citation>
    <citation type="doi">10.1016/j.cell.2019.05.031</citation>
  </expand>
</tool>