diff seurat_find_variable_genes.xml @ 0:a56efad05337 draft

planemo upload for repository https://github.com/ebi-gene-expression-group/container-galaxy-sc-tertiary/ commit 9bf9a6e46a330890be932f60d1d996dd166426c4
author ebi-gxa
date Wed, 03 Apr 2019 11:17:53 -0400
parents
children a6077346f869
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/seurat_find_variable_genes.xml	Wed Apr 03 11:17:53 2019 -0400
@@ -0,0 +1,106 @@
+<tool id="seurat_find_variable_genes" name="Seurat FindVariableGenes" version="2.3.1+galaxy1">
+    <description>identify variable genes</description>
+    <macros>
+        <import>seurat_macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="version" />
+    <command detect_errors="exit_code"><![CDATA[
+seurat-find-variable-genes.R
+
+--input-object-file '$input'
+#if $mean:
+    --mean-function '$mean'
+#end if
+#if $disp:
+--dispersion-function $disp
+#end if
+#if $xlow:
+--x-low-cutoff $xlow
+#end if
+#if $xhigh:
+--x-high-cutoff $xhigh
+#end if
+#if $ylow:
+--y-low-cutoff $ylow
+#end if
+#if $yhigh:
+--y-high-cutoff $yhigh
+#end if
+--output-object-file '$output'
+--output-text-file '$output_tab'
+]]></command>
+
+    <inputs>
+        <param name="input" argument="--input-object-file" type="data" format="rdata" label="Seurat RDS object" help="R serialized object for Seurat, normally the one produced by Seurat FindVariableGenes" />
+        <param name="mean" argument="--mean-function" type="text" optional="True" label="Mean function" help="Function to compute x-axis value (average expression). Default is to take the mean of the detected (i.e. non-zero) values."/>
+        <param name="disp" argument="--dispersion-function" type="text" optional="True" label="Dispersion function" help="Function to compute y-axis value (dispersion). Default is to take the standard deviation of all values." />
+        <param name="xlow" argument="--x-low-cutoff" type="float" optional="True" label="X-axis low cutoff" help="Bottom cutoff on x-axis for identifying variable genes."/>
+        <param name="xhigh" argument="--x-high-cutoff" type="float" optional="True" label="X-axis high cutoff" help="Top cutoff on x-axis for identifying variable genes."/>
+        <param name="ylow" argument="--y-low-cutoff" type="float" optional="True" label="Y-axis low cutoff" help="Bottom cutoff on y-axis for identifying variable genes."/>
+        <param name="yhigh" argument="--y-high-cutoff" type="float" optional="True" label="Y-axis high cutoff" help="Top cutoff on y-axis for identifying variable genes."/>
+    </inputs>
+
+    <outputs>
+        <data name="output" format="rdata" from_work_dir="*.rds" label="${tool.name} on ${on_string}: Seurat RDS"/>
+        <data name="output_tab" format="tabular" from_work_dir="*.tab" label="${tool.name} on ${on_string}: Variable genes tabular file"/>
+    </outputs>
+
+    <tests>
+        <test>
+            <param name="input" ftype="rdata" value="out_norm.rds"/>
+            <output name="output" ftype="rdata" value="out_findvar.rds" compare="sim_size"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+Seurat_ is a toolkit for quality control, analysis, and exploration of single cell RNA sequencing data.
+It is developed and maintained by the `Satija Lab`_ at NYGC. Seurat aims to enable users to identify and
+interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse
+types of single cell data.
+
+This tool identifies genes that are outliers on a 'mean variability plot'. First, uses
+a function to calculate average expression (mean.function) and dispersion (dispersion.function)
+for each gene. Next, divides genes into num.bin (deafult 20) bins based on
+their average expression, and calculates z-scores for dispersion within each
+bin. The purpose of this is to identify variable genes while controlling for
+the strong relationship between variability and average expression.
+
+Exact parameter settings may vary empirically from dataset to dataset, and
+based on visual inspection of the plot.
+
+Setting the y.cutoff parameter to 2 identifies genes that are more than two standard
+deviations away from the average dispersion within a bin. The default X-axis function
+is the mean expression level, and for Y-axis it is the log(Variance/mean). All mean/variance
+calculations are not performed in log-space, but the results are reported in log-space -
+see relevant functions for exact details.
+
+-----
+
+**Inputs**
+
+    * Seurat RDS object
+    * Mean function. Function to compute x-axis value (average expression). Default is to take the mean of the detected (i.e. non-zero) values.
+    * Dispersion function. Function to compute y-axis value (dispersion). Default is to take the standard deviation of all values.
+    * Bottom cutoff on x-axis for identifying variable genes.
+    * Top cutoff on x-axis for identifying variable genes.
+    * Bottom cutoff on y-axis for identifying variable genes.
+    * Top cutoff on y-axis for identifying variable genes.
+
+-----
+
+**Outputs**
+
+    * Seurat RDS object. Places variable genes in object@var.genes. The result of all analysis is stored in object@hvg.info
+    * Tabular file of variable genes
+
+.. _Seurat: https://www.nature.com/articles/nbt.4096
+.. _Satija Lab: https://satijalab.org/seurat/
+
+@VERSION_HISTORY@
+]]></help>
+      <expand macro="citations" />
+</tool>