view seurat.xml @ 0:8d8412d35247 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seurat commit 24c0223b9baa6d59bba381ef94f7e77b1c204d80
author iuc
date Sun, 26 Aug 2018 16:24:02 -0400
parents
children 7319f83ae734
line wrap: on
line source

<tool id="seurat" name="Seurat" version="2.3.4">
    <description>- toolkit for exploration of single-cell RNA-seq data</description>
    <!-- Pinned package dependencies for the wrapped R script. The r-seurat pin
         (2.3.4) is the same value as the tool's own version attribute. -->
    <requirements>
        <requirement type="package" version="3.4.1">r-base</requirement>
        <requirement type="package" version="2.3.4">r-seurat</requirement>
        <requirement type="package" version="1.0.0">bioconductor-singlecellexperiment</requirement> 
        <requirement type="package" version="1.6.0">r-optparse</requirement>
    </requirements>
    <!-- Prints the R, Seurat, SingleCellExperiment and optparse versions on a single line.
         R package names are case-sensitive, so the package has to be loaded and looked up
         in sessionInfo() as "Seurat"; the lowercase spelling fails to load and leaves the
         Seurat version empty. -->
    <version_command><![CDATA[
echo $(R --version | grep version | grep -v GNU)", seurat version" $(R --vanilla --slave -e "library(Seurat); cat(sessionInfo()\$otherPkgs\$Seurat\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", SingleCellExperiment version" $(R --vanilla --slave -e "library(SingleCellExperiment); cat(sessionInfo()\$otherPkgs\$SingleCellExperiment\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
    ]]></version_command>
    <!-- Cheetah template that builds the Rscript call for seurat.R.
         Required values (counts file, number of PCs, minimum cells/genes) are always passed;
         each optional numeric parameter is passed only when the user supplied a value. -->
    <command detect_errors="exit_code"><![CDATA[

## Optionally expose the wrapped R script itself as an output dataset.
#if $rscript:
    cp '$__tool_directory__/seurat.R' '$out_rscript' &&
#end if

Rscript '$__tool_directory__/seurat.R'

--counts '$counts'
--numPCs $adv.num_PCs
--min.cells $adv.min_cells
--min.genes $adv.min_genes

## Optional numeric parameters are tested via their string form: a plain truth test
## treats an explicit 0 as "not set" and would silently drop the option.
#if str($adv.low_thresholds) != '':
    --low.thresholds $adv.low_thresholds
#end if
#if str($adv.high_thresholds) != '':
    --high.thresholds $adv.high_thresholds
#end if
#if str($adv.x_low_cutoff) != '':
    --x.low.cutoff $adv.x_low_cutoff
#end if
#if str($adv.x_high_cutoff) != '':
    --x.high.cutoff $adv.x_high_cutoff
#end if
#if str($adv.y_cutoff) != '':
    --y.cutoff $adv.y_cutoff
#end if
#if str($adv.cells_use) != '':
    --cells.use $adv.cells_use
#end if
#if str($adv.resolution) != '':
    --resolution $adv.resolution
#end if
#if str($adv.min_pct) != '':
    --min.pct $adv.min_pct
#end if
#if str($adv.logfc_threshold) != '':
    --logfc.threshold $adv.logfc_threshold
#end if

## Boolean switch: only request the RDS object when the user asked for it.
#if $rds:
    --rds '$rds'
#end if

]]></command>

    <!-- User-facing inputs: the count matrix, two switches for the optional outputs,
         and an "adv" section of tuning values. Parameters declared only with an
         argument attribute are referenced in the command template by the flag name
         without its leading dashes (for example adv.num_PCs). -->
    <inputs>
        <param name="counts" type="data" format="tabular" label="Counts file" help="This should be a TAB-separated count matrix with gene identifiers in the first column and a header row"/>
        <param name="rscript" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Output Rscript?" help="If this option is set to Yes, the Rscript used by this tool will be provided as a text file in the output. Default: No" />
        <param name="rds" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output Seurat RDS object?"
            help="Output the Seurat RDS object, can be loaded into R. Default: No">
        </param>
        <section name="adv" title="Advanced Options">
            <!-- Always passed to the script (they carry default values). -->
            <param argument="--num_PCs" type="integer" min="0" value="10" label="Number of PCs to use in plots" help="Uses this number of PCs in PCHeatmap, JackStrawPlot, FindClusters, RunTSNE. Default: 10" />
            <param argument="--min_cells" type="integer" min="0" value="0" label="Minimum cells" help="Include genes with detected expression in at least this many cells." />
            <param argument="--min_genes" type="integer" min="0"  value="0" label="Minimum genes" help="Include cells where at least this many genes are detected." />
            <!-- Optional values: passed to the script only when filled in. -->
            <param argument="--low_thresholds" type="float" optional="True" label="Low threshold for filtering cells" />
            <param argument="--high_thresholds" type="float" optional="True" label="High threshold for filtering cells" />
            <param argument="--x_low_cutoff" type="float" optional="True" label="X-axis low cutoff for variable genes" help="Bottom cutoff on x-axis for identifying variable genes" />
            <param argument="--x_high_cutoff" type="float" optional="True" label="X-axis high cutoff for variable genes" help="Top cutoff on x-axis for identifying variable genes" />
            <param argument="--y_cutoff" type="float" optional="True" label="Y-axis cutoff for variable genes" help="Bottom cutoff on y-axis for identifying variable genes" />
            <param argument="--cells_use" type="integer" min="0" optional="True" label="Cells to use for PCHeatmap" help="Plots this number of top ‘extreme’ cells on both ends of the spectrum, which dramatically speeds plotting for large datasets" />
            <param argument="--resolution" type="float" optional="True" label="Resolution parameter" help="Value of the resolution parameter used in FindClusters, use a value above (below) 1.0 if you want to obtain a larger (smaller) number of communities." />
            <param argument="--min_pct" type="float" optional="True" label="Minimum percent cells" help="With FindMarkers only test genes that are detected in a minimum fraction of min.pct cells in either of the two populations. Meant to speed up the function by not testing genes that are very infrequently expressed. Default is 0.1" />
            <param argument="--logfc_threshold" type="float" min="0" optional="True" label="LogFC threshold"
                help="With FindMarkers, limit testing to genes which show, on average, at least X-fold difference (log-scale) between the two groups of cells. Default is 0.25. Increasing logfc.threshold speeds up the function, but can miss weaker signals." />
        </section>
    </inputs>

    <!-- Outputs: the PDF of plots is always produced; the R script and the Seurat
         RDS object are kept only when the matching boolean input is enabled. -->
    <outputs>
        <data name="out_pdf" format="pdf" from_work_dir="out.pdf" label="${tool.name} on ${on_string}: Plots" />
        <!-- NOTE(review): the command template copies the script straight to this
             output's path, so from_work_dir looks unused here; confirm before removing. -->
        <data name="out_rscript" format="txt" from_work_dir="out_rscript.txt" label="${tool.name} on ${on_string}: Rscript">
            <filter>rscript</filter>
        </data>
        <!-- Labelled "RDS" to match the declared format and file name; RData is a
             different R serialization format. -->
        <data name="out_rds" format="rds" from_work_dir="Seurat.rds" label="${tool.name} on ${on_string}: RDS file">
            <filter>rds</filter>
        </data>
    </outputs>

    <tests>
        <!-- Ensure count matrix input works.
             Parameter names must match the names declared in the inputs section;
             the number of PCs is declared as num_PCs (not numPCs). -->
        <test>
            <param name="counts" ftype="tabular" value="deng_small.tab.gz"/>
            <param name="min_cells" value="3"/>
            <param name="min_genes" value="200"/>
            <param name="low_thresholds" value="1" />
            <param name="high_thresholds" value="20000000" />
            <param name="x_low_cutoff" value="0.0125" />
            <param name="x_high_cutoff" value="3" />
            <param name="y_cutoff" value="0.5" />
            <param name="num_PCs" value="10" />
            <param name="cells_use" value="500"/>
            <param name="resolution" value="0.6" />
            <param name="min_pct" value="0.25" />
            <param name="logfc_threshold" value="0.25" />
            <!-- The PDF is compared by size only (sim_size), not byte for byte. -->
            <output name="out_pdf" ftype="pdf" value="out.pdf" compare="sim_size"/>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

Seurat_ is a toolkit for quality control, analysis, and exploration of single cell RNA sequencing data.
It is developed and maintained by the `Satija Lab`_ at NYGC. Seurat aims to enable users to identify and
interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse
types of single cell data. See the `Seurat Guided Clustering tutorial`_ for more information.

-----

**Inputs**

    * Gene count matrix in TAB-separated format

-----

**Outputs**

    * PDF of plots

Optionally you can choose to output

    * Seurat RDS object (can be loaded into R)
    * Rscript

.. _Seurat: https://www.nature.com/articles/nbt.4096
.. _Satija Lab: https://satijalab.org/seurat/
.. _Seurat Guided Clustering tutorial: https://satijalab.org/seurat/pbmc3k_tutorial.html

]]></help>
    <!-- DOI of the Seurat article; the same nbt.4096 paper that the help text links as "Seurat". -->
    <citations>
        <citation type="doi">10.1038/nbt.4096</citation>
    </citations>
</tool>