view seurat.xml @ 4:82fcdf530f87 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seurat commit 0d259308f99ef39ab00b80db5a8c5674ba0f3e72"
author iuc
date Mon, 04 May 2020 13:56:11 -0400
parents 7a5cd7987b03
children 06ed31cf52ed
line wrap: on
line source

<tool id="seurat" name="Seurat" version="@TOOL_VERSION@">
    <description>- toolkit for exploration of single-cell RNA-seq data</description>
    <macros>
        <!-- Single version token: used for the tool's own version attribute and for the r-seurat requirement pin. -->
        <token name="@TOOL_VERSION@">3.1.5</token>
    </macros>
    <requirements>
        <!-- Seurat itself; pinned through the shared version token so the package tracks the tool version. -->
        <requirement type="package" version="@TOOL_VERSION@">r-seurat</requirement>
        <!-- Provides render(), which the command calls on Seurat.R to produce the HTML report. -->
        <requirement type="package" version="2.1">r-rmarkdown</requirement>
        <!-- Need to pin pandoc due to https://github.com/rstudio/rmarkdown/issues/1740 -->
        <requirement type="package" version="2.7.3">pandoc</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
## The "plots" select is optional and allows multiple values. Take its string
## form and split it on commas once, so that every option is compared as a
## whole value and an empty selection simply switches every plot off.
#set $selected_plots = str($meta.plots).split(',')
#set $vln = 'T' if 'vln' in $selected_plots else 'F'
#set $feat = 'T' if 'feat' in $selected_plots else 'F'
#set $PCs = 'T' if 'PCs' in $selected_plots else 'F'
#set $tsne = 'T' if 'tsne' in $selected_plots else 'F'
#set $heatmaps = 'T' if 'heat' in $selected_plots else 'F'
## Render Seurat.R with rmarkdown. All tool parameters are handed over as
## strings in the params list; the report is written to out.html in the job
## working directory, where the out_html output picks it up.
Rscript -e "library(\"rmarkdown\"); render(\"$__tool_directory__/Seurat.R\",
    params = list(counts = \"${counts}\",
        min_cells = \"${adv.min_cells}\",
        min_genes = \"${adv.min_genes}\",
        low_thresholds = \"${adv.low_thresholds}\",
        high_thresholds = \"${adv.high_thresholds}\",
        numPCs = \"${adv.num_PCs}\",
        cells_use = \"${adv.cells_use}\",
        resolution = \"${adv.resolution}\",
        min_pct = \"${adv.min_pct}\",
        logfc_threshold = \"${adv.logfc_threshold}\",
        warn = \"${meta.warn}\",
        varstate = \"${meta.varstate}\",
        showcode = \"${meta.showcode}\",
        vlnfeat = \"$vln\",
        featplot = \"$feat\",
        PCplots = \"$PCs\",
        tsne = \"$tsne\",
        heatmaps = \"$heatmaps\"),
    intermediates_dir = \".\",
    output_format = html_document(),
    output_dir = \".\",
    output_file = \"out.html\")"
    ]]></command>
    <inputs>
        <!-- Count matrix handed to Seurat.R as the "counts" parameter. -->
        <param name="counts" type="data" format="tabular,tsv" label="Counts file" help="This should be a TAB-separated count matrix with gene identifiers in the first column and a header row"/>
        <!-- Analysis settings; each one is forwarded to Seurat.R through the render() params list. -->
        <section name="adv" title="Advanced Options" expanded="true">
            <param name="num_PCs" type="integer" min="0" value="10" label="Number of PCs to use in plots" help="Uses this number of PCs in PCHeatmap, JackStrawPlot, FindClusters, RunTSNE. Default: 10" />
            <param name="min_cells" type="integer" min="0" value="0" label="Minimum cells" help="Include genes with detected expression in at least this many cells." />
            <param name="min_genes" type="integer" min="0" value="0" label="Minimum genes" help="Include cells where at least this many genes are detected." />
            <param name="low_thresholds" type="integer" value="1" label="Low threshold for filtering cells" />
            <param name="high_thresholds" type="integer" value="20000000" label="High threshold for filtering cells" />
            <param name="cells_use" type="integer" min="1" value="500" label="Cells to use for PCHeatmap" help="Plots this number of top ‘extreme’ cells on both ends of the spectrum, which dramatically speeds plotting for large datasets" />
            <param name="resolution" type="float" value="0.6" label="Resolution parameter" help="Value of the resolution parameter used in FindClusters, use a value above (below) 1.0 if you want to obtain a larger (smaller) number of communities." />
            <param name="min_pct" type="float" value="0.1" label="Minimum percent cells" help="With FindMarkers only test genes that are detected in a minimum fraction of min.pct cells in either of the two populations. Meant to speed up the function by not testing genes that are very infrequently expressed. Default is 0.1" />
            <param name="logfc_threshold" type="float" min="0" value="0.25" label="LogFC threshold"
                help="With FindMarkers, limit testing to genes which show, on average, at least X-fold difference (log-scale) between the two groups of cells. Default is 0.25. Increasing logfc.threshold speeds up the function, but can miss weaker signals." />
        </section>
        <!-- Report settings: the booleans are passed to Seurat.R as T/F, and "plots" selects which plot groups are enabled. -->
        <section name="meta" title="Output options" expanded="true">
            <param name="showcode" type="boolean" truevalue="T" falsevalue="F" checked="false" label="Show code alongside outputs?"/>
            <param name="warn" type="boolean" truevalue="T" falsevalue="F" checked="false" label="Include warnings in the output file (Yes) or pipe to stdout (No)"/>
            <param name="varstate" type="boolean" truevalue="T" falsevalue="F" checked="false" label="Display variable values used in code at the beginning of output file?"/>
            <param name="plots" type="select" optional="true" multiple="true" display="checkboxes" label="Which plots should be output?">
                <option value="vln" selected="true">Violin and Scatter plots</option>
                <option value="feat" selected="true">Feature counts plots</option>
                <option value="PCs" selected="true">PC plots</option>
                <option value="tsne" selected="true">tSNE plots</option>
                <option value="heat" selected="true">Heatmap plots</option>
            </param>
        </section>
    </inputs>
    <outputs>
        <!-- Collected from the job working directory: the command renders the report to out.html with output_dir ".". -->
        <data name="out_html" format="html" from_work_dir="out.html" label="${tool.name} on ${on_string}" />
    </outputs>

    <tests>
        <!-- Renders the report for the bundled count matrix with only the feature plots enabled.
             The HTML output is compared by size (within delta), not by exact content. -->
        <test>
            <param name="counts" ftype="tabular" value="counts.tab.gz"/>
            <section name="adv">
                <!-- Test parameter names must match the names declared in the inputs (num_PCs, not numPCs). -->
                <param name="num_PCs" value="10" />
                <param name="min_cells" value="3"/>
                <param name="min_genes" value="200"/>
                <param name="low_thresholds" value="1" />
                <param name="high_thresholds" value="20000000" />
                <param name="cells_use" value="500"/>
                <param name="resolution" value="0.6" />
                <param name="min_pct" value="0.25" />
                <param name="logfc_threshold" value="0.25" />
            </section>
            <section name="meta">
                <param name="showcode" value="T"/>
                <param name="warn" value="F"/>
                <param name="varstate" value="F"/>
                <param name="plots" value="feat"/>
            </section>
            <output name="out_html" ftype="html" value="out.html" compare="sim_size" delta="20000" />
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark

**What it does**

Seurat_ is a toolkit for quality control, analysis, and exploration of single cell RNA sequencing data.
It is developed and maintained by the `Satija Lab`_ at NYGC. Seurat aims to enable users to identify and
interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse
types of single cell data. See the `Seurat Guided Clustering tutorial`_ for more information.

-----

**Inputs**

    * Gene count matrix in TAB-separated format

-----

**Outputs**

    * HTML of plots

Optionally you can choose to output

    * Seurat RDS object (can use within R)
    * Rscript

.. _Seurat: https://www.nature.com/articles/nbt.4096
.. _Satija Lab: https://satijalab.org/seurat/
.. _Seurat Guided Clustering tutorial: https://satijalab.org/seurat/pbmc3k_tutorial.html

]]></help>
    <citations>
        <!-- DOI of the Seurat article that the help text links to (nature.com/articles/nbt.4096). -->
        <citation type="doi">10.1038/nbt.4096</citation>
    </citations>
</tool>