Mercurial > repos > iuc > seurat
changeset 0:8d8412d35247 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seurat commit 24c0223b9baa6d59bba381ef94f7e77b1c204d80
author | iuc |
---|---|
date | Sun, 26 Aug 2018 16:24:02 -0400 |
parents | |
children | 7319f83ae734 |
files | seurat.R seurat.xml test-data/deng_small.tab.gz test-data/out.pdf |
diffstat | 4 files changed, 262 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/seurat.R Sun Aug 26 16:24:02 2018 -0400 @@ -0,0 +1,116 @@ +options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) + +# we need that to not crash galaxy with an UTF8 error on German LC settings. +loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") + +suppressPackageStartupMessages({ + library(Seurat) + library(SingleCellExperiment) + library(dplyr) + library(optparse) +}) + +option_list <- list( + make_option(c("-counts","--counts"), type="character", help="Counts file"), + make_option(c("-numPCs","--numPCs"), type="integer", help="Number of PCs to use in plots"), + make_option(c("-min.cells","--min.cells"), type="integer", help="Minimum cells to include"), + make_option(c("-min.genes","--min.genes"), type="integer", help="Minimum genes to include"), + make_option(c("-low.thresholds","--low.thresholds"), type="double", help="Low threshold for filtering cells"), + make_option(c("-high.thresholds","--high.thresholds"), type="double", help="High threshold for filtering cells"), + make_option(c("-x.low.cutoff","--x.low.cutoff"), type="double", help="X-axis low cutoff for variable genes"), + make_option(c("-x.high.cutoff","--x.high.cutoff"), type="double", help="X-axis high cutoff for variable genes"), + make_option(c("-y.cutoff","--y.cutoff"), type="double", help="Y-axis cutoff for variable genes"), + make_option(c("-cells.use","--cells.use"), type="integer", help="Cells to use for PCHeatmap"), + make_option(c("-resolution","--resolution"), type="double", help="Resolution in FindClusters"), + make_option(c("-min.pct","--min.pct"), type="double", help="Minimum percent cells in FindClusters"), + make_option(c("-logfc.threshold","--logfc.threshold"), type="double", help="LogFC threshold in FindClusters"), + make_option(c("-rds","--rds"), type="logical", help="Output Seurat RDS object") + ) + +parser <- OptionParser(usage = "%prog [options] file", option_list=option_list) +args = parse_args(parser) + +counts <- read.delim(args$counts, row.names=1) +seuset <- CreateSeuratObject(raw.data = counts, min.cells = args$min.cells, min.genes = args$min.cells) + +# Open PDF for plots +pdf("out.pdf") + +VlnPlot(object = seuset, features.plot = c("nGene", "nUMI"), nCol = 2) +GenePlot(object = seuset, gene1 = "nUMI", gene2 = "nGene") + +print("Filtering cells") +if (!is.null(args$low.thresholds)){ + lowthresh <- args$low.thresholds +} else { + lowthresh <- "-Inf" +} +if (!is.null(args$high.thresholds)){ + highthresh <- args$high.thresholds +} else { + highthresh <- "Inf" +} +seuset <- FilterCells(object = seuset, subset.names = c("nUMI"), + low.thresholds=c(lowthresh), high.thresholds = c(highthresh)) + +print("Normalizing the data") +seuset <- NormalizeData(object = seuset, normalization.method = "LogNormalize", + scale.factor = 10000) + +print("Finding variable genes") +seuset <- FindVariableGenes(object = seuset, mean.function = ExpMean, + dispersion.function = LogVMR, + x.low.cutoff = args$x.low.cutoff, + x.high.cutoff = args$x.high.cutoff,, + y.cutoff = args$y.cutoff +) + +print("Scaling the data and removing unwanted sources of variation") +seuset <- ScaleData(object = seuset, vars.to.regress = c("nUMI")) + +print("Performing PCA analysis") +seuset <- RunPCA(object = seuset, pc.genes = seuset@var.genes) +VizPCA(object = seuset, pcs.use = 1:2) +PCAPlot(object = seuset, dim.1 = 1, dim.2 = 2) +PCHeatmap( + object = seuset, + pc.use = 1:args$numPCs, + cells.use = args$cell.use, + do.balanced = TRUE, + label.columns = FALSE, + use.full = FALSE +) + +print("Determining statistically significant principal components") +seuset <- JackStraw(object = seuset, num.replicate = 100, display.progress= FALSE) +JackStrawPlot(object = seuset, PCs = 1:args$numPCs) +PCElbowPlot(object = seuset) + +print("Clustering the cells") +seuset <- FindClusters( + object = seuset, + reduction.type = "pca", + dims.use = 1:args$numPCs, + resolution = args$resolution, + print.output = 0, + save.SNN = TRUE +) + +print("Running non-linear dimensional reduction (tSNE)") +seuset <- RunTSNE(object = seuset, dims.use = 1:args$numPCs, do.fast = TRUE) +TSNEPlot(object = seuset) + +print("Finding differentially expressed genes (cluster biomarkers)") +markers <- FindAllMarkers(object = seuset, only.pos = TRUE, min.pct = args$min.pct, + logfc.threshold = args$logfc.threshold) +top10 <- markers %>% group_by(cluster) %>% top_n(10, avg_logFC) +DoHeatmap(object = seuset, genes.use = top10$gene, slim.col.label = TRUE, remove.key = TRUE) + +# Close PDF for plots +dev.off() + +if (!is.null(args$rds) ) { + saveRDS(seuset, "Seurat.rds") +} + +sessionInfo() \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/seurat.xml Sun Aug 26 16:24:02 2018 -0400 @@ -0,0 +1,146 @@ +<tool id="seurat" name="Seurat" version="2.3.4"> + <description>- toolkit for exploration of single-cell RNA-seq data</description> + <requirements> + <requirement type="package" version="3.4.1">r-base</requirement> + <requirement type="package" version="2.3.4">r-seurat</requirement> + <requirement type="package" version="1.0.0">bioconductor-singlecellexperiment</requirement> + <requirement type="package" version="1.6.0">r-optparse</requirement> + </requirements> + <version_command><![CDATA[ +echo $(R --version | grep version | grep -v GNU)", seurat version" $(R --vanilla --slave -e "library(seurat); cat(sessionInfo()\$otherPkgs\$seurat\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", SingleCellExperiment version" $(R --vanilla --slave -e "library(SingleCellExperiment); cat(sessionInfo()\$otherPkgs\$SingleCellExperiment\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", optparse version" $(R --vanilla --slave -e "library(optparse); cat(sessionInfo()\$otherPkgs\$optparse\$Version)" 2> /dev/null | grep -v -i "WARNING: ") + ]]></version_command> + <command detect_errors="exit_code"><![CDATA[ + +#if $rscript: + cp '$__tool_directory__/seurat.R' '$out_rscript' && +#end if + +Rscript '$__tool_directory__/seurat.R' + +--counts '$counts' +--numPCs $adv.num_PCs +--min.cells $adv.min_cells +--min.genes $adv.min_genes + +#if $adv.low_thresholds: + --low.thresholds $adv.low_thresholds +#end if +#if $adv.high_thresholds: + --high.thresholds $adv.high_thresholds +#end if +#if $adv.x_low_cutoff: + --x.low.cutoff $adv.x_low_cutoff +#end if +#if $adv.x_high_cutoff: + --x.high.cutoff $adv.x_high_cutoff +#end if +#if $adv.y_cutoff: + --y.cutoff $adv.y_cutoff +#end if +#if $adv.cells_use: + --cells.use $adv.cells_use +#end if +#if $adv.resolution: + --resolution $adv.resolution +#end if +#if $adv.min_pct: + --min.pct $adv.min_pct +#end if +#if $adv.logfc_threshold: + --logfc.threshold $adv.logfc_threshold +#end if + +#if $rds: + --rds '$rds' +#end if + +]]></command> + + <inputs> + <param name="counts" type="data" format="tabular" label="Counts file" help="The should be a TAB-separated count matrix with gene identifers in the first column and a header row"/> + <param name="rscript" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Output Rscript?" help="If this option is set to Yes, the Rscript used by this tool will be provided as a text file in the output. Default: No" /> + <param name="rds" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Output Seurat RDS object?" + help="Output the Seurat RDS object, can be loaded into R. Default: No"> + </param> + <section name="adv" title="Advanced Options"> + <param argument="--num_PCs" type="integer" min="0" value="10" label="Number of PCs to use in plots" help="Uses this number of PCs in PCHEatmap, JackStrawPlot, FindClusters, RunTSNE. Default: 10" /> + <param argument="--min_cells" type="integer" min="0" value="0" label="Minimum cells" help="Include genes with detected expression in at least this many cells." /> + <param argument="--min_genes" type="integer" min="0" value="0" label="Minimum genes" help="Include cells where at least this many genes are detected." /> + <param argument="--low_thresholds" type="float" optional="True" label="Low threshold for filtering cells" /> + <param argument="--high_thresholds" type="float" optional="True" label="High threshold for filtering cells" /> + <param argument="--x_low_cutoff" type="float" optional="True" label="X-axis low cutoff for variable genes" help="Bottom cutoff on x-axis for identifying variable genes" /> + <param argument="--x_high_cutoff" type="float" optional="True" label="X-axis high cutoff for variable genes" help="Top cutoff on x-axis for identifying variable genes" /> + <param argument="--y_cutoff" type="float" optional="True" label="Y-axis cutoff for variable genes" help="Bottom cutoff on y-axis for identifying variable genes" /> + <param argument="--cells_use" type="integer" min="0" optional="True" label="Cells to use for PCHeatmap" help="Plots this number of top ‘extreme’ cells on both ends of the spectrum, which dramatically speeds plotting for large datasets" /> + <param argument="--resolution" type="float" optional="True" label="Resolution parameter" help="Value of the resolution parameter used in FindClusters, use a value above (below) 1.0 if you want to obtain a larger (smaller) number of communities." /> + <param argument="--min_pct" type="float" optional="True" label="Minimum percent cells" help="With FindMarkers only test genes that are detected in a minimum fraction of min.pct cells in either of the two populations. Meant to speed up the function by not testing genes that are very infrequently expressed. Default is 0.1" /> + <param argument="--logfc_threshold" type="float" min="0" optional="True" label="LogFC threshold" + help="With FindMarkers, limit testing to genes which show, on average, at least X-fold difference (log-scale)between the two groups of cells. Default is 0.25 Increasing logfc.threshold speeds up the function, but can miss weaker signals." /> + </section> + </inputs> + + <outputs> + <data name="out_pdf" format="pdf" from_work_dir="out.pdf" label="${tool.name} on ${on_string}: Plots" /> + <data name="out_rscript" format="txt" from_work_dir="out_rscript.txt" label="${tool.name} on ${on_string}: Rscript"> + <filter>rscript</filter> + </data> + <data name="out_rds" format="rds" from_work_dir="Seurat.rds" label="${tool.name} on ${on_string}: RData file"> + <filter>rds</filter> + </data> + </outputs> + + <tests> + <!-- Ensure count matrix input works --> + <test> + <param name="counts" ftype="tabular" value="deng_small.tab.gz"/> + <param name="min_cells" value="3"/> + <param name="min_genes" value="200"/> + <param name="low_thresholds" value="1" /> + <param name="high_thresholds" value="20000000" /> + <param name="x_low_cutoff" value="0.0125" /> + <param name="x_high_cutoff" value="3" /> + <param name="y_cutoff" value="0.5" /> + <param name="numPCs" value="10" /> + <param name="cells_use" value="500"/> + <param name="resolution" value="0.6" /> + <param name="min_pct" value="0.25" /> + <param name="logfc_threshold" value="0.25" /> + <output name="out_pdf" ftype="pdf" value="out.pdf" compare="sim_size"/> + </test> + </tests> + <help><![CDATA[ +.. class:: infomark + +**What it does** + +Seurat_ is a toolkit for quality control, analysis, and exploration of single cell RNA sequencing data. +It is developed and maintained by the `Satija Lab`_ at NYGC. Seurat aims to enable users to identify and +interpret sources of heterogeneity from single cell transcriptomic measurements, and to integrate diverse +types of single cell data. See the `Seurat Guided Clustering tutorial`_ for more information. + +----- + +**Inputs** + + * Gene count matrix in TAB-separated format + +----- + +**Outputs** + + * PDF of plots + +Optionally you can choose to output + + * Seurat RDS object (can use within R) + * Rscript + +.. _Seurat: https://www.nature.com/articles/nbt.4096 +.. _Satija Lab: https://satijalab.org/seurat/ +.. _Seurat Guided Clustering tutorial: https://satijalab.org/seurat/pbmc3k_tutorial.html + +]]></help> + <citations> + <citation type="doi">10.1038/nbt.4096</citation> + </citations> +</tool>