Mercurial > repos > iuc > seurat
changeset 6:764f076e9d52 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seurat commit e8eeff2efea68e1e5bc697d0ff6a5808cd978db2"
field | value |
---|---|
author | iuc |
date | Thu, 23 Jul 2020 11:22:05 -0400 |
parents | 06ed31cf52ed |
children | 4c139a9415d7 |
files | Seurat.R seurat.xml |
diffstat | 2 files changed, 62 insertions(+), 23 deletions(-) [+] |
line wrap: on
line diff
--- a/Seurat.R Mon Jun 08 17:40:51 2020 -0400 +++ b/Seurat.R Thu Jul 23 11:22:05 2020 -0400 @@ -10,6 +10,7 @@ #' numPCs: "" #' cells_use: "" #' resolution: "" +#' perplexity: "" #' min_pct: "" #' logfc_threshold: "" #' showcode: "" @@ -22,89 +23,98 @@ #' heatmaps: "" #' --- +# nolint start #+ echo=F, warning = F, message=F -options(show.error.messages = F, error = function(){cat(geterrmessage(), file = stderr()); q("no", 1, F)}) +options(show.error.messages = F, error = function() { + cat(geterrmessage(), file = stderr()); q("no", 1, F) +}) showcode <- as.logical(params$showcode) warn <- as.logical(params$warn) varstate <- as.logical(params$varstate) vlnfeat <- as.logical(params$vlnfeat) featplot <- as.logical(params$featplot) -PCplots <- as.logical(params$PCplots) +pc_plots <- as.logical(params$PCplots) tsne <- as.logical(params$tsne) heatmaps <- as.logical(params$heatmaps) # we need that to not crash Galaxy with an UTF-8 error on German LC settings. loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") - #+ echo = F, warning = `warn`, include =`varstate` min_cells <- as.integer(params$min_cells) min_genes <- as.integer(params$min_genes) low_thresholds <- as.integer(params$low_thresholds) high_thresholds <- as.integer(params$high_thresholds) -numPCs <- as.integer(params$numPCs) +num_pcs <- as.integer(params$numPCs) cells_use <- as.integer(params$cells_use) resolution <- as.double(params$resolution) +perplexity <- as.integer(params$perplexity) min_pct <- as.double(params$min_pct) logfc_threshold <- as.double(params$logfc_thresh) print(paste0("Minimum cells: ", min_cells)) print(paste0("Minimum features: ", min_genes)) print(paste0("Umi low threshold: ", low_thresholds)) print(paste0("Umi high threshold: ", high_thresholds)) -print(paste0("Number of principal components: ", numPCs)) +print(paste0("Number of principal components: ", num_pcs)) print(paste0("Resolution: ", resolution)) +print(paste0("Perplexity: ", perplexity)) print(paste0("Minimum percent of cells", 
min_pct)) print(paste0("Logfold change threshold", logfc_threshold)) #+ echo = FALSE -if(showcode == TRUE) print("Read in data, generate inital Seurat object") +if (showcode == TRUE) print("Read in data, generate inital Seurat object") #+ echo = `showcode`, warning = `warn`, message = F counts <- read.delim(params$counts, row.names = 1) seuset <- Seurat::CreateSeuratObject(counts = counts, min.cells = min_cells, min.features = min_genes) #+ echo = FALSE -if(showcode == TRUE && vlnfeat == TRUE) print("Raw data vizualization") -#+ echo = `showcode`, warning = `warn`, include=`vlnfeat` +if (showcode == TRUE && vlnfeat == TRUE) print("Raw data vizualization") +#+ echo = `showcode`, warning = `warn`, include=`vlnfeat` Seurat::VlnPlot(object = seuset, features = c("nFeature_RNA", "nCount_RNA")) Seurat::FeatureScatter(object = seuset, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") #+ echo = FALSE -if(showcode == TRUE) print("Filter and normalize for UMI counts") +if (showcode == TRUE) print("Filter and normalize for UMI counts") #+ echo = `showcode`, warning = `warn` seuset <- subset(seuset, subset = `nCount_RNA` > low_thresholds & `nCount_RNA` < high_thresholds) seuset <- Seurat::NormalizeData(seuset, normalization.method = "LogNormalize", scale.factor = 10000) #+ echo = FALSE -if(showcode == TRUE && featplot == TRUE) print("Variable Genes") +if (showcode == TRUE && featplot == TRUE) print("Variable Genes") #+ echo = `showcode`, warning = `warn`, include = `featplot` seuset <- Seurat::FindVariableFeatures(object = seuset, selection.method = "mvp") Seurat::VariableFeaturePlot(seuset, cols = c("black", "red"), selection.method = "disp") seuset <- Seurat::ScaleData(object = seuset, vars.to.regress = "nCount_RNA") #+ echo = FALSE -if(showcode == TRUE && PCplots == TRUE) print("PCA Visualization") -#+ echo = `showcode`, warning = `warn`, include = `PCplots` -seuset <- Seurat::RunPCA(seuset, npcs = numPCs) +if (showcode == TRUE && pc_plots == TRUE) print("PCA 
Visualization") +#+ echo = `showcode`, warning = `warn`, include = `pc_plots` +seuset <- Seurat::RunPCA(seuset, npcs = num_pcs) Seurat::VizDimLoadings(seuset, dims = 1:2) -Seurat::DimPlot(seuset, dims = c(1,2), reduction = "pca") -Seurat::DimHeatmap(seuset, dims = 1:numPCs, nfeatures = 30, reduction = "pca") -seuset <- Seurat::JackStraw(seuset, dims=numPCs, reduction = "pca", num.replicate = 100) -seuset <- Seurat::ScoreJackStraw(seuset, dims = 1:numPCs) -Seurat::JackStrawPlot(seuset, dims = 1:numPCs) -Seurat::ElbowPlot(seuset, ndims = numPCs, reduction = "pca") +Seurat::DimPlot(seuset, dims = c(1, 2), reduction = "pca") +Seurat::DimHeatmap(seuset, dims = 1:num_pcs, nfeatures = 30, reduction = "pca") +seuset <- Seurat::JackStraw(seuset, dims = num_pcs, reduction = "pca", num.replicate = 100) +seuset <- Seurat::ScoreJackStraw(seuset, dims = 1:num_pcs) +Seurat::JackStrawPlot(seuset, dims = 1:num_pcs) +Seurat::ElbowPlot(seuset, ndims = num_pcs, reduction = "pca") #+ echo = FALSE -if(showcode == TRUE && tsne == TRUE) print("tSNE") +if (showcode == TRUE && tsne == TRUE) print("tSNE") #+ echo = `showcode`, warning = `warn`, include = `tsne` seuset <- Seurat::FindNeighbors(object = seuset) seuset <- Seurat::FindClusters(object = seuset) -seuset <- Seurat::RunTSNE(seuset, dims = 1:numPCs, resolution = resolution) +if (perplexity == -1) { + seuset <- Seurat::RunTSNE(seuset, dims = 1:num_pcs, resolution = resolution); +} else { + seuset <- Seurat::RunTSNE(seuset, dims = 1:num_pcs, resolution = resolution, perplexity = perplexity); +} Seurat::DimPlot(seuset, reduction = "tsne") #+ echo = FALSE -if(showcode == TRUE && heatmaps == TRUE) print("Marker Genes") +if (showcode == TRUE && heatmaps == TRUE) print("Marker Genes") #+ echo = `showcode`, warning = `warn`, include = `heatmaps` markers <- Seurat::FindAllMarkers(seuset, only.pos = TRUE, min.pct = min_pct, logfc.threshold = logfc_threshold) top10 <- dplyr::group_by(markers, cluster) top10 <- dplyr::top_n(top10, 10, avg_logFC) 
Seurat::DoHeatmap(seuset, features = top10$gene) +# nolint end
--- a/seurat.xml Mon Jun 08 17:40:51 2020 -0400 +++ b/seurat.xml Thu Jul 23 11:22:05 2020 -0400 @@ -1,4 +1,4 @@ -<tool id="seurat" name="Seurat" version="@TOOL_VERSION@+galaxy1"> +<tool id="seurat" name="Seurat" version="@TOOL_VERSION@+galaxy2"> <description>- toolkit for exploration of single-cell RNA-seq data</description> <macros> <token name="@TOOL_VERSION@">3.1.5</token> @@ -42,6 +42,11 @@ #else #set $heatmaps = 'F' #end if +#if not str($adv.perplexity): + #set $adv_perplexity = -1 +#else: + #set $adv_perplexity = $adv.perplexity +#end if Rscript -e "library(\"rmarkdown\"); render(\"$__tool_directory__/Seurat.R\", params = list(counts = \"${counts}\", min_cells = \"${adv.min_cells}\", @@ -51,6 +56,7 @@ numPCs = \"${adv.num_PCs}\", cells_use = \"${adv.cells_use}\", resolution = \"${adv.resolution}\", + perplexity = \"${adv_perplexity}\", min_pct = \"${adv.min_pct}\", logfc_threshold = \"${adv.logfc_threshold}\", warn = \"${meta.warn}\", @@ -76,6 +82,7 @@ <param name="high_thresholds" type="integer" value="20000000" label="High threshold for filtering cells" /> <param name="cells_use" type="integer" min="1" value="500" label="Cells to use for PCHeatmap" help="Plots this number of top ‘extreme’ cells on both ends of the spectrum, which dramatically speeds plotting for large datasets" /> <param name="resolution" type="float" value="0.6" label="Resolution parameter" help="Value of the resolution parameter used in FindClusters, use a value above (below) 1.0 if you want to obtain a larger (smaller) number of communities." /> + <param name="perplexity" type="integer" value="" optional="true" label="Perplexity parameter" help="Parameter for the tSNE dimensionality reduction" /> <param name="min_pct" type="float" value="0.1" label="Minimum percent cells" help="With FindMarkers only test genes that are detected in a minimum fraction of min.pct cells in either of the two populations. Meant to speed up the function by not testing genes that are very infrequently expressed. 
Default is 0.1" /> <param name="logfc_threshold" type="float" min="0" value="0.25" label="LogFC threshold" help="With FindMarkers, limit testing to genes which show, on average, at least X-fold difference (log-scale)between the two groups of cells. Default is 0.25 Increasing logfc.threshold speeds up the function, but can miss weaker signals." /> @@ -119,6 +126,28 @@ </section> <output name="out_html" ftype="html" value="out.html" compare="sim_size" delta="20000" /> </test> + <test> <!-- perplexity test --> + <param name="counts" ftype="tabular" value="counts.tab.gz"/> + <section name="adv"> + <param name="numPCs" value="10" /> + <param name="min_cells" value="3"/> + <param name="min_genes" value="200"/> + <param name="low_thresholds" value="1" /> + <param name="high_thresholds" value="20000000" /> + <param name="cells_use" value="500"/> + <param name="resolution" value="0.6" /> + <param name="perplexity" value="16" /> + <param name="min_pct" value="0.25" /> + <param name="logfc_threshold" value="0.25" /> + </section> + <section name="meta"> + <param name="showcode" value="T"/> + <param name="warn" value="F"/> + <param name="varstate" value="F"/> + <param name="plots" value="feat"/> + </section> + <output name="out_html" ftype="html" value="out.html" compare="sim_size" delta="20000" /> + </test> </tests> <help><![CDATA[ .. class:: infomark