changeset 6:764f076e9d52 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seurat commit e8eeff2efea68e1e5bc697d0ff6a5808cd978db2"
author iuc
date Thu, 23 Jul 2020 11:22:05 -0400 (2020-07-23)
parents 06ed31cf52ed
children 4c139a9415d7
files Seurat.R seurat.xml
diffstat 2 files changed, 62 insertions(+), 23 deletions(-) [+]
line wrap: on
line diff
--- a/Seurat.R	Mon Jun 08 17:40:51 2020 -0400
+++ b/Seurat.R	Thu Jul 23 11:22:05 2020 -0400
@@ -10,6 +10,7 @@
 #'     numPCs: ""
 #'     cells_use: ""
 #'     resolution: ""
+#'     perplexity: ""
 #'     min_pct: ""
 #'     logfc_threshold: ""
 #'     showcode: ""
@@ -22,89 +23,98 @@
 #'     heatmaps: ""
 #' ---
 
+# nolint start
 #+ echo=F, warning = F, message=F
-options(show.error.messages = F, error = function(){cat(geterrmessage(), file = stderr()); q("no", 1, F)})
+options(show.error.messages = FALSE, error = function() { # on any error: echo the message to stderr, then exit
+    cat(geterrmessage(), file = stderr()); q(save = "no", status = 1, runLast = FALSE) # status 1, workspace not saved
+})
 showcode <- as.logical(params$showcode)
 warn <-  as.logical(params$warn)
 varstate <- as.logical(params$varstate)
 vlnfeat <- as.logical(params$vlnfeat)
 featplot <- as.logical(params$featplot)
-PCplots <- as.logical(params$PCplots)
+pc_plots <- as.logical(params$PCplots)
 tsne <- as.logical(params$tsne)
 heatmaps <- as.logical(params$heatmaps)
 
 # we need that to not crash Galaxy with an UTF-8 error on German LC settings.
 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
 
-
 #+ echo = F, warning = `warn`, include =`varstate`
 min_cells <- as.integer(params$min_cells)
 min_genes <- as.integer(params$min_genes)
 low_thresholds <- as.integer(params$low_thresholds)
 high_thresholds <- as.integer(params$high_thresholds)
-numPCs <- as.integer(params$numPCs)
+num_pcs <- as.integer(params$numPCs)
 cells_use <- as.integer(params$cells_use)
 resolution <- as.double(params$resolution)
+perplexity <- as.integer(params$perplexity)
 min_pct <- as.double(params$min_pct)
 logfc_threshold <- as.double(params$logfc_thresh)
 print(paste0("Minimum cells: ", min_cells))
 print(paste0("Minimum features: ", min_genes))
 print(paste0("Umi low threshold: ", low_thresholds))
 print(paste0("Umi high threshold: ", high_thresholds))
-print(paste0("Number of principal components: ", numPCs))
+print(paste0("Number of principal components: ", num_pcs))
 print(paste0("Resolution: ", resolution))
+print(paste0("Perplexity: ", perplexity))
 print(paste0("Minimum percent of cells", min_pct))
 print(paste0("Logfold change threshold", logfc_threshold))
 
 #+ echo = FALSE
-if(showcode == TRUE) print("Read in data, generate inital Seurat object")
+if (showcode == TRUE) print("Read in data, generate inital Seurat object")
 #+ echo = `showcode`, warning = `warn`, message = F
 counts <- read.delim(params$counts, row.names = 1)
 seuset <- Seurat::CreateSeuratObject(counts = counts, min.cells = min_cells, min.features = min_genes)
 
 #+ echo = FALSE
-if(showcode == TRUE && vlnfeat == TRUE) print("Raw data vizualization")
-#+ echo = `showcode`, warning = `warn`, include=`vlnfeat` 
+if (showcode == TRUE && vlnfeat == TRUE) print("Raw data vizualization")
+#+ echo = `showcode`, warning = `warn`, include=`vlnfeat`
 Seurat::VlnPlot(object = seuset, features = c("nFeature_RNA", "nCount_RNA"))
 Seurat::FeatureScatter(object = seuset, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
 
 #+ echo = FALSE
-if(showcode == TRUE) print("Filter and normalize for UMI counts")
+if (showcode == TRUE) print("Filter and normalize for UMI counts")
 #+ echo = `showcode`, warning = `warn`
 seuset <- subset(seuset, subset = `nCount_RNA` > low_thresholds & `nCount_RNA` < high_thresholds)
 seuset <- Seurat::NormalizeData(seuset, normalization.method = "LogNormalize", scale.factor = 10000)
 
 #+ echo = FALSE
-if(showcode == TRUE && featplot == TRUE) print("Variable Genes")
+if (showcode == TRUE && featplot == TRUE) print("Variable Genes")
 #+ echo = `showcode`, warning = `warn`, include = `featplot`
 seuset <- Seurat::FindVariableFeatures(object = seuset, selection.method = "mvp")
 Seurat::VariableFeaturePlot(seuset, cols = c("black", "red"), selection.method = "disp")
 seuset <- Seurat::ScaleData(object = seuset, vars.to.regress = "nCount_RNA")
 
 #+ echo = FALSE
-if(showcode == TRUE && PCplots == TRUE) print("PCA Visualization")
-#+ echo = `showcode`, warning = `warn`, include = `PCplots`
-seuset <- Seurat::RunPCA(seuset, npcs = numPCs)
+if (showcode == TRUE && pc_plots == TRUE) print("PCA Visualization")
+#+ echo = `showcode`, warning = `warn`, include = `pc_plots`
+seuset <- Seurat::RunPCA(seuset, npcs = num_pcs)
 Seurat::VizDimLoadings(seuset, dims = 1:2)
-Seurat::DimPlot(seuset, dims = c(1,2), reduction = "pca")
-Seurat::DimHeatmap(seuset, dims = 1:numPCs, nfeatures = 30, reduction = "pca")
-seuset <- Seurat::JackStraw(seuset, dims=numPCs, reduction = "pca", num.replicate = 100)
-seuset <- Seurat::ScoreJackStraw(seuset, dims = 1:numPCs)
-Seurat::JackStrawPlot(seuset, dims = 1:numPCs)
-Seurat::ElbowPlot(seuset, ndims = numPCs, reduction = "pca")
+Seurat::DimPlot(seuset, dims = c(1, 2), reduction = "pca")
+Seurat::DimHeatmap(seuset, dims = 1:num_pcs, nfeatures = 30, reduction = "pca")
+seuset <- Seurat::JackStraw(seuset, dims = num_pcs, reduction = "pca", num.replicate = 100)
+seuset <- Seurat::ScoreJackStraw(seuset, dims = 1:num_pcs)
+Seurat::JackStrawPlot(seuset, dims = 1:num_pcs)
+Seurat::ElbowPlot(seuset, ndims = num_pcs, reduction = "pca")
 
 #+ echo = FALSE
-if(showcode == TRUE && tsne == TRUE) print("tSNE")
+if (showcode == TRUE && tsne == TRUE) print("tSNE")
 #+ echo = `showcode`, warning = `warn`, include = `tsne`
 seuset <- Seurat::FindNeighbors(object = seuset)
 seuset <- Seurat::FindClusters(object = seuset)
-seuset <- Seurat::RunTSNE(seuset, dims = 1:numPCs, resolution = resolution)
+if (is.na(perplexity) || perplexity == -1) { # -1 or NA (blank parameter): do not pass perplexity, keep the default
+    seuset <- Seurat::RunTSNE(seuset, dims = 1:num_pcs, resolution = resolution) # NOTE(review): seurat.xml documents `resolution` as a FindClusters parameter - confirm it is meant to be passed here
+} else { # an explicit perplexity was supplied by the user
+    seuset <- Seurat::RunTSNE(seuset, dims = 1:num_pcs, resolution = resolution, perplexity = perplexity)
+}
 Seurat::DimPlot(seuset, reduction = "tsne")
 
 #+ echo = FALSE
-if(showcode == TRUE && heatmaps == TRUE) print("Marker Genes")
+if (showcode == TRUE && heatmaps == TRUE) print("Marker Genes")
 #+ echo = `showcode`, warning = `warn`, include = `heatmaps`
 markers <- Seurat::FindAllMarkers(seuset, only.pos = TRUE, min.pct = min_pct, logfc.threshold = logfc_threshold)
 top10 <- dplyr::group_by(markers, cluster)
 top10 <- dplyr::top_n(top10, 10, avg_logFC)
 Seurat::DoHeatmap(seuset, features = top10$gene)
+# nolint end
--- a/seurat.xml	Mon Jun 08 17:40:51 2020 -0400
+++ b/seurat.xml	Thu Jul 23 11:22:05 2020 -0400
@@ -1,4 +1,4 @@
-<tool id="seurat" name="Seurat" version="@TOOL_VERSION@+galaxy1">
+<tool id="seurat" name="Seurat" version="@TOOL_VERSION@+galaxy2">
     <description>- toolkit for exploration of single-cell RNA-seq data</description>
     <macros>
         <token name="@TOOL_VERSION@">3.1.5</token>
@@ -42,6 +42,11 @@
 #else
     #set $heatmaps = 'F'
 #end if
+#if not str($adv.perplexity):
+    #set $adv_perplexity = -1
+#else:
+    #set $adv_perplexity = $adv.perplexity
+#end if
 Rscript -e "library(\"rmarkdown\"); render(\"$__tool_directory__/Seurat.R\",
     params = list(counts = \"${counts}\",
         min_cells = \"${adv.min_cells}\",
@@ -51,6 +56,7 @@
         numPCs = \"${adv.num_PCs}\",
         cells_use = \"${adv.cells_use}\",
         resolution = \"${adv.resolution}\",
+        perplexity = \"${adv_perplexity}\",
         min_pct = \"${adv.min_pct}\",
         logfc_threshold = \"${adv.logfc_threshold}\",
         warn = \"${meta.warn}\",
@@ -76,6 +82,7 @@
             <param name="high_thresholds" type="integer" value="20000000" label="High threshold for filtering cells" />
             <param name="cells_use" type="integer" min="1" value="500" label="Cells to use for PCHeatmap" help="Plots this number of top ‘extreme’ cells on both ends of the spectrum, which dramatically speeds plotting for large datasets" />
             <param name="resolution" type="float" value="0.6" label="Resolution parameter" help="Value of the resolution parameter used in FindClusters, use a value above (below) 1.0 if you want to obtain a larger (smaller) number of communities." />
+            <param name="perplexity" type="integer" value="" optional="true" label="Perplexity parameter" help="Parameter for the tSNE dimensionality reduction. Leave empty to use the default perplexity of RunTSNE." />
             <param name="min_pct" type="float" value="0.1" label="Minimum percent cells" help="With FindMarkers only test genes that are detected in a minimum fraction of min.pct cells in either of the two populations. Meant to speed up the function by not testing genes that are very infrequently expressed. Default is 0.1" />
             <param name="logfc_threshold" type="float" min="0" value="0.25" label="LogFC threshold"
                 help="With FindMarkers, limit testing to genes which show, on average, at least X-fold difference (log-scale)between the two groups of cells. Default is 0.25 Increasing logfc.threshold speeds up the function, but can miss weaker signals." />
@@ -119,6 +126,28 @@
             </section>
             <output name="out_html" ftype="html" value="out.html" compare="sim_size" delta="20000" />
         </test>
+        <test> <!-- perplexity test: runs the tool with an explicit tSNE perplexity of 16 -->
+            <param name="counts" ftype="tabular" value="counts.tab.gz"/>
+            <section name="adv">
+                <param name="num_PCs" value="10" />
+                <param name="min_cells" value="3"/>
+                <param name="min_genes" value="200"/>
+                <param name="low_thresholds" value="1" />
+                <param name="high_thresholds" value="20000000" />
+                <param name="cells_use" value="500"/>
+                <param name="resolution" value="0.6" />
+                <param name="perplexity" value="16" />
+                <param name="min_pct" value="0.25" />
+                <param name="logfc_threshold" value="0.25" />
+            </section>
+            <section name="meta">
+                <param name="showcode" value="T"/>
+                <param name="warn" value="F"/>
+                <param name="varstate" value="F"/>
+                <param name="plots" value="feat"/>
+            </section>
+            <output name="out_html" ftype="html" value="out.html" compare="sim_size" delta="20000" />
+        </test>
     </tests>
     <help><![CDATA[
 .. class:: infomark