Mercurial > repos > iuc > seurat
comparison Seurat.R @ 15:fab6ff46e019 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seurat commit b437a46efb50e543b6d7c9988f954efe2caa9046
author | iuc |
---|---|
date | Fri, 07 Jul 2023 01:43:02 +0000 |
parents | c4db6ec33fec |
children |
comparison
equal
deleted
inserted
replaced
14:c0fd285eb553 | 15:fab6ff46e019 |
---|---|
6 #' min_cells: "" | 6 #' min_cells: "" |
7 #' min_genes: "" | 7 #' min_genes: "" |
8 #' low_thresholds: "" | 8 #' low_thresholds: "" |
9 #' high_thresholds: "" | 9 #' high_thresholds: "" |
10 #' numPCs: "" | 10 #' numPCs: "" |
11 #' cells_use: "" | |
12 #' resolution: "" | 11 #' resolution: "" |
13 #' perplexity: "" | 12 #' perplexity: "" |
14 #' min_pct: "" | 13 #' min_pct: "" |
15 #' logfc_threshold: "" | 14 #' logfc_threshold: "" |
15 #' end_step: "" | |
16 #' showcode: "" | 16 #' showcode: "" |
17 #' warn: "" | 17 #' warn: "" |
18 #' varstate: "" | 18 #' varstate: "" |
19 #' vlnfeat: "" | 19 #' vlnfeat: "" |
20 #' featplot: "" | 20 #' featplot: "" |
21 #' PCplots: "" | 21 #' PCplots: "" |
22 #' tsne: "" | 22 #' nmds: "" |
23 #' heatmaps: "" | 23 #' heatmaps: "" |
24 #' norm_out: "" | |
25 #' variable_out: "" | |
26 #' pca_out : "" | |
27 #' clusters_out: "" | |
28 #' markers_out: "" | |
24 #' --- | 29 #' --- |
25 | 30 |
26 # nolint start | 31 # nolint start |
27 #+ echo=F, warning = F, message=F | 32 #+ echo=F, warning = F, message=F |
28 options(show.error.messages = F, error = function() { | 33 options(show.error.messages = F, error = function() { |
32 warn <- as.logical(params$warn) | 37 warn <- as.logical(params$warn) |
33 varstate <- as.logical(params$varstate) | 38 varstate <- as.logical(params$varstate) |
34 vlnfeat <- as.logical(params$vlnfeat) | 39 vlnfeat <- as.logical(params$vlnfeat) |
35 featplot <- as.logical(params$featplot) | 40 featplot <- as.logical(params$featplot) |
36 pc_plots <- as.logical(params$PCplots) | 41 pc_plots <- as.logical(params$PCplots) |
37 tsne <- as.logical(params$tsne) | 42 nmds <- as.logical(params$nmds) |
38 heatmaps <- as.logical(params$heatmaps) | 43 heatmaps <- as.logical(params$heatmaps) |
39 | 44 end_step <- as.integer(params$end_step) |
45 norm_out <- as.logical(params$norm_out) | |
40 # we need that to not crash Galaxy with an UTF-8 error on German LC settings. | 46 # we need that to not crash Galaxy with an UTF-8 error on German LC settings. |
41 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 47 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
42 | 48 |
43 #+ echo = F, warning = `warn`, include =`varstate` | 49 #+ echo = F, warning = `warn`, include =`varstate` |
44 min_cells <- as.integer(params$min_cells) | 50 min_cells <- as.integer(params$min_cells) |
45 min_genes <- as.integer(params$min_genes) | 51 min_genes <- as.integer(params$min_genes) |
52 print(paste0("Minimum cells: ", min_cells)) | |
53 print(paste0("Minimum features: ", min_genes)) | |
46 low_thresholds <- as.integer(params$low_thresholds) | 54 low_thresholds <- as.integer(params$low_thresholds) |
47 high_thresholds <- as.integer(params$high_thresholds) | 55 high_thresholds <- as.integer(params$high_thresholds) |
48 num_pcs <- as.integer(params$numPCs) | |
49 cells_use <- as.integer(params$cells_use) | |
50 resolution <- as.double(params$resolution) | |
51 perplexity <- as.integer(params$perplexity) | |
52 min_pct <- as.double(params$min_pct) | |
53 logfc_threshold <- as.double(params$logfc_thresh) | |
54 print(paste0("Minimum cells: ", min_cells)) | |
55 print(paste0("Minimum features: ", min_genes)) | |
56 print(paste0("Umi low threshold: ", low_thresholds)) | 56 print(paste0("Umi low threshold: ", low_thresholds)) |
57 print(paste0("Umi high threshold: ", high_thresholds)) | 57 print(paste0("Umi high threshold: ", high_thresholds)) |
58 print(paste0("Number of principal components: ", num_pcs)) | |
59 print(paste0("Resolution: ", resolution)) | |
60 print(paste0("Perplexity: ", perplexity)) | |
61 print(paste0("Minimum percent of cells", min_pct)) | |
62 print(paste0("Logfold change threshold", logfc_threshold)) | |
63 | 58 |
64 #+ echo = FALSE | 59 if (end_step >= 2) { |
60 variable_out <- as.logical(params$variable_out) | |
61 } | |
62 | |
63 | |
64 if (end_step >= 3) { | |
65 num_pcs <- as.integer(params$numPCs) | |
66 print(paste0("Number of principal components: ", num_pcs)) | |
67 pca_out <- as.logical(params$pca_out) | |
68 } | |
69 if (end_step >= 4) { | |
70 if (params$perplexity == "") { | |
71 perplexity <- -1 | |
72 print(paste0("Perplexity: ", perplexity)) | |
73 } else { | |
74 perplexity <- as.integer(params$perplexity) | |
75 print(paste0("Perplexity: ", perplexity)) | |
76 } | |
77 resolution <- as.double(params$resolution) | |
78 print(paste0("Resolution: ", resolution)) | |
79 clusters_out <- as.logical(params$clusters_out) | |
80 } | |
81 if (end_step >= 5) { | |
82 min_pct <- as.double(params$min_pct) | |
83 logfc_threshold <- as.double(params$logfc_thresh) | |
84 print(paste0("Minimum percent of cells", min_pct)) | |
85 print(paste0("Logfold change threshold", logfc_threshold)) | |
86 markers_out <- as.logical(params$markers_out) | |
87 } | |
88 | |
89 | |
65 if (showcode == TRUE) print("Read in data, generate inital Seurat object") | 90 if (showcode == TRUE) print("Read in data, generate inital Seurat object") |
66 #+ echo = `showcode`, warning = `warn`, message = F | 91 #+ echo = `showcode`, warning = `warn`, message = F |
67 counts <- read.delim(params$counts, row.names = 1) | 92 counts <- read.delim(params$counts, row.names = 1) |
68 seuset <- Seurat::CreateSeuratObject(counts = counts, min.cells = min_cells, min.features = min_genes) | 93 seuset <- Seurat::CreateSeuratObject(counts = counts, min.cells = min_cells, min.features = min_genes) |
69 | 94 |
70 #+ echo = FALSE | |
71 if (showcode == TRUE && vlnfeat == TRUE) print("Raw data vizualization") | 95 if (showcode == TRUE && vlnfeat == TRUE) print("Raw data vizualization") |
72 #+ echo = `showcode`, warning = `warn`, include=`vlnfeat` | 96 #+ echo = `showcode`, warning = `warn`, include=`vlnfeat` |
73 Seurat::VlnPlot(object = seuset, features = c("nFeature_RNA", "nCount_RNA")) | 97 if (vlnfeat == TRUE){ |
74 Seurat::FeatureScatter(object = seuset, feature1 = "nCount_RNA", feature2 = "nFeature_RNA") | 98 print(Seurat::VlnPlot(object = seuset, features = c("nFeature_RNA", "nCount_RNA"))) |
99 print(Seurat::FeatureScatter(object = seuset, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")) | |
100 } | |
75 | 101 |
76 #+ echo = FALSE | |
77 if (showcode == TRUE) print("Filter and normalize for UMI counts") | 102 if (showcode == TRUE) print("Filter and normalize for UMI counts") |
78 #+ echo = `showcode`, warning = `warn` | 103 #+ echo = `showcode`, warning = `warn` |
79 seuset <- subset(seuset, subset = `nCount_RNA` > low_thresholds & `nCount_RNA` < high_thresholds) | 104 seuset <- subset(seuset, subset = `nCount_RNA` > low_thresholds & `nCount_RNA` < high_thresholds) |
80 seuset <- Seurat::NormalizeData(seuset, normalization.method = "LogNormalize", scale.factor = 10000) | 105 seuset <- Seurat::NormalizeData(seuset, normalization.method = "LogNormalize", scale.factor = 10000) |
106 if (norm_out == TRUE) { | |
107 saveRDS(seuset, "norm_out.rds") | |
108 } | |
81 | 109 |
82 #+ echo = FALSE | 110 if (end_step >= 2) { |
83 if (showcode == TRUE && featplot == TRUE) print("Variable Genes") | 111 #+ echo = FALSE |
84 #+ echo = `showcode`, warning = `warn`, include = `featplot` | 112 if (showcode == TRUE && featplot == TRUE) print("Variable Genes") |
85 seuset <- Seurat::FindVariableFeatures(object = seuset, selection.method = "mvp") | 113 #+ echo = `showcode`, warning = `warn`, include = `featplot` |
86 Seurat::VariableFeaturePlot(seuset, cols = c("black", "red"), selection.method = "disp") | 114 seuset <- Seurat::FindVariableFeatures(object = seuset, selection.method = "mvp") |
87 seuset <- Seurat::ScaleData(object = seuset, vars.to.regress = "nCount_RNA") | 115 if (featplot == TRUE) { |
116 print(Seurat::VariableFeaturePlot(seuset, cols = c("black", "red"), selection.method = "disp")) | |
117 } | |
118 seuset <- Seurat::ScaleData(object = seuset, vars.to.regress = "nCount_RNA") | |
119 if (variable_out == TRUE) { | |
120 saveRDS(seuset, "var_out.rds") | |
121 } | |
122 } | |
88 | 123 |
89 #+ echo = FALSE | 124 if (end_step >= 3) { |
90 if (showcode == TRUE && pc_plots == TRUE) print("PCA Visualization") | 125 #+ echo = FALSE |
91 #+ echo = `showcode`, warning = `warn`, include = `pc_plots` | 126 if (showcode == TRUE && pc_plots == TRUE) print("PCA Visualization") |
92 seuset <- Seurat::RunPCA(seuset, npcs = num_pcs) | 127 #+ echo = `showcode`, warning = `warn`, include = `pc_plots` |
93 Seurat::VizDimLoadings(seuset, dims = 1:2) | 128 seuset <- Seurat::RunPCA(seuset, npcs = num_pcs) |
94 Seurat::DimPlot(seuset, dims = c(1, 2), reduction = "pca") | 129 seuset <- Seurat::JackStraw(seuset, dims = num_pcs, reduction = "pca", num.replicate = 100) |
95 Seurat::DimHeatmap(seuset, dims = 1:num_pcs, nfeatures = 30, reduction = "pca") | 130 seuset <- Seurat::ScoreJackStraw(seuset, dims = 1:num_pcs) |
96 seuset <- Seurat::JackStraw(seuset, dims = num_pcs, reduction = "pca", num.replicate = 100) | 131 if (pc_plots == TRUE) { |
97 seuset <- Seurat::ScoreJackStraw(seuset, dims = 1:num_pcs) | 132 print(Seurat::VizDimLoadings(seuset, dims = 1:2)) |
98 Seurat::JackStrawPlot(seuset, dims = 1:num_pcs) | 133 print(Seurat::DimPlot(seuset, dims = c(1, 2), reduction = "pca")) |
99 Seurat::ElbowPlot(seuset, ndims = num_pcs, reduction = "pca") | 134 print(Seurat::DimHeatmap(seuset, dims = 1:num_pcs, nfeatures = 30, reduction = "pca")) |
135 print(Seurat::JackStrawPlot(seuset, dims = 1:num_pcs)) | |
136 print(Seurat::ElbowPlot(seuset, ndims = num_pcs, reduction = "pca")) | |
137 } | |
138 if (pca_out == TRUE) { | |
139 saveRDS(seuset, "pca_out.rds") | |
140 } | |
141 } | |
100 | 142 |
101 #+ echo = FALSE | 143 if (end_step >= 4) { |
102 if (showcode == TRUE && tsne == TRUE) print("tSNE") | 144 #+ echo = FALSE |
103 #+ echo = `showcode`, warning = `warn`, include = `tsne` | 145 if (showcode == TRUE && nmds == TRUE) print("tSNE and UMAP") |
104 seuset <- Seurat::FindNeighbors(object = seuset) | 146 #+ echo = `showcode`, warning = `warn`, include = `nmds` |
105 seuset <- Seurat::FindClusters(object = seuset) | 147 seuset <- Seurat::FindNeighbors(object = seuset) |
106 if (perplexity == -1) { | 148 seuset <- Seurat::FindClusters(object = seuset) |
107 seuset <- Seurat::RunTSNE(seuset, dims = 1:num_pcs, resolution = resolution); | 149 if (perplexity == -1) { |
108 } else { | 150 seuset <- Seurat::RunTSNE(seuset, dims = 1:num_pcs, resolution = resolution); |
109 seuset <- Seurat::RunTSNE(seuset, dims = 1:num_pcs, resolution = resolution, perplexity = perplexity); | 151 } else { |
152 seuset <- Seurat::RunTSNE(seuset, dims = 1:num_pcs, resolution = resolution, perplexity = perplexity); | |
153 } | |
154 if (nmds == TRUE) { | |
155 print(Seurat::DimPlot(seuset, reduction = "tsne")) | |
156 } | |
157 seuset <- Seurat::RunUMAP(seuset, dims = 1:num_pcs) | |
158 if (nmds == TRUE) { | |
159 print(Seurat::DimPlot(seuset, reduction = "umap")) | |
160 } | |
161 if (clusters_out == TRUE) { | |
162 tsnedata <- Seurat::Embeddings(seuset, reduction="tsne") | |
163 saveRDS(seuset, "tsne_out.rds") | |
164 umapdata <- Seurat::Embeddings(seuset, reduction="umap") | |
165 saveRDS(seuset, "umap_out.rds") | |
166 } | |
110 } | 167 } |
111 Seurat::DimPlot(seuset, reduction = "tsne") | |
112 | 168 |
113 #+ echo = FALSE | 169 |
114 if (showcode == TRUE && heatmaps == TRUE) print("Marker Genes") | 170 if (end_step == 5) { |
115 #+ echo = `showcode`, warning = `warn`, include = `heatmaps` | 171 #+ echo = FALSE |
116 markers <- Seurat::FindAllMarkers(seuset, only.pos = TRUE, min.pct = min_pct, logfc.threshold = logfc_threshold) | 172 if (showcode == TRUE && heatmaps == TRUE) print("Marker Genes") |
117 top10 <- dplyr::group_by(markers, cluster) | 173 #+ echo = `showcode`, warning = `warn`, include = `heatmaps` |
118 top10 <- dplyr::top_n(top10, 10, avg_log2FC) | 174 markers <- Seurat::FindAllMarkers(seuset, only.pos = TRUE, min.pct = min_pct, logfc.threshold = logfc_threshold) |
119 Seurat::DoHeatmap(seuset, features = top10$gene) | 175 top10 <- dplyr::group_by(markers, cluster) |
176 top10 <- dplyr::top_n(top10, n = 10, wt = avg_log2FC) | |
177 print(top10) | |
178 if (heatmaps == TRUE) { | |
179 print(Seurat::DoHeatmap(seuset, features = top10$gene)) | |
180 } | |
181 if (markers_out == TRUE) { | |
182 saveRDS(seuset, "markers_out.rds") | |
183 data.table::fwrite(x = markers, row.names=TRUE, sep="\t", file = "markers_out.tsv") | |
184 } | |
185 } | |
120 # nolint end | 186 # nolint end |