Mercurial > repos > iuc > seurat
comparison citeseq_Seurat.R @ 15:fab6ff46e019 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/seurat commit b437a46efb50e543b6d7c9988f954efe2caa9046
author | iuc |
---|---|
date | Fri, 07 Jul 2023 01:43:02 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
14:c0fd285eb553 | 15:fab6ff46e019 |
---|---|
1 #' --- | |
2 #' title: "Seurat Cite-seq Analysis" | |
3 #' author: "Performed using Galaxy" | |
4 #' params: | |
5 #' rna: "" | |
6 #' prot: "" | |
7 #' min_cells: "" | |
8 #' min_genes: "" | |
9 #' low_thresholds: "" | |
10 #' high_thresholds: "" | |
11 #' numPCs: "" | |
12 #' resolution: "" | |
13 #' perplexity: "" | |
14 #' min_pct: "" | |
15 #' logfc_threshold: "" | |
16 #' showcode: "" | |
17 #' warn: "" | |
18 #' varstate: "" | |
19 #' vlnfeat: "" | |
20 #' featplot: "" | |
21 #' PCplots: "" | |
22 #' nmds: "" | |
23 #' heatmaps: "" | |
24 #' norm_out: "" | |
25 #' variable_out: "" | |
26 #' pca_out : "" | |
27 #' clusters_out: "" | |
28 #' markers_out: "" | |
29 #' cite_markers: "" | |
30 #' comparison: "" | |
31 #' feat_comp: "" | |
32 #' marker_compare: "" | |
33 #' top_x: "" | |
34 #' --- | |
35 | |
36 # nolint start | |
37 #+ echo=F, warning = F, message=F | |
38 options(show.error.messages = F, error = function() { | |
39 cat(geterrmessage(), file = stderr()); q("no", 1, F) | |
40 }) | |
41 | |
42 # we need that to not crash Galaxy with an UTF-8 error on German LC settings. | |
43 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
44 | |
45 showcode <- as.logical(params$showcode) | |
46 warn <- as.logical(params$warn) | |
47 varstate <- as.logical(params$varstate) | |
48 vlnfeat <- as.logical(params$vlnfeat) | |
49 featplot <- as.logical(params$featplot) | |
50 pc_plots <- as.logical(params$PCplots) | |
51 nmds <- as.logical(params$nmds) | |
52 heatmaps <- as.logical(params$heatmaps) | |
53 end_step <- as.integer(params$end_step) | |
54 norm_out <- as.logical(params$norm_out) | |
55 comparison <- as.logical(params$comparison) | |
56 feature <- trimws(unlist(strsplit(as.character(params$feat_comp), ","))) | |
57 marker_compare <- as.logical(params$marker_compare) | |
58 top_x <- as.integer(params$top_x) | |
59 min_cells <- as.integer(params$min_cells) | |
60 min_genes <- as.integer(params$min_genes) | |
61 low_thresholds <- as.integer(params$low_thresholds) | |
62 high_thresholds <- as.integer(params$high_thresholds) | |
63 num_pcs <- as.integer(params$numPCs) | |
64 cells_use <- as.integer(params$cells_use) | |
65 resolution <- as.double(params$resolution) | |
66 min_pct <- as.double(params$min_pct) | |
67 logfc_threshold <- as.double(params$logfc_thresh) | |
68 variable_out <- as.logical(params$variable_out) | |
69 pca_out <- as.logical(params$pca_out) | |
70 clusters_out <- as.logical(params$clusters_out) | |
71 markers_out <- as.logical(params$markers_out) | |
72 | |
73 print(paste0("Minimum cells: ", min_cells)) | |
74 print(paste0("Minimum features: ", min_genes)) | |
75 print(paste0("Umi low threshold: ", low_thresholds)) | |
76 print(paste0("Umi high threshold: ", high_thresholds)) | |
77 print(paste0("Number of principal components: ", num_pcs)) | |
78 print(paste0("Resolution: ", resolution)) | |
79 print(paste0("Minimum percent of cells", min_pct)) | |
80 print(paste0("Logfold change threshold", logfc_threshold)) | |
81 if (params$perplexity == "") { | |
82 perplexity <- -1 | |
83 print(paste0("Perplexity: ", perplexity)) | |
84 } else { | |
85 perplexity <- as.integer(params$perplexity) | |
86 print(paste0("Perplexity: ", perplexity)) | |
87 } | |
88 | |
89 #+ echo = FALSE | |
90 if (showcode == TRUE) print("Read in data, generate inital Seurat object") | |
91 #+ echo = `showcode`, warning = `warn`, message = F | |
92 rna <- read.delim(params$rna, row.names = 1) | |
93 rna <- Seurat::CollapseSpeciesExpressionMatrix(rna) | |
94 protein <- read.delim(params$prot, row.names = 1) | |
95 tryCatch(all.equal(colnames(rna), colnames(protein)), error = "Columns do not match in input files") | |
96 seuset <- Seurat::CreateSeuratObject(counts = rna, min.cells = min_cells, min.features = min_genes) | |
97 | |
98 if (showcode == TRUE) print("asdf") | |
99 #+ echo = `showcode`, warning = `warn`, message = F | |
100 prot_obj <- Seurat::CreateAssayObject(counts = protein) | |
101 | |
102 if (showcode == TRUE) print("qwer") | |
103 #+ echo = `showcode`, warning = `warn`, message = F | |
104 seuset[["ADT"]] <- prot_obj | |
105 | |
106 if (showcode == TRUE) print("zxcv") | |
107 #+ echo = `showcode`, warning = `warn`, message = F | |
108 Seurat::DefaultAssay(seuset) <- "RNA" | |
109 | |
110 if (showcode == TRUE && vlnfeat == TRUE) print("Raw data vizualization") | |
111 #+ echo = `showcode`, warning = `warn`, include=`vlnfeat` | |
112 if (vlnfeat == TRUE){ | |
113 print(Seurat::VlnPlot(object = seuset, features = c("nFeature_RNA", "nCount_RNA"))) | |
114 print(Seurat::FeatureScatter(object = seuset, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")) | |
115 } | |
116 | |
117 if (showcode == TRUE) print("Filter and normalize for UMI counts") | |
118 #+ echo = `showcode`, warning = `warn` | |
119 seuset <- subset(seuset, subset = `nCount_RNA` > low_thresholds & `nCount_RNA` < high_thresholds) | |
120 seuset <- Seurat::NormalizeData(seuset, normalization.method = "LogNormalize", scale.factor = 10000) | |
121 if (norm_out == TRUE) { | |
122 saveRDS(seuset, "norm_out.rds") | |
123 } | |
124 | |
125 | |
126 if (showcode == TRUE && featplot == TRUE) print("Variable Genes") | |
127 #+ echo = `showcode`, warning = `warn`, include = `featplot` | |
128 seuset <- Seurat::FindVariableFeatures(object = seuset, selection.method = "mvp") | |
129 if (featplot == TRUE) { | |
130 print(Seurat::VariableFeaturePlot(seuset, cols = c("black", "red"), selection.method = "disp")) | |
131 } | |
132 seuset <- Seurat::ScaleData(object = seuset, vars.to.regress = "nCount_RNA") | |
133 if (variable_out == TRUE) { | |
134 saveRDS(seuset, "var_out.rds") | |
135 } | |
136 | |
137 | |
138 | |
139 if (showcode == TRUE && pc_plots == TRUE) print("PCA Visualization") | |
140 #+ echo = `showcode`, warning = `warn`, include = `pc_plots` | |
141 seuset <- Seurat::RunPCA(seuset, npcs = num_pcs) | |
142 seuset <- Seurat::JackStraw(seuset, dims = num_pcs, reduction = "pca", num.replicate = 100) | |
143 seuset <- Seurat::ScoreJackStraw(seuset, dims = 1:num_pcs) | |
144 if (pc_plots == TRUE) { | |
145 print(Seurat::VizDimLoadings(seuset, dims = 1:2)) | |
146 print(Seurat::DimPlot(seuset, dims = c(1, 2), reduction = "pca")) | |
147 print(Seurat::DimHeatmap(seuset, dims = 1:num_pcs, nfeatures = 10, reduction = "pca")) | |
148 print(Seurat::JackStrawPlot(seuset, dims = 1:num_pcs)) | |
149 print(Seurat::ElbowPlot(seuset, ndims = num_pcs, reduction = "pca")) | |
150 } | |
151 if (pca_out == TRUE) { | |
152 saveRDS(seuset, "pca_out.rds") | |
153 } | |
154 | |
155 | |
156 | |
157 if (showcode == TRUE && nmds == TRUE) print("tSNE and UMAP") | |
158 #+ echo = `showcode`, warning = `warn`, include = `nmds` | |
159 seuset <- Seurat::FindNeighbors(object = seuset) | |
160 seuset <- Seurat::FindClusters(object = seuset) | |
161 if (perplexity == -1) { | |
162 seuset <- Seurat::RunTSNE(seuset, dims = 1:num_pcs, resolution = resolution, check_duplicates = FALSE) | |
163 } else { | |
164 seuset <- Seurat::RunTSNE(seuset, dims = 1:num_pcs, resolution = resolution, perplexity = perplexity, check_duplicates = FALSE) | |
165 } | |
166 if (nmds == TRUE) { | |
167 print(Seurat::DimPlot(seuset, reduction = "tsne")) | |
168 } | |
169 seuset <- Seurat::RunUMAP(seuset, dims = 1:num_pcs) | |
170 if (nmds == TRUE) { | |
171 print(Seurat::DimPlot(seuset, reduction = "umap")) | |
172 } | |
173 if (clusters_out == TRUE) { | |
174 tsnedata <- Seurat::Embeddings(seuset, reduction="tsne") | |
175 saveRDS(seuset, "tsne_out.rds") | |
176 umapdata <- Seurat::Embeddings(seuset, reduction="umap") | |
177 saveRDS(seuset, "umap_out.rds") | |
178 } | |
179 | |
180 if (showcode == TRUE && heatmaps == TRUE) print("Marker Genes") | |
181 #+ echo = `showcode`, warning = `warn`, include = `heatmaps` | |
182 markers <- Seurat::FindAllMarkers(seuset, only.pos = TRUE, min.pct = min_pct, logfc.threshold = logfc_threshold) | |
183 top10 <- dplyr::group_by(markers, cluster) | |
184 top10 <- dplyr::top_n(top10, n = 10, wt = avg_log2FC) | |
185 print(top10) | |
186 if (heatmaps == TRUE) { | |
187 print(Seurat::DoHeatmap(seuset, features = top10$gene)) | |
188 } | |
189 if (markers_out == TRUE) { | |
190 saveRDS(seuset, "markers_out.rds") | |
191 data.table::fwrite(x = markers, row.names=TRUE, sep="\t", file = "markers_out.tsv") | |
192 } | |
193 | |
194 #+ echo = FALSE | |
195 if (showcode == TRUE && comparison == TRUE) print("Compare") | |
196 #+ echo = `showcode`, warning = `warn`, include = `comparison` | |
197 Seurat::DefaultAssay(seuset) <- "ADT" | |
198 seuset <- Seurat::NormalizeData(seuset, normalization.method = "CLR", margin = 2) | |
199 Seurat::DefaultAssay(seuset) <- "RNA" | |
200 seuset <- Seurat::NormalizeData(seuset, normalization.method = "CLR", margin = 2, assay = "ADT") | |
201 if (comparison == TRUE) { | |
202 for(x in feature) { | |
203 Seurat::DefaultAssay(seuset) <- "ADT" | |
204 p1 <- Seurat::FeaturePlot(seuset, x, cols = c("lightgrey", "red")) + ggplot2::ggtitle(paste0("Protein:", " ", x)) | |
205 Seurat::DefaultAssay(seuset) <- "RNA" | |
206 p2 <- Seurat::FeaturePlot(seuset, x) + ggplot2::ggtitle(paste0("RNA:", " ", x)) | |
207 print(p1 | p2) | |
208 label <- as.character(paste0(Seurat::Key(seuset[["ADT"]]), x)) | |
209 print(Seurat::VlnPlot(seuset, paste0("rna_", x))) | |
210 print(Seurat::VlnPlot(seuset, paste0("adt_", x))) | |
211 } | |
212 } | |
213 | |
214 #+ echo = FALSE | |
215 if (showcode == TRUE) print("Cite-seq") | |
216 #+ echo = `showcode`, warning = `warn`, include = `marker_compare` | |
217 rna_markers <- Seurat::FindAllMarkers(seuset, only.pos = TRUE, min.pct = min_pct, logfc.threshold = logfc_threshold, assay="RNA") | |
218 protein_markers <- Seurat::FindAllMarkers(seuset, only.pos = TRUE, min.pct = min_pct, logfc.threshold = logfc_threshold, assay="ADT") | |
219 if (marker_compare == TRUE) { | |
220 data.table::fwrite(x = rna_markers, sep="\t", file = "rna_out.tsv") | |
221 data.table::fwrite(x = protein_markers, sep="\t", file = "protein_out.tsv") | |
222 } | |
223 toprna <- dplyr::top_n(dplyr::group_by(rna_markers, cluster), n=5, avg_log2FC) | |
224 toprna <- head(as.list(unique(as.data.frame(toprna)$gene)), top_x) | |
225 topprot <- dplyr::top_n(dplyr::group_by(protein_markers, cluster), n=5, avg_log2FC) | |
226 topprot <- head(as.list(unique(as.data.frame(topprot)$gene)), top_x) | |
227 if(marker_compare == TRUE) { | |
228 pdf(file="citeseq_out.pdf") | |
229 rna_labels <- as.vector(toprna) | |
230 rna_labels <- rna_labels[!duplicated(rna_labels)] | |
231 prot_labels <- as.vector(topprot) | |
232 prot_labels <- prot_labels[!duplicated(prot_labels)] | |
233 for(rnamarker in rna_labels) { | |
234 rnamarker <- paste("rna_", rnamarker, sep = "") | |
235 for(protmarker in prot_labels) { | |
236 protmarker <- paste("adt_", protmarker, sep="") | |
237 plot <- Seurat::FeatureScatter(seuset, feature1 = rnamarker, feature2 = protmarker) + ggplot2::ggtitle(paste0(rnamarker, " vs ", protmarker)) | |
238 print(plot) | |
239 } | |
240 } | |
241 for(rnamarker in rna_labels) { | |
242 rnamarker <- paste("rna_", rnamarker, sep = "") | |
243 for(rnamarker2 in rna_labels) { | |
244 rnamarker2 <- paste("rna_", rnamarker2, sep="") | |
245 plot <- Seurat::FeatureScatter(seuset, feature1 = rnamarker, feature2 = rnamarker2) + ggplot2::ggtitle(paste0(rnamarker, " vs ", rnamarker2)) | |
246 print(plot) | |
247 } | |
248 } | |
249 for(protmarker in prot_labels) { | |
250 protmarker <- paste("adt_", protmarker, sep = "") | |
251 for(protmarker2 in prot_labels) { | |
252 protmarker2 <- paste("adt_", protmarker2, sep="") | |
253 plot <- Seurat::FeatureScatter(seuset, feature1 = protmarker, feature2 = protmarker2) + ggplot2::ggtitle(paste0(protmarker, " vs ", protmarker2)) | |
254 print(plot) | |
255 } | |
256 } | |
257 dev.off() | |
258 } | |
259 | |
260 # nolint end |