Mercurial > repos > iuc > raceid_inspectclusters
comparison scripts/cluster.R @ 6:41f34e925bd5 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/raceid3 commit 53916f6803b93234f992f5fd4fad61d7013d82af"
author | iuc |
---|---|
date | Thu, 15 Apr 2021 18:59:31 +0000 |
parents | 20f522154663 |
children | f3eb2291da05 |
comparison
equal
deleted
inserted
replaced
5:37a47c4fd84d | 6:41f34e925bd5 |
---|---|
1 #!/usr/bin/env R | 1 #!/usr/bin/env R |
2 VERSION = "0.5" | 2 VERSION <- "0.5" # nolint |
3 | 3 |
4 args = commandArgs(trailingOnly = T) | 4 args <- commandArgs(trailingOnly = T) |
5 | 5 |
6 if (length(args) != 1){ | 6 if (length(args) != 1) { |
7 message(paste("VERSION:", VERSION)) | 7 message(paste("VERSION:", VERSION)) |
8 stop("Please provide the config file") | 8 stop("Please provide the config file") |
9 } | 9 } |
10 | 10 |
11 suppressWarnings(suppressPackageStartupMessages(require(RaceID))) | 11 suppressWarnings(suppressPackageStartupMessages(require(RaceID))) |
12 suppressWarnings(suppressPackageStartupMessages(require(scran))) | 12 ## suppressWarnings(suppressPackageStartupMessages(require(scran))) # nolint |
13 source(args[1]) | 13 source(args[1]) |
14 | 14 |
15 | 15 |
16 do.filter <- function(sc){ | 16 do.filter <- function(sc) { # nolint |
17 if (!is.null(filt.lbatch.regexes)){ | 17 if (!is.null(filt.lbatch.regexes)) { |
18 lar <- filt.lbatch.regexes | 18 lar <- filt.lbatch.regexes |
19 nn <- colnames(sc@expdata) | 19 nn <- colnames(sc@expdata) |
20 filt$LBatch <- lapply(1:length(lar), function(m){ return( nn[grep(lar[[m]], nn)] ) }) | 20 filt$LBatch <- lapply(1:length(lar), function(m) { # nolint |
21 return(nn[grep(lar[[m]], nn)])}) | |
21 } | 22 } |
22 | 23 |
23 sc <- do.call(filterdata, c(sc, filt)) | 24 sc <- do.call(filterdata, c(sc, filt)) |
24 | 25 |
25 ## Get histogram metrics for library size and number of features | 26 ## Get histogram metrics for library size and number of features |
26 raw.lib <- log10(colSums(as.matrix(sc@expdata))) | 27 raw_lib <- log10(colSums(as.matrix(sc@expdata))) |
27 raw.feat <- log10(colSums(as.matrix(sc@expdata)>0)) | 28 raw_feat <- log10(colSums(as.matrix(sc@expdata) > 0)) |
28 filt.lib <- log10(colSums(getfdata(sc))) | 29 filt_lib <- log10(colSums(as.matrix(getfdata(sc)))) |
29 filt.feat <- log10(colSums(getfdata(sc)>0)) | 30 filt_feat <- log10(colSums(as.matrix(getfdata(sc) > 0))) |
30 | 31 |
31 if (filt.geqone){ | 32 if (filt.geqone) { |
32 filt.feat <- log10(colSums(getfdata(sc)>=1)) | 33 filt_feat <- log10(colSums(as.matrix(getfdata(sc) >= 1))) # nolint |
33 } | 34 } |
34 | 35 |
35 br <- 50 | 36 br <- 50 |
36 ## Determine limits on plots based on the unfiltered data | 37 par(mfrow = c(2, 2)) |
37 ## (doesn't work, R rejects limits and norm data is too different to compare to exp data | 38 print(hist(raw_lib, breaks = br, main = "RawData Log10 LibSize")) |
38 ## so let them keep their own ranges) | 39 print(hist(raw_feat, breaks = br, main = "RawData Log10 NumFeat")) |
39 | 40 print(hist(filt_lib, breaks = br, main = "FiltData Log10 LibSize")) |
40 ## betterrange <- function(floatval){ | 41 tmp <- hist(filt_feat, breaks = br, main = "FiltData Log10 NumFeat") |
41 ## return(10 * (floor(floatval / 10) + 1)) | |
42 ## } | |
43 | |
44 ## tmp.lib <- hist(raw.lib, breaks=br, plot=F) | |
45 ## tmp.feat <- hist(raw.feat, breaks=br, plot=F) | |
46 | |
47 ## lib.y_lim <- c(0,betterrange(max(tmp.lib$counts))) | |
48 ## lib.x_lim <- c(0,betterrange(max(tmp.lib$breaks))) | |
49 | |
50 ## feat.y_lim <- c(0,betterrange(max(tmp.feat$counts))) | |
51 ## feat.x_lim <- c(0,betterrange(max(tmp.feat$breaks))) | |
52 | |
53 par(mfrow=c(2,2)) | |
54 print(hist(raw.lib, breaks=br, main="RawData Log10 LibSize")) # , xlim=lib.x_lim, ylim=lib.y_lim) | |
55 print(hist(raw.feat, breaks=br, main="RawData Log10 NumFeat")) #, xlim=feat.x_lim, ylim=feat.y_lim) | |
56 print(hist(filt.lib, breaks=br, main="FiltData Log10 LibSize")) # , xlim=lib.x_lim, ylim=lib.y_lim) | |
57 tmp <- hist(filt.feat, breaks=br, main="FiltData Log10 NumFeat") # , xlim=feat.x_lim, ylim=feat.y_lim) | |
58 print(tmp) | 42 print(tmp) |
59 ## required, for extracting midpoint | 43 ## required, for extracting midpoint |
60 unq <- unique(filt.feat) | 44 unq <- unique(filt_feat) |
61 if (length(unq) == 1){ | 45 if (length(unq) == 1) { |
62 abline(v=unq, col="red", lw=2) | 46 abline(v = unq, col = "red", lw = 2) |
63 text(tmp$mids, table(filt.feat)[[1]] - 100, pos=1, paste(10^unq, "\nFeatures\nin remaining\nCells", sep=""), cex=0.8) | 47 text(tmp$mids, table(filt_feat)[[1]] - 100, pos = 1, |
48 paste(10^unq, "\nFeatures\nin remaining\nCells", | |
49 sep = ""), cex = 0.8) | |
64 } | 50 } |
65 | 51 |
66 if (filt.use.ccorrect){ | 52 if (filt.use.ccorrect) { |
67 par(mfrow=c(2,2)) | 53 par(mfrow = c(2, 2)) |
68 sc <- do.call(CCcorrect, c(sc, filt.ccc)) | 54 sc <- do.call(CCcorrect, c(sc, filt.ccc)) |
69 print(plotdimsat(sc, change=T)) | 55 print(plotdimsat(sc, change = T)) |
70 print(plotdimsat(sc, change=F)) | 56 print(plotdimsat(sc, change = F)) |
71 } | 57 } |
72 return(sc) | 58 return(sc) |
73 } | 59 } |
74 | 60 |
75 do.cluster <- function(sc){ | 61 do.cluster <- function(sc) { # nolint |
76 sc <- do.call(compdist, c(sc, clust.compdist)) | 62 sc <- do.call(compdist, c(sc, clust.compdist)) |
77 sc <- do.call(clustexp, c(sc, clust.clustexp)) | 63 sc <- do.call(clustexp, c(sc, clust.clustexp)) |
78 if (clust.clustexp$sat){ | 64 if (clust.clustexp$sat) { |
79 print(plotsaturation(sc, disp=F)) | 65 print(plotsaturation(sc, disp = F)) |
80 print(plotsaturation(sc, disp=T)) | 66 print(plotsaturation(sc, disp = T)) |
81 } | 67 } |
82 print(plotjaccard(sc)) | 68 print(plotjaccard(sc)) |
83 return(sc) | 69 return(sc) |
84 } | 70 } |
85 | 71 |
86 do.outlier <- function(sc){ | 72 do.outlier <- function(sc) { # nolint |
87 sc <- do.call(findoutliers, c(sc, outlier.findoutliers)) | 73 sc <- do.call(findoutliers, c(sc, outlier.findoutliers)) |
88 if (outlier.use.randomforest){ | 74 if (outlier.use.randomforest) { |
89 sc <- do.call(rfcorrect, c(sc, outlier.rfcorrect)) | 75 sc <- do.call(rfcorrect, c(sc, outlier.rfcorrect)) |
90 } | 76 } |
91 print(plotbackground(sc)) | 77 print(plotbackground(sc)) |
92 print(plotsensitivity(sc)) | 78 print(plotsensitivity(sc)) |
93 print(plotoutlierprobs(sc)) | 79 print(plotoutlierprobs(sc)) |
94 ## Heatmaps | 80 ## Heatmaps |
95 test1 <- list() | 81 test1 <- list() |
96 test1$side = 3 | 82 test1$side <- 3 |
97 test1$line = 0 #1 #3 | 83 test1$line <- 0 #1 #3 |
98 | 84 |
99 x <- clustheatmap(sc, final=FALSE) | 85 x <- clustheatmap(sc, final = FALSE) |
100 print(do.call(mtext, c(paste("(Initial)"), test1))) ## spacing is a hack | 86 print(do.call(mtext, c(paste("(Initial)"), test1))) |
101 x <- clustheatmap(sc, final=TRUE) | 87 x <- clustheatmap(sc, final = TRUE) |
102 print(do.call(mtext, c(paste("(Final)"), test1))) ## spacing is a hack | 88 print(do.call(mtext, c(paste("(Final)"), test1))) |
103 return(sc) | 89 return(sc) |
104 } | 90 } |
105 | 91 |
106 do.clustmap <- function(sc){ | 92 do.clustmap <- function(sc) { # nolint |
107 sc <- do.call(comptsne, c(sc, cluster.comptsne)) | 93 sc <- do.call(comptsne, c(sc, cluster.comptsne)) |
108 sc <- do.call(compfr, c(sc, cluster.compfr)) | 94 sc <- do.call(compfr, c(sc, cluster.compfr)) |
95 sc <- do.call(compumap, c(sc, cluster.compumap)) | |
109 return(sc) | 96 return(sc) |
110 } | 97 } |
111 | 98 |
112 | 99 |
113 mkgenelist <- function(sc){ | 100 mkgenelist <- function(sc) { |
114 ## Layout | 101 ## Layout |
115 test <- list() | 102 test <- list() |
116 test$side = 3 | 103 test$side <- 4 |
117 test$line = 0 #1 #3 | 104 test$line <- -2 |
118 test$cex = 0.8 | 105 test$cex <- 0.8 |
119 | 106 |
120 df <- c() | 107 df <- c() |
121 options(cex = 1) | 108 options(cex = 1) |
122 lapply(unique(sc@cpart), function(n){ | 109 plot.new() |
123 dg <- clustdiffgenes(sc, cl=n, pvalue=genelist.pvalue) | 110 lapply(unique(sc@cpart), function(n) { |
111 dg <- clustdiffgenes(sc, cl = n, pvalue = genelist.pvalue)$dg | |
124 | 112 |
125 dg.goi <- dg[dg$fc > genelist.foldchange,] | 113 dg_goi <- dg[dg$fc > genelist.foldchange, ] |
126 dg.goi.table <- head(dg.goi, genelist.tablelim) | 114 dg_goi_table <- head(dg_goi, genelist.tablelim) |
127 df <<- rbind(df, cbind(n, dg.goi.table)) | 115 df <<- rbind(df, cbind(n, dg_goi_table)) |
128 | 116 |
129 goi <- head(rownames(dg.goi.table), genelist.plotlim) | 117 goi <- head(rownames(dg_goi_table), genelist.plotlim) |
118 | |
130 print(plotmarkergenes(sc, goi)) | 119 print(plotmarkergenes(sc, goi)) |
131 buffer <- paste(rep("", 36), collapse=" ") | 120 buffer <- paste(rep("", 36), collapse = " ") |
132 print(do.call(mtext, c(paste(buffer, "Cluster ",n), test))) ## spacing is a hack | 121 print(do.call(mtext, c(paste(buffer, "Cluster ", n), test))) |
133 test$line=-1 | 122 test$line <- -1 |
134 print(do.call(mtext, c(paste(buffer, "Sig. Genes"), test))) ## spacing is a hack | 123 print(do.call(mtext, c(paste(buffer, "Sig. Genes"), test))) |
135 test$line=-2 | 124 test$line <- 0 |
136 print(do.call(mtext, c(paste(buffer, "(fc > ", genelist.foldchange,")"), test))) ## spacing is a hack | 125 print(do.call(mtext, c(paste(buffer, "(fc > ", |
137 | 126 genelist.foldchange, ")"), test))) |
138 }) | 127 }) |
139 write.table(df, file=out.genelist, sep="\t", quote=F) | 128 write.table(df, file = out.genelist, sep = "\t", quote = F) |
140 } | 129 } |
141 | 130 |
142 | 131 |
143 writecellassignments <- function(sc){ | 132 writecellassignments <- function(sc) { |
144 dat <- sc@cluster$kpart | 133 dat <- sc@cluster$kpart |
145 tab <- data.frame(row.names = NULL, | 134 tab <- data.frame(row.names = NULL, |
146 cells = names(dat), | 135 cells = names(dat), |
147 cluster.initial = dat, | 136 cluster.initial = dat, |
148 cluster.final = sc@cpart, | 137 cluster.final = sc@cpart, |
149 is.outlier = names(dat) %in% sc@out$out) | 138 is.outlier = names(dat) %in% sc@out$out) |
150 | 139 |
151 write.table(tab, file=out.assignments, sep="\t", quote=F, row.names = F) | 140 write.table(tab, file = out.assignments, sep = "\t", |
141 quote = F, row.names = F) | |
152 } | 142 } |
153 | 143 |
154 | 144 |
155 pdf(out.pdf) | 145 pdf(out.pdf) |
156 | 146 |
157 if (use.filtnormconf){ | 147 if (use.filtnormconf) { |
158 sc <- do.filter(sc) | 148 sc <- do.filter(sc) |
159 message(paste(" - Source:: genes:",nrow(sc@expdata),", cells:",ncol(sc@expdata))) | 149 message(paste(" - Source:: genes:", nrow(sc@expdata), |
160 message(paste(" - Filter:: genes:",nrow(getfdata(sc)),", cells:",ncol(getfdata(sc)))) | 150 ", cells:", ncol(sc@expdata))) |
151 message(paste(" - Filter:: genes:", nrow(as.matrix(getfdata(sc))), | |
152 ", cells:", ncol(as.matrix(getfdata(sc))))) | |
161 message(paste(" :: ", | 153 message(paste(" :: ", |
162 sprintf("%.1f", 100 * nrow(getfdata(sc))/nrow(sc@expdata)), "% of genes remain,", | 154 sprintf("%.1f", 100 * nrow(as.matrix( |
163 sprintf("%.1f", 100 * ncol(getfdata(sc))/ncol(sc@expdata)), "% of cells remain")) | 155 getfdata(sc))) / nrow(sc@expdata)), |
164 write.table(as.matrix(sc@ndata), file=out.table, col.names=NA, row.names=T, sep="\t", quote=F) | 156 "% of genes remain,", |
157 sprintf("%.1f", 100 * ncol(as.matrix( | |
158 getfdata(sc))) / ncol(sc@expdata)), | |
159 "% of cells remain")) | |
160 write.table(as.matrix(sc@ndata), file = out.table, col.names = NA, | |
161 row.names = T, sep = "\t", quote = F) | |
165 } | 162 } |
166 | 163 |
167 if (use.cluster){ | 164 if (use.cluster) { |
168 par(mfrow=c(2,2)) | 165 par(mfrow = c(2, 2)) |
169 sc <- do.cluster(sc) | 166 sc <- do.cluster(sc) |
170 | 167 |
171 par(mfrow=c(2,2)) | 168 par(mfrow = c(2, 2)) |
172 sc <- do.outlier(sc) | 169 sc <- do.outlier(sc) |
173 | 170 |
174 par(mfrow=c(2,2), mar=c(1,1,6,1)) | 171 par(mfrow = c(2, 2), mar = c(1, 1, 6, 1)) |
175 sc <- do.clustmap(sc) | 172 sc <- do.clustmap(sc) |
176 | 173 |
177 mkgenelist(sc) | 174 mkgenelist(sc) |
178 writecellassignments(sc) | 175 writecellassignments(sc) |
179 } | 176 } |