Mercurial > repos > iuc > raceid_clustering
comparison scripts/cluster.R @ 0:4ea021bd7513 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/raceid3 commit f880060c478d42202df5b78a81329f8af56b1138
author | iuc |
---|---|
date | Thu, 22 Nov 2018 04:43:57 -0500 |
parents | |
children | 89ee61bcc310 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:4ea021bd7513 |
---|---|
1 #!/usr/bin/env R | |
2 VERSION = "0.2" | |
3 | |
4 args = commandArgs(trailingOnly = T) | |
5 | |
6 if (length(args) != 1){ | |
7 message(paste("VERSION:", VERSION)) | |
8 stop("Please provide the config file") | |
9 } | |
10 | |
11 suppressWarnings(suppressPackageStartupMessages(require(RaceID))) | |
12 suppressWarnings(suppressPackageStartupMessages(require(scran))) | |
13 source(args[1]) | |
14 | |
15 | |
16 do.filter <- function(sc){ | |
17 if (!is.null(filt.lbatch.regexes)){ | |
18 lar <- filt.lbatch.regexes | |
19 nn <- colnames(sc@expdata) | |
20 filt$LBatch <- lapply(1:length(lar), function(m){ return( nn[grep(lar[[m]], nn)] ) }) | |
21 } | |
22 | |
23 sc <- do.call(filterdata, c(sc, filt)) | |
24 | |
25 ## Get histogram metrics for library size and number of features | |
26 raw.lib <- log(colSums(as.matrix(sc@expdata))) | |
27 raw.feat <- log(rowSums(as.matrix(sc@expdata))) | |
28 filt.lib <- log(colSums(getfdata(sc))) | |
29 filt.feat <- log(rowSums(getfdata(sc))) | |
30 | |
31 br <- 50 | |
32 ## Determine limits on plots based on the unfiltered data | |
33 ## (doesn't work, R rejects limits and norm data is too different to compare to exp data | |
34 ## so let them keep their own ranges) | |
35 | |
36 ## betterrange <- function(floatval){ | |
37 ## return(10 * (floor(floatval / 10) + 1)) | |
38 ## } | |
39 | |
40 ## tmp.lib <- hist(raw.lib, breaks=br, plot=F) | |
41 ## tmp.feat <- hist(raw.feat, breaks=br, plot=F) | |
42 | |
43 ## lib.y_lim <- c(0,betterrange(max(tmp.lib$counts))) | |
44 ## lib.x_lim <- c(0,betterrange(max(tmp.lib$breaks))) | |
45 | |
46 ## feat.y_lim <- c(0,betterrange(max(tmp.feat$counts))) | |
47 ## feat.x_lim <- c(0,betterrange(max(tmp.feat$breaks))) | |
48 | |
49 par(mfrow=c(2,2)) | |
50 print(hist(raw.lib, breaks=br, main="ExpData Log(LibSize)")) # , xlim=lib.x_lim, ylim=lib.y_lim) | |
51 print(hist(raw.feat, breaks=br, main="ExpData Log(NumFeat)")) #, xlim=feat.x_lim, ylim=feat.y_lim) | |
52 print(hist(filt.lib, breaks=br, main="FiltData Log(LibSize)")) # , xlim=lib.x_lim, ylim=lib.y_lim) | |
53 print(hist(filt.feat, breaks=br, main="FiltData Log(NumFeat)")) # , xlim=feat.x_lim, ylim=feat.y_lim) | |
54 | |
55 if (filt.use.ccorrect){ | |
56 par(mfrow=c(2,2)) | |
57 sc <- do.call(CCcorrect, c(sc, filt.ccc)) | |
58 print(plotdimsat(sc, change=T)) | |
59 print(plotdimsat(sc, change=F)) | |
60 } | |
61 return(sc) | |
62 } | |
63 | |
64 do.cluster <- function(sc){ | |
65 sc <- do.call(compdist, c(sc, clust.compdist)) | |
66 sc <- do.call(clustexp, c(sc, clust.clustexp)) | |
67 if (clust.clustexp$sat){ | |
68 print(plotsaturation(sc, disp=F)) | |
69 print(plotsaturation(sc, disp=T)) | |
70 } | |
71 print(plotjaccard(sc)) | |
72 return(sc) | |
73 } | |
74 | |
75 do.outlier <- function(sc){ | |
76 sc <- do.call(findoutliers, c(sc, outlier.findoutliers)) | |
77 if (outlier.use.randomforest){ | |
78 sc <- do.call(rfcorrect, c(sc, outlier.rfcorrect)) | |
79 } | |
80 print(plotbackground(sc)) | |
81 print(plotsensitivity(sc)) | |
82 print(plotoutlierprobs(sc)) | |
83 ## Heatmaps | |
84 test1 <- list() | |
85 test1$side = 3 | |
86 test1$line = 0 #1 #3 | |
87 | |
88 x <- clustheatmap(sc, final=FALSE) | |
89 print(do.call(mtext, c(paste("(Initial)"), test1))) ## spacing is a hack | |
90 x <- clustheatmap(sc, final=TRUE) | |
91 print(do.call(mtext, c(paste("(Final)"), test1))) ## spacing is a hack | |
92 return(sc) | |
93 } | |
94 | |
95 do.clustmap <- function(sc){ | |
96 sc <- do.call(comptsne, c(sc, cluster.comptsne)) | |
97 sc <- do.call(compfr, c(sc, cluster.compfr)) | |
98 return(sc) | |
99 } | |
100 | |
101 | |
102 mkgenelist <- function(sc){ | |
103 ## Layout | |
104 test <- list() | |
105 test$side = 3 | |
106 test$line = 0 #1 #3 | |
107 test$cex = 0.8 | |
108 | |
109 df <- c() | |
110 options(cex = 1) | |
111 lapply(unique(sc@cpart), function(n){ | |
112 dg <- clustdiffgenes(sc, cl=n, pvalue=genelist.pvalue) | |
113 | |
114 dg.goi <- dg[dg$fc > genelist.foldchange,] | |
115 dg.goi.table <- head(dg.goi, genelist.tablelim) | |
116 df <<- rbind(df, cbind(n, dg.goi.table)) | |
117 | |
118 goi <- head(rownames(dg.goi.table), genelist.plotlim) | |
119 print(plotmarkergenes(sc, goi)) | |
120 print(do.call(mtext, c(paste(" Cluster ",n), test))) ## spacing is a hack | |
121 test$line=-1 | |
122 print(do.call(mtext, c(paste(" Sig. Genes"), test))) ## spacing is a hack | |
123 test$line=-2 | |
124 print(do.call(mtext, c(paste(" (fc > ", genelist.foldchange,")"), test))) ## spacing is a hack | |
125 | |
126 }) | |
127 write.table(df, file=out.genelist, sep="\t", quote=F) | |
128 } | |
129 | |
130 pdf(out.pdf) | |
131 | |
132 if (use.filtnormconf){ | |
133 sc <- do.filter(sc) | |
134 message(paste(" - Source:: genes:",nrow(sc@expdata),", cells:",ncol(sc@expdata))) | |
135 message(paste(" - Filter:: genes:",nrow(sc@ndata),", cells:",ncol(sc@ndata))) | |
136 message(paste(" :: ", | |
137 sprintf("%.1f", 100 * nrow(sc@ndata)/nrow(sc@expdata)), "% of genes remain,", | |
138 sprintf("%.1f", 100 * ncol(sc@ndata)/ncol(sc@expdata)), "% of cells remain")) | |
139 } | |
140 | |
141 if (use.cluster){ | |
142 par(mfrow=c(2,2)) | |
143 sc <- do.cluster(sc) | |
144 | |
145 par(mfrow=c(2,2)) | |
146 sc <- do.outlier(sc) | |
147 | |
148 par(mfrow=c(2,2), mar=c(1,1,6,1)) | |
149 sc <- do.clustmap(sc) | |
150 | |
151 mkgenelist(sc) | |
152 } | |
153 | |
154 dev.off() | |
155 | |
156 saveRDS(sc, out.rdat) |