comparison scripts/cluster.R @ 0:4ea021bd7513 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/raceid3 commit f880060c478d42202df5b78a81329f8af56b1138
author iuc
date Thu, 22 Nov 2018 04:43:57 -0500
parents
children 89ee61bcc310
comparison
equal deleted inserted replaced
-1:000000000000 0:4ea021bd7513
#!/usr/bin/env R
VERSION <- "0.2"

## The single expected argument is the path to an R config file that is
## source()d below.
args <- commandArgs(trailingOnly = TRUE)

if (length(args) != 1) {
    message(paste("VERSION:", VERSION))
    stop("Please provide the config file")
}

## library() raises an error when the package is missing, whereas require()
## only returns FALSE (with a warning that suppressWarnings() would hide), so
## a missing RaceID install now fails here instead of at the first RaceID call.
suppressWarnings(suppressPackageStartupMessages(library(RaceID)))
## NOTE(review): no scran function is called directly in this script, so this
## load is kept non-fatal -- confirm whether it is actually required.
suppressWarnings(suppressPackageStartupMessages(require(scran)))

## The config file is expected to define the globals used by the rest of the
## script (e.g. sc, filt, use.cluster, out.pdf).
source(args[1])
14
15
## Filter the dataset, draw QC histograms on the active graphics device and
## optionally apply CCcorrect.
##
## Reads the globals filt.lbatch.regexes, filt, filt.use.ccorrect and
## filt.ccc, which are expected to be defined by the sourced config file.
##
## sc: object with an `expdata` slot; it is passed as the first argument to
##     filterdata() and, when filt.use.ccorrect is set, to CCcorrect().
## Returns the object produced by those calls.
do.filter <- function(sc){
    ## length() is 0 for both NULL and an empty collection, so either case
    ## skips batch handling instead of indexing out of bounds.
    if (length(filt.lbatch.regexes) > 0){
        lar <- filt.lbatch.regexes
        nn <- colnames(sc@expdata)
        ## One vector of matching column names per regex. This modifies a
        ## function-local copy of `filt`; the global is left untouched.
        filt$LBatch <- lapply(seq_along(lar), function(m){
            grep(lar[[m]], nn, value = TRUE)
        })
    }

    sc <- do.call(filterdata, c(sc, filt))

    ## Get histogram metrics for library size and number of features
    raw.mat <- as.matrix(sc@expdata)
    filt.mat <- getfdata(sc)
    raw.lib <- log(colSums(raw.mat))
    raw.feat <- log(rowSums(raw.mat))
    filt.lib <- log(colSums(filt.mat))
    filt.feat <- log(rowSums(filt.mat))

    br <- 50
    ## Shared axis limits derived from the unfiltered data were tried and
    ## dropped: R rejects the limits, and the normalised data is too different
    ## to compare with the expression data, so each histogram keeps its own
    ## range.
    par(mfrow = c(2, 2))
    print(hist(raw.lib, breaks = br, main = "ExpData Log(LibSize)"))
    print(hist(raw.feat, breaks = br, main = "ExpData Log(NumFeat)"))
    print(hist(filt.lib, breaks = br, main = "FiltData Log(LibSize)"))
    print(hist(filt.feat, breaks = br, main = "FiltData Log(NumFeat)"))

    if (filt.use.ccorrect){
        par(mfrow = c(2, 2))
        sc <- do.call(CCcorrect, c(sc, filt.ccc))
        print(plotdimsat(sc, change = TRUE))
        print(plotdimsat(sc, change = FALSE))
    }
    return(sc)
}
63
## Compute distances and cluster the cells, then draw the clustering
## diagnostics on the active graphics device.
##
## Reads the globals clust.compdist and clust.clustexp (argument lists for
## compdist() and clustexp()), expected to come from the sourced config file.
##
## sc: object passed as the first argument to compdist() and clustexp().
## Returns the object produced by clustexp().
do.cluster <- function(sc){
    sc <- do.call(compdist, c(sc, clust.compdist))
    sc <- do.call(clustexp, c(sc, clust.clustexp))
    ## Saturation plots are drawn only when the `sat` entry of the clustexp
    ## argument list is set.
    if (clust.clustexp$sat){
        ## TRUE/FALSE rather than T/F: the latter are ordinary variables that
        ## the sourced config file could rebind.
        print(plotsaturation(sc, disp = FALSE))
        print(plotsaturation(sc, disp = TRUE))
    }
    print(plotjaccard(sc))
    return(sc)
}
74
## Run outlier detection (optionally followed by rfcorrect) and draw the
## outlier diagnostics plus the initial and final cluster heatmaps.
##
## Reads the globals outlier.findoutliers, outlier.use.randomforest and
## outlier.rfcorrect, expected to come from the sourced config file.
##
## sc: object passed as the first argument to findoutliers() / rfcorrect().
## Returns the object produced by those calls.
do.outlier <- function(sc){
    sc <- do.call(findoutliers, c(sc, outlier.findoutliers))
    if (outlier.use.randomforest){
        sc <- do.call(rfcorrect, c(sc, outlier.rfcorrect))
    }

    for (plotter in list(plotbackground, plotsensitivity, plotoutlierprobs)){
        print(plotter(sc))
    }

    ## Heatmaps: one before and one after outlier handling, each labelled in
    ## the top margin via mtext().
    label.args <- list(side = 3, line = 0)
    heatmaps <- list(
        list(final = FALSE, label = "(Initial)"),
        list(final = TRUE, label = "(Final)")
    )
    for (hm in heatmaps){
        clustheatmap(sc, final = hm$final)
        print(do.call(mtext, c(list(hm$label), label.args))) ## spacing is a hack
    }
    return(sc)
}
94
## Compute the t-SNE and Fruchterman-Rheingold layouts.
##
## Reads the globals cluster.comptsne and cluster.compfr (argument lists for
## comptsne() and compfr()), expected to come from the sourced config file.
##
## sc: object passed as the first argument to comptsne().
## Returns the object produced by compfr().
do.clustmap <- function(sc){
    with.tsne <- do.call(comptsne, c(sc, cluster.comptsne))
    with.fr <- do.call(compfr, c(with.tsne, cluster.compfr))
    return(with.fr)
}
100
101
## For every cluster in sc@cpart, select the differential genes above the
## fold-change cut-off, plot the top ones as marker genes, and write the
## combined per-cluster table to out.genelist as tab-separated text.
##
## Reads the globals genelist.pvalue, genelist.foldchange, genelist.tablelim,
## genelist.plotlim and out.genelist, expected to come from the sourced
## config file.
##
## sc: object with a `cpart` slot, passed to clustdiffgenes() and
##     plotmarkergenes().
mkgenelist <- function(sc){
    ## Layout shared by all margin labels; `line` is supplied per label.
    label.args <- list(side = 3, cex = 0.8)
    add.label <- function(text, line){
        print(do.call(mtext, c(list(text, line = line), label.args))) ## spacing is a hack
    }

    ## NOTE(review): this sets a global option named "cex", not the graphics
    ## parameter par("cex") -- kept as in the original; confirm intent.
    options(cex = 1)

    ## One table per cluster, combined once at the end instead of growing a
    ## data frame through `<<-` inside the loop.
    tables <- lapply(unique(sc@cpart), function(n){
        dg <- clustdiffgenes(sc, cl = n, pvalue = genelist.pvalue)

        dg.goi <- dg[dg$fc > genelist.foldchange, ]
        dg.goi.table <- head(dg.goi, genelist.tablelim)

        ## cbind() of the scalar cluster id with a zero-row table raises
        ## "arguments imply differing number of rows", so a cluster with no
        ## qualifying genes is reported and skipped instead.
        if (nrow(dg.goi.table) == 0){
            message(paste(" - Cluster", n, "has no genes with fc >",
                          genelist.foldchange, "- skipped"))
            return(NULL)
        }

        goi <- head(rownames(dg.goi.table), genelist.plotlim)
        print(plotmarkergenes(sc, goi))
        add.label(paste(" Cluster ", n), 0)
        add.label(paste(" Sig. Genes"), -1)
        add.label(paste(" (fc > ", genelist.foldchange, ")"), -2)

        cbind(n, dg.goi.table)
    })

    df <- do.call(rbind, tables)
    write.table(df, file = out.genelist, sep = "\t", quote = FALSE)
}
129
## ---- Main ----
## All plots produced below go to a single PDF. `sc`, the use.* switches and
## the out.* paths are expected to be defined by the sourced config file.
pdf(out.pdf)

if (use.filtnormconf){
    sc <- do.filter(sc)

    ## Report how much of the data survived filtering.
    genes.src <- nrow(sc@expdata)
    cells.src <- ncol(sc@expdata)
    genes.filt <- nrow(sc@ndata)
    cells.filt <- ncol(sc@ndata)

    message(paste(" - Source:: genes:", genes.src, ", cells:", cells.src))
    message(paste(" - Filter:: genes:", genes.filt, ", cells:", cells.filt))
    message(paste(" :: ",
                  sprintf("%.1f", 100 * genes.filt/genes.src), "% of genes remain,",
                  sprintf("%.1f", 100 * cells.filt/cells.src), "% of cells remain"))
}

if (use.cluster){
    ## Each stage starts on a fresh 2x2 page layout.
    par(mfrow = c(2, 2))
    sc <- do.cluster(sc)

    par(mfrow = c(2, 2))
    sc <- do.outlier(sc)

    ## Extra top margin leaves room for the labels mkgenelist() writes.
    par(mfrow = c(2, 2), mar = c(1, 1, 6, 1))
    sc <- do.clustmap(sc)

    mkgenelist(sc)
}

dev.off()

saveRDS(sc, out.rdat)