Mercurial > repos > iuc > raceid_main
comparison scripts/RaceID_class.R @ 0:e01c989c7543 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/raceid commit 39918bfdb08f06862ca395ce58a6f5e4f6dd1a5e
| author | iuc |
|---|---|
| date | Sat, 03 Mar 2018 17:34:16 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:e01c989c7543 |
|---|---|
| 1 ## load required packages. | |
| 2 require(tsne) | |
| 3 require(pheatmap) | |
| 4 require(MASS) | |
| 5 require(cluster) | |
| 6 require(mclust) | |
| 7 require(flexmix) | |
| 8 require(lattice) | |
| 9 require(fpc) | |
| 10 require(amap) | |
| 11 require(RColorBrewer) | |
| 12 require(locfit) | |
| 13 | |
| 14 ## class definition | |
| 15 SCseq <- setClass("SCseq", slots = c(expdata = "data.frame", ndata = "data.frame", fdata = "data.frame", distances = "matrix", tsne = "data.frame", kmeans = "list", background = "list", out = "list", cpart = "vector", fcol = "vector", filterpar = "list", clusterpar = "list", outlierpar ="list" )) | |
| 16 | |
| 17 setValidity("SCseq", | |
| 18 function(object) { | |
| 19 msg <- NULL | |
| 20 if ( ! is.data.frame(object@expdata) ){ | |
| 21 msg <- c(msg, "input data must be data.frame") | |
| 22 }else if ( nrow(object@expdata) < 2 ){ | |
| 23 msg <- c(msg, "input data must have more than one row") | |
| 24 }else if ( ncol(object@expdata) < 2 ){ | |
| 25 msg <- c(msg, "input data must have more than one column") | |
| 26 }else if (sum( apply( is.na(object@expdata),1,sum ) ) > 0 ){ | |
| 27 msg <- c(msg, "NAs are not allowed in input data") | |
| 28 }else if (sum( apply( object@expdata,1,min ) ) < 0 ){ | |
| 29 msg <- c(msg, "negative values are not allowed in input data") | |
| 30 } | |
| 31 if (is.null(msg)) TRUE | |
| 32 else msg | |
| 33 } | |
| 34 ) | |
| 35 | |
| 36 setMethod("initialize", | |
| 37 signature = "SCseq", | |
| 38 definition = function(.Object, expdata ){ | |
| 39 .Object@expdata <- expdata | |
| 40 .Object@ndata <- expdata | |
| 41 .Object@fdata <- expdata | |
| 42 validObject(.Object) | |
| 43 return(.Object) | |
| 44 } | |
| 45 ) | |
| 46 | |
| 47 setGeneric("filterdata", function(object, mintotal=3000, minexpr=5, minnumber=1, maxexpr=500, downsample=FALSE, dsn=1, rseed=17000) standardGeneric("filterdata")) | |
| 48 | |
| 49 setMethod("filterdata", | |
| 50 signature = "SCseq", | |
| 51 definition = function(object,mintotal,minexpr,minnumber,maxexpr,downsample,dsn,rseed) { | |
| 52 if ( ! is.numeric(mintotal) ) stop( "mintotal has to be a positive number" ) else if ( mintotal <= 0 ) stop( "mintotal has to be a positive number" ) | |
| 53 if ( ! is.numeric(minexpr) ) stop( "minexpr has to be a non-negative number" ) else if ( minexpr < 0 ) stop( "minexpr has to be a non-negative number" ) | |
| 54 if ( ! is.numeric(minnumber) ) stop( "minnumber has to be a non-negative integer number" ) else if ( round(minnumber) != minnumber | minnumber < 0 ) stop( "minnumber has to be a non-negative integer number" ) | |
| 55 if ( ! ( is.numeric(downsample) | is.logical(downsample) ) ) stop( "downsample has to be logical (TRUE/FALSE)" ) | |
| 56 if ( ! is.numeric(dsn) ) stop( "dsn has to be a positive integer number" ) else if ( round(dsn) != dsn | dsn <= 0 ) stop( "dsn has to be a positive integer number" ) | |
| 57 object@filterpar <- list(mintotal=mintotal, minexpr=minexpr, minnumber=minnumber, maxexpr=maxexpr, downsample=downsample, dsn=dsn) | |
| 58 object@ndata <- object@expdata[,apply(object@expdata,2,sum,na.rm=TRUE) >= mintotal] | |
| 59 if ( downsample ){ | |
| 60 set.seed(rseed) | |
| 61 object@ndata <- downsample(object@expdata,n=mintotal,dsn=dsn) | |
| 62 }else{ | |
| 63 x <- object@ndata | |
| 64 object@ndata <- as.data.frame( t(t(x)/apply(x,2,sum))*median(apply(x,2,sum,na.rm=TRUE)) + .1 ) | |
| 65 } | |
| 66 x <- object@ndata | |
| 67 object@fdata <- x[apply(x>=minexpr,1,sum,na.rm=TRUE) >= minnumber,] | |
| 68 x <- object@fdata | |
| 69 object@fdata <- x[apply(x,1,max,na.rm=TRUE) < maxexpr,] | |
| 70 return(object) | |
| 71 } | |
| 72 ) | |
| 73 | |
| 74 | |
| 75 downsample <- function(x,n,dsn){ | |
| 76 x <- round( x[,apply(x,2,sum,na.rm=TRUE) >= n], 0) | |
| 77 nn <- min( apply(x,2,sum) ) | |
| 78 for ( j in 1:dsn ){ | |
| 79 z <- data.frame(GENEID=rownames(x)) | |
| 80 rownames(z) <- rownames(x) | |
| 81 initv <- rep(0,nrow(z)) | |
| 82 for ( i in 1:dim(x)[2] ){ | |
| 83 y <- aggregate(rep(1,nn),list(sample(rep(rownames(x),x[,i]),nn)),sum) | |
| 84 na <- names(x)[i] | |
| 85 names(y) <- c("GENEID",na) | |
| 86 rownames(y) <- y$GENEID | |
| 87 z[,na] <- initv | |
| 88 k <- intersect(rownames(z),y$GENEID) | |
| 89 z[k,na] <- y[k,na] | |
| 90 z[is.na(z[,na]),na] <- 0 | |
| 91 } | |
| 92 rownames(z) <- as.vector(z$GENEID) | |
| 93 ds <- if ( j == 1 ) z[,-1] else ds + z[,-1] | |
| 94 } | |
| 95 ds <- ds/dsn + .1 | |
| 96 return(ds) | |
| 97 } | |
| 98 | |
| 99 dist.gen <- function(x,method="euclidean", ...) if ( method %in% c("spearman","pearson","kendall") ) as.dist( 1 - cor(t(x),method=method,...) ) else dist(x,method=method,...) | |
| 100 | |
| 101 dist.gen.pairs <- function(x,y,...) dist.gen(t(cbind(x,y)),...) | |
| 102 | |
| 103 clustfun <- function(x,clustnr=20,bootnr=50,metric="pearson",do.gap=TRUE,SE.method="Tibs2001SEmax",SE.factor=.25,B.gap=50,cln=0,rseed=17000) | |
| 104 { | |
| 105 if ( clustnr < 2) stop("Choose clustnr > 1") | |
| 106 di <- dist.gen(t(x),method=metric) | |
| 107 if ( do.gap | cln > 0 ){ | |
| 108 gpr <- NULL | |
| 109 if ( do.gap ){ | |
| 110 set.seed(rseed) | |
| 111 gpr <- clusGap(as.matrix(di), FUN = kmeans, K.max = clustnr, B = B.gap) | |
| 112 if ( cln == 0 ) cln <- maxSE(gpr$Tab[,3],gpr$Tab[,4],method=SE.method,SE.factor) | |
| 113 } | |
| 114 if ( cln <= 1 ) { | |
| 115 clb <- list(result=list(partition=rep(1,dim(x)[2])),bootmean=1) | |
| 116 names(clb$result$partition) <- names(x) | |
| 117 return(list(x=x,clb=clb,gpr=gpr,di=di)) | |
| 118 } | |
| 119 clb <- clusterboot(di,B=bootnr,distances=FALSE,bootmethod="boot",clustermethod=KmeansCBI,krange=cln,scaling=FALSE,multipleboot=FALSE,bscompare=TRUE,seed=rseed) | |
| 120 return(list(x=x,clb=clb,gpr=gpr,di=di)) | |
| 121 } | |
| 122 } | |
| 123 | |
| 124 KmeansCBI <- function (data, krange, k = NULL, scaling = FALSE, runs = 1, | |
| 125 criterion = "ch", method="euclidean",...) | |
| 126 { | |
| 127 if (!is.null(k)) | |
| 128 krange <- k | |
| 129 if (!identical(scaling, FALSE)) | |
| 130 sdata <- scale(data, center = TRUE, scale = scaling) | |
| 131 else sdata <- data | |
| 132 c1 <- Kmeansruns(sdata, krange, runs = runs, criterion = criterion, method = method, | |
| 133 ...) | |
| 134 partition <- c1$cluster | |
| 135 cl <- list() | |
| 136 nc <- krange | |
| 137 for (i in 1:nc) cl[[i]] <- partition == i | |
| 138 out <- list(result = c1, nc = nc, clusterlist = cl, partition = partition, | |
| 139 clustermethod = "kmeans") | |
| 140 out | |
| 141 } | |
| 142 | |
| 143 Kmeansruns <- function (data, krange = 2:10, criterion = "ch", iter.max = 100, | |
| 144 runs = 100, scaledata = FALSE, alpha = 0.001, critout = FALSE, | |
| 145 plot = FALSE, method="euclidean", ...) | |
| 146 { | |
| 147 data <- as.matrix(data) | |
| 148 if (criterion == "asw") | |
| 149 sdata <- dist(data) | |
| 150 if (scaledata) | |
| 151 data <- scale(data) | |
| 152 cluster1 <- 1 %in% krange | |
| 153 crit <- numeric(max(krange)) | |
| 154 km <- list() | |
| 155 for (k in krange) { | |
| 156 if (k > 1) { | |
| 157 minSS <- Inf | |
| 158 kmopt <- NULL | |
| 159 for (i in 1:runs) { | |
| 160 options(show.error.messages = FALSE) | |
| 161 repeat { | |
| 162 kmm <- try(Kmeans(data, k, iter.max = iter.max, method=method, | |
| 163 ...)) | |
| 164 if (class(kmm) != "try-error") | |
| 165 break | |
| 166 } | |
| 167 options(show.error.messages = TRUE) | |
| 168 swss <- sum(kmm$withinss) | |
| 169 if (swss < minSS) { | |
| 170 kmopt <- kmm | |
| 171 minSS <- swss | |
| 172 } | |
| 173 if (plot) { | |
| 174 par(ask = TRUE) | |
| 175 pairs(data, col = kmm$cluster, main = swss) | |
| 176 } | |
| 177 } | |
| 178 km[[k]] <- kmopt | |
| 179 crit[k] <- switch(criterion, asw = cluster.stats(sdata, | |
| 180 km[[k]]$cluster)$avg.silwidth, ch = calinhara(data, | |
| 181 km[[k]]$cluster)) | |
| 182 if (critout) | |
| 183 cat(k, " clusters ", crit[k], "\n") | |
| 184 } | |
| 185 } | |
| 186 if (cluster1) | |
| 187 cluster1 <- dudahart2(data, km[[2]]$cluster, alpha = alpha)$cluster1 | |
| 188 k.best <- which.max(crit) | |
| 189 if (cluster1) | |
| 190 k.best <- 1 | |
| 191 km[[k.best]]$crit <- crit | |
| 192 km[[k.best]]$bestk <- k.best | |
| 193 out <- km[[k.best]] | |
| 194 out | |
| 195 } | |
| 196 | |
| 197 binompval <- function(p,N,n){ | |
| 198 pval <- pbinom(n,round(N,0),p,lower.tail=TRUE) | |
| 199 pval[!is.na(pval) & pval > 0.5] <- 1-pval[!is.na(pval) & pval > 0.5] | |
| 200 return(pval) | |
| 201 } | |
| 202 | |
| 203 setGeneric("clustexp", function(object,clustnr=20,bootnr=50,metric="pearson",do.gap=TRUE,SE.method="Tibs2001SEmax",SE.factor=.25,B.gap=50,cln=0,rseed=17000) standardGeneric("clustexp")) | |
| 204 | |
| 205 setMethod("clustexp", | |
| 206 signature = "SCseq", | |
| 207 definition = function(object,clustnr,bootnr,metric,do.gap,SE.method,SE.factor,B.gap,cln,rseed) { | |
| 208 if ( ! is.numeric(clustnr) ) stop("clustnr has to be a positive integer") else if ( round(clustnr) != clustnr | clustnr <= 0 ) stop("clustnr has to be a positive integer") | |
| 209 if ( ! is.numeric(bootnr) ) stop("bootnr has to be a positive integer") else if ( round(bootnr) != bootnr | bootnr <= 0 ) stop("bootnr has to be a positive integer") | |
| 210 if ( ! ( metric %in% c( "spearman","pearson","kendall","euclidean","maximum","manhattan","canberra","binary","minkowski") ) ) stop("metric has to be one of the following: spearman, pearson, kendall, euclidean, maximum, manhattan, canberra, binary, minkowski") | |
| 211 if ( ! ( SE.method %in% c( "firstSEmax","Tibs2001SEmax","globalSEmax","firstmax","globalmax") ) ) stop("SE.method has to be one of the following: firstSEmax, Tibs2001SEmax, globalSEmax, firstmax, globalmax") | |
| 212 if ( ! is.numeric(SE.factor) ) stop("SE.factor has to be a non-negative integer") else if ( SE.factor < 0 ) stop("SE.factor has to be a non-negative integer") | |
| 213 if ( ! ( is.numeric(do.gap) | is.logical(do.gap) ) ) stop( "do.gap has to be logical (TRUE/FALSE)" ) | |
| 214 if ( ! is.numeric(B.gap) ) stop("B.gap has to be a positive integer") else if ( round(B.gap) != B.gap | B.gap <= 0 ) stop("B.gap has to be a positive integer") | |
| 215 if ( ! is.numeric(cln) ) stop("cln has to be a non-negative integer") else if ( round(cln) != cln | cln < 0 ) stop("cln has to be a non-negative integer") | |
| 216 if ( ! is.numeric(rseed) ) stop("rseed has to be numeric") | |
| 217 if ( !do.gap & cln == 0 ) stop("cln has to be a positive integer or do.gap has to be TRUE") | |
| 218 object@clusterpar <- list(clustnr=clustnr,bootnr=bootnr,metric=metric,do.gap=do.gap,SE.method=SE.method,SE.factor=SE.factor,B.gap=B.gap,cln=cln,rseed=rseed) | |
| 219 y <- clustfun(object@fdata,clustnr,bootnr,metric,do.gap,SE.method,SE.factor,B.gap,cln,rseed) | |
| 220 object@kmeans <- list(kpart=y$clb$result$partition, jaccard=y$clb$bootmean, gap=y$gpr) | |
| 221 object@distances <- as.matrix( y$di ) | |
| 222 set.seed(111111) | |
| 223 object@fcol <- sample(rainbow(max(y$clb$result$partition))) | |
| 224 return(object) | |
| 225 } | |
| 226 ) | |
| 227 | |
| 228 setGeneric("findoutliers", function(object,outminc=5,outlg=2,probthr=1e-3,thr=2**-(1:40),outdistquant=.75) standardGeneric("findoutliers")) | |
| 229 | |
| 230 setMethod("findoutliers", | |
| 231 signature = "SCseq", | |
| 232 definition = function(object,outminc,outlg,probthr,thr,outdistquant) { | |
| 233 if ( length(object@kmeans$kpart) == 0 ) stop("run clustexp before findoutliers") | |
| 234 if ( ! is.numeric(outminc) ) stop("outminc has to be a non-negative integer") else if ( round(outminc) != outminc | outminc < 0 ) stop("outminc has to be a non-negative integer") | |
| 235 if ( ! is.numeric(outlg) ) stop("outlg has to be a non-negative integer") else if ( round(outlg) != outlg | outlg < 0 ) stop("outlg has to be a non-negative integer") | |
| 236 if ( ! is.numeric(probthr) ) stop("probthr has to be a number between 0 and 1") else if ( probthr < 0 | probthr > 1 ) stop("probthr has to be a number between 0 and 1") | |
| 237 if ( ! is.numeric(thr) ) stop("thr hast to be a vector of numbers between 0 and 1") else if ( min(thr) < 0 | max(thr) > 1 ) stop("thr hast to be a vector of numbers between 0 and 1") | |
| 238 if ( ! is.numeric(outdistquant) ) stop("outdistquant has to be a number between 0 and 1") else if ( outdistquant < 0 | outdistquant > 1 ) stop("outdistquant has to be a number between 0 and 1") | |
| 239 | |
| 240 object@outlierpar <- list( outminc=outminc,outlg=outlg,probthr=probthr,thr=thr,outdistquant=outdistquant ) | |
| 241 ### calibrate background model | |
| 242 m <- log2(apply(object@fdata,1,mean)) | |
| 243 v <- log2(apply(object@fdata,1,var)) | |
| 244 f <- m > -Inf & v > -Inf | |
| 245 m <- m[f] | |
| 246 v <- v[f] | |
| 247 mm <- -8 | |
| 248 repeat{ | |
| 249 fit <- lm(v ~ m + I(m^2)) | |
| 250 if( coef(fit)[3] >= 0 | mm >= 3){ | |
| 251 break | |
| 252 } | |
| 253 mm <- mm + .5 | |
| 254 f <- m > mm | |
| 255 m <- m[f] | |
| 256 v <- v[f] | |
| 257 } | |
| 258 object@background <- list() | |
| 259 object@background$vfit <- fit | |
| 260 object@background$lvar <- function(x,object) 2**(coef(object@background$vfit)[1] + log2(x)*coef(object@background$vfit)[2] + coef(object@background$vfit)[3] * log2(x)**2) | |
| 261 object@background$lsize <- function(x,object) x**2/(max(x + 1e-6,object@background$lvar(x,object)) - x) | |
| 262 | |
| 263 ### identify outliers | |
| 264 out <- c() | |
| 265 stest <- rep(0,length(thr)) | |
| 266 cprobs <- c() | |
| 267 for ( n in 1:max(object@kmeans$kpart) ){ | |
| 268 if ( sum(object@kmeans$kpart == n) == 1 ){ cprobs <- append(cprobs,.5); names(cprobs)[length(cprobs)] <- names(object@kmeans$kpart)[object@kmeans$kpart == n]; next } | |
| 269 x <- object@fdata[,object@kmeans$kpart == n] | |
| 270 x <- x[apply(x,1,max) > outminc,] | |
| 271 z <- t( apply(x,1,function(x){ apply( cbind( pnbinom(round(x,0),mu=mean(x),size=object@background$lsize(mean(x),object)) , 1 - pnbinom(round(x,0),mu=mean(x),size=object@background$lsize(mean(x),object)) ),1, min) } ) ) | |
| 272 cp <- apply(z,2,function(x){ y <- p.adjust(x,method="BH"); y <- y[order(y,decreasing=FALSE)]; return(y[outlg]);}) | |
| 273 f <- cp < probthr | |
| 274 cprobs <- append(cprobs,cp) | |
| 275 if ( sum(f) > 0 ) out <- append(out,names(x)[f]) | |
| 276 for ( j in 1:length(thr) ) stest[j] <- stest[j] + sum( cp < thr[j] ) | |
| 277 } | |
| 278 object@out <-list(out=out,stest=stest,thr=thr,cprobs=cprobs) | |
| 279 | |
| 280 ### cluster outliers | |
| 281 clp2p.cl <- c() | |
| 282 cols <- names(object@fdata) | |
| 283 di <- as.data.frame(object@distances) | |
| 284 for ( i in 1:max(object@kmeans$kpart) ) { | |
| 285 tcol <- cols[object@kmeans$kpart == i] | |
| 286 if ( sum(!(tcol %in% out)) > 1 ) clp2p.cl <- append(clp2p.cl,as.vector(t(di[tcol[!(tcol %in% out)],tcol[!(tcol %in% out)]]))) | |
| 287 } | |
| 288 clp2p.cl <- clp2p.cl[clp2p.cl>0] | |
| 289 | |
| 290 cpart <- object@kmeans$kpart | |
| 291 cadd <- list() | |
| 292 if ( length(out) > 0 ){ | |
| 293 if (length(out) == 1){ | |
| 294 cadd <- list(out) | |
| 295 }else{ | |
| 296 n <- out | |
| 297 m <- as.data.frame(di[out,out]) | |
| 298 | |
| 299 for ( i in 1:length(out) ){ | |
| 300 if ( length(n) > 1 ){ | |
| 301 o <- order(apply(cbind(m,1:dim(m)[1]),1,function(x) min(x[1:(length(x)-1)][-x[length(x)]])),decreasing=FALSE) | |
| 302 m <- m[o,o] | |
| 303 n <- n[o] | |
| 304 f <- m[,1] < quantile(clp2p.cl,outdistquant) | m[,1] == min(clp2p.cl) | |
| 305 ind <- 1 | |
| 306 if ( sum(f) > 1 ) for ( j in 2:sum(f) ) if ( apply(m[f,f][j,c(ind,j)] > quantile(clp2p.cl,outdistquant) ,1,sum) == 0 ) ind <- append(ind,j) | |
| 307 cadd[[i]] <- n[f][ind] | |
| 308 g <- ! n %in% n[f][ind] | |
| 309 n <- n[g] | |
| 310 m <- m[g,g] | |
| 311 if ( sum(g) == 0 ) break | |
| 312 | |
| 313 }else if (length(n) == 1){ | |
| 314 cadd[[i]] <- n | |
| 315 break | |
| 316 } | |
| 317 } | |
| 318 } | |
| 319 | |
| 320 for ( i in 1:length(cadd) ){ | |
| 321 cpart[cols %in% cadd[[i]]] <- max(cpart) + 1 | |
| 322 } | |
| 323 } | |
| 324 | |
| 325 ### determine final clusters | |
| 326 for ( i in 1:max(cpart) ){ | |
| 327 d <- object@fdata[,cols[cpart == i]] | |
| 328 if ( sum(cpart == i) == 1 ) cent <- d else cent <- apply(d,1,mean) | |
| 329 if ( i == 1 ) dcent <- data.frame(cent) else dcent <- cbind(dcent,cent) | |
| 330 if ( i == 1 ) tmp <- data.frame(apply(object@fdata[,cols],2,dist.gen.pairs,y=cent,method=object@clusterpar$metric)) else tmp <- cbind(tmp,apply(object@fdata[,cols],2,dist.gen.pairs,y=cent,method=object@clusterpar$metric)) | |
| 331 } | |
| 332 cpart <- apply(tmp,1,function(x) order(x,decreasing=FALSE)[1]) | |
| 333 | |
| 334 for ( i in max(cpart):1){if (sum(cpart==i)==0) cpart[cpart>i] <- cpart[cpart>i] - 1 } | |
| 335 | |
| 336 object@cpart <- cpart | |
| 337 | |
| 338 set.seed(111111) | |
| 339 object@fcol <- sample(rainbow(max(cpart))) | |
| 340 return(object) | |
| 341 } | |
| 342 ) | |
| 343 | |
| 344 setGeneric("plotgap", function(object) standardGeneric("plotgap")) | |
| 345 | |
| 346 setMethod("plotgap", | |
| 347 signature = "SCseq", | |
| 348 definition = function(object){ | |
| 349 if ( length(object@kmeans$kpart) == 0 ) stop("run clustexp before plotgap") | |
| 350 plot(object@kmeans$gap) | |
| 351 } | |
| 352 ) | |
| 353 | |
| 354 setGeneric("plotsilhouette", function(object) standardGeneric("plotsilhouette")) | |
| 355 | |
| 356 setMethod("plotsilhouette", | |
| 357 signature = "SCseq", | |
| 358 definition = function(object){ | |
| 359 if ( length(object@kmeans$kpart) == 0 ) stop("run clustexp before plotsilhouette") | |
| 360 if ( length(unique(object@kmeans$kpart)) < 2 ) stop("only a single cluster: no silhouette plot") | |
| 361 kpart <- object@kmeans$kpart | |
| 362 distances <- dist.gen(object@distances) | |
| 363 si <- silhouette(kpart,distances) | |
| 364 plot(si) | |
| 365 } | |
| 366 ) | |
| 367 | |
| 368 setGeneric("plotjaccard", function(object) standardGeneric("plotjaccard")) | |
| 369 | |
| 370 setMethod("plotjaccard", | |
| 371 signature = "SCseq", | |
| 372 definition = function(object){ | |
| 373 if ( length(object@kmeans$kpart) == 0 ) stop("run clustexp before plotjaccard") | |
| 374 if ( length(unique(object@kmeans$kpart)) < 2 ) stop("only a single cluster: no Jaccard's similarity plot") | |
| 375 barplot(object@kmeans$jaccard,names.arg=1:length(object@kmeans$jaccard),ylab="Jaccard's similarity") | |
| 376 } | |
| 377 ) | |
| 378 | |
| 379 setGeneric("plotoutlierprobs", function(object) standardGeneric("plotoutlierprobs")) | |
| 380 | |
| 381 setMethod("plotoutlierprobs", | |
| 382 signature = "SCseq", | |
| 383 definition = function(object){ | |
| 384 if ( length(object@cpart) == 0 ) stop("run findoutliers before plotoutlierprobs") | |
| 385 p <- object@kmeans$kpart[ order(object@kmeans$kpart,decreasing=FALSE)] | |
| 386 x <- object@out$cprobs[names(p)] | |
| 387 fcol <- object@fcol | |
| 388 for ( i in 1:max(p) ){ | |
| 389 y <- -log10(x + 2.2e-16) | |
| 390 y[p != i] <- 0 | |
| 391 if ( i == 1 ) b <- barplot(y,ylim=c(0,max(-log10(x + 2.2e-16))*1.1),col=fcol[i],border=fcol[i],names.arg=FALSE,ylab="-log10prob") else barplot(y,add=TRUE,col=fcol[i],border=fcol[i],names.arg=FALSE,axes=FALSE) | |
| 392 } | |
| 393 abline(-log10(object@outlierpar$probthr),0,col="black",lty=2) | |
| 394 d <- b[2,1] - b[1,1] | |
| 395 y <- 0 | |
| 396 for ( i in 1:max(p) ) y <- append(y,b[sum(p <=i),1] + d/2) | |
| 397 axis(1,at=(y[1:(length(y)-1)] + y[-1])/2,lab=1:max(p)) | |
| 398 box() | |
| 399 } | |
| 400 ) | |
| 401 | |
| 402 setGeneric("plotbackground", function(object) standardGeneric("plotbackground")) | |
| 403 | |
| 404 setMethod("plotbackground", | |
| 405 signature = "SCseq", | |
| 406 definition = function(object){ | |
| 407 if ( length(object@cpart) == 0 ) stop("run findoutliers before plotbackground") | |
| 408 m <- apply(object@fdata,1,mean) | |
| 409 v <- apply(object@fdata,1,var) | |
| 410 fit <- locfit(v~lp(m,nn=.7),family="gamma",maxk=500) | |
| 411 plot(log2(m),log2(v),pch=20,xlab="log2mean",ylab="log2var",col="grey") | |
| 412 lines(log2(m[order(m)]),log2(object@background$lvar(m[order(m)],object)),col="red",lwd=2) | |
| 413 lines(log2(m[order(m)]),log2(fitted(fit)[order(m)]),col="orange",lwd=2,lty=2) | |
| 414 legend("topleft",legend=substitute(paste("y = ",a,"*x^2 + ",b,"*x + ",d,sep=""),list(a=round(coef(object@background$vfit)[3],2),b=round(coef(object@background$vfit)[2],2),d=round(coef(object@background$vfit)[1],2))),bty="n") | |
| 415 } | |
| 416 ) | |
| 417 | |
| 418 setGeneric("plotsensitivity", function(object) standardGeneric("plotsensitivity")) | |
| 419 | |
| 420 setMethod("plotsensitivity", | |
| 421 signature = "SCseq", | |
| 422 definition = function(object){ | |
| 423 if ( length(object@cpart) == 0 ) stop("run findoutliers before plotsensitivity") | |
| 424 plot(log10(object@out$thr), object@out$stest, type="l",xlab="log10 Probability cutoff", ylab="Number of outliers") | |
| 425 abline(v=log10(object@outlierpar$probthr),col="red",lty=2) | |
| 426 } | |
| 427 ) | |
| 428 | |
| 429 setGeneric("clustdiffgenes", function(object,pvalue=.01) standardGeneric("clustdiffgenes")) | |
| 430 | |
| 431 setMethod("clustdiffgenes", | |
| 432 signature = "SCseq", | |
| 433 definition = function(object,pvalue){ | |
| 434 if ( length(object@cpart) == 0 ) stop("run findoutliers before clustdiffgenes") | |
| 435 if ( ! is.numeric(pvalue) ) stop("pvalue has to be a number between 0 and 1") else if ( pvalue < 0 | pvalue > 1 ) stop("pvalue has to be a number between 0 and 1") | |
| 436 cdiff <- list() | |
| 437 x <- object@ndata | |
| 438 y <- object@expdata[,names(object@ndata)] | |
| 439 part <- object@cpart | |
| 440 for ( i in 1:max(part) ){ | |
| 441 if ( sum(part == i) == 0 ) next | |
| 442 m <- apply(x,1,mean) | |
| 443 n <- if ( sum(part == i) > 1 ) apply(x[,part == i],1,mean) else x[,part == i] | |
| 444 no <- if ( sum(part == i) > 1 ) median(apply(y[,part == i],2,sum))/median(apply(x[,part == i],2,sum)) else sum(y[,part == i])/sum(x[,part == i]) | |
| 445 m <- m*no | |
| 446 n <- n*no | |
| 447 pv <- binompval(m/sum(m),sum(n),n) | |
| 448 d <- data.frame(mean.all=m,mean.cl=n,fc=n/m,pv=pv)[order(pv,decreasing=FALSE),] | |
| 449 cdiff[[paste("cl",i,sep=".")]] <- d[d$pv < pvalue,] | |
| 450 } | |
| 451 return(cdiff) | |
| 452 } | |
| 453 ) | |
| 454 | |
| 455 setGeneric("diffgenes", function(object,cl1,cl2,mincount=5) standardGeneric("diffgenes")) | |
| 456 | |
| 457 setMethod("diffgenes", | |
| 458 signature = "SCseq", | |
| 459 definition = function(object,cl1,cl2,mincount){ | |
| 460 part <- object@cpart | |
| 461 cl1 <- c(cl1) | |
| 462 cl2 <- c(cl2) | |
| 463 if ( length(part) == 0 ) stop("run findoutliers before diffgenes") | |
| 464 if ( ! is.numeric(mincount) ) stop("mincount has to be a non-negative number") else if ( mincount < 0 ) stop("mincount has to be a non-negative number") | |
| 465 if ( length(intersect(cl1, part)) < length(unique(cl1)) ) stop( paste("cl1 has to be a subset of ",paste(sort(unique(part)),collapse=","),"\n",sep="") ) | |
| 466 if ( length(intersect(cl2, part)) < length(unique(cl2)) ) stop( paste("cl2 has to be a subset of ",paste(sort(unique(part)),collapse=","),"\n",sep="") ) | |
| 467 f <- apply(object@ndata[,part %in% c(cl1,cl2)],1,max) > mincount | |
| 468 x <- object@ndata[f,part %in% cl1] | |
| 469 y <- object@ndata[f,part %in% cl2] | |
| 470 if ( sum(part %in% cl1) == 1 ) m1 <- x else m1 <- apply(x,1,mean) | |
| 471 if ( sum(part %in% cl2) == 1 ) m2 <- y else m2 <- apply(y,1,mean) | |
| 472 if ( sum(part %in% cl1) == 1 ) s1 <- sqrt(x) else s1 <- sqrt(apply(x,1,var)) | |
| 473 if ( sum(part %in% cl2) == 1 ) s2 <- sqrt(y) else s2 <- sqrt(apply(y,1,var)) | |
| 474 | |
| 475 d <- ( m1 - m2 )/ apply( cbind( s1, s2 ),1,mean ) | |
| 476 names(d) <- rownames(object@ndata)[f] | |
| 477 if ( sum(part %in% cl1) == 1 ){ | |
| 478 names(x) <- names(d) | |
| 479 x <- x[order(d,decreasing=TRUE)] | |
| 480 }else{ | |
| 481 x <- x[order(d,decreasing=TRUE),] | |
| 482 } | |
| 483 if ( sum(part %in% cl2) == 1 ){ | |
| 484 names(y) <- names(d) | |
| 485 y <- y[order(d,decreasing=TRUE)] | |
| 486 }else{ | |
| 487 y <- y[order(d,decreasing=TRUE),] | |
| 488 } | |
| 489 return(list(z=d[order(d,decreasing=TRUE)],cl1=x,cl2=y,cl1n=cl1,cl2n=cl2)) | |
| 490 } | |
| 491 ) | |
| 492 | |
| 493 plotdiffgenes <- function(z,gene=g){ | |
| 494 if ( ! is.list(z) ) stop("first arguments needs to be output of function diffgenes") | |
| 495 if ( length(z) < 5 ) stop("first arguments needs to be output of function diffgenes") | |
| 496 if ( sum(names(z) == c("z","cl1","cl2","cl1n","cl2n")) < 5 ) stop("first arguments needs to be output of function diffgenes") | |
| 497 if ( length(gene) > 1 ) stop("only single value allowed for argument gene") | |
| 498 if ( !is.numeric(gene) & !(gene %in% names(z$z)) ) stop("argument gene needs to be within rownames of first argument or a positive integer number") | |
| 499 if ( is.numeric(gene) ){ if ( gene < 0 | round(gene) != gene ) stop("argument gene needs to be within rownames of first argument or a positive integer number") } | |
| 500 x <- if ( is.null(dim(z$cl1)) ) z$cl1[gene] else t(z$cl1[gene,]) | |
| 501 y <- if ( is.null(dim(z$cl2)) ) z$cl2[gene] else t(z$cl2[gene,]) | |
| 502 plot(1:length(c(x,y)),c(x,y),ylim=c(0,max(c(x,y))),xlab="",ylab="Expression",main=gene,cex=0,axes=FALSE) | |
| 503 axis(2) | |
| 504 box() | |
| 505 u <- 1:length(x) | |
| 506 rect(u - .5,0,u + .5,x,col="red") | |
| 507 v <- c(min(u) - .5,max(u) + .5) | |
| 508 axis(1,at=mean(v),lab=paste(z$cl1n,collapse=",")) | |
| 509 lines(v,rep(mean(x),length(v))) | |
| 510 lines(v,rep(mean(x)-sqrt(var(x)),length(v)),lty=2) | |
| 511 lines(v,rep(mean(x)+sqrt(var(x)),length(v)),lty=2) | |
| 512 | |
| 513 u <- ( length(x) + 1 ):length(c(x,y)) | |
| 514 v <- c(min(u) - .5,max(u) + .5) | |
| 515 rect(u - .5,0,u + .5,y,col="blue") | |
| 516 axis(1,at=mean(v),lab=paste(z$cl2n,collapse=",")) | |
| 517 lines(v,rep(mean(y),length(v))) | |
| 518 lines(v,rep(mean(y)-sqrt(var(y)),length(v)),lty=2) | |
| 519 lines(v,rep(mean(y)+sqrt(var(y)),length(v)),lty=2) | |
| 520 abline(v=length(x) + .5) | |
| 521 } | |
| 522 | |
| 523 setGeneric("clustheatmap", function(object,final=FALSE,hmethod="single") standardGeneric("clustheatmap")) | |
| 524 | |
| 525 setMethod("clustheatmap", | |
| 526 signature = "SCseq", | |
| 527 definition = function(object,final,hmethod){ | |
| 528 if ( final & length(object@cpart) == 0 ) stop("run findoutliers before clustheatmap") | |
| 529 if ( !final & length(object@kmeans$kpart) == 0 ) stop("run clustexp before clustheatmap") | |
| 530 x <- object@fdata | |
| 531 part <- if ( final ) object@cpart else object@kmeans$kpart | |
| 532 na <- c() | |
| 533 j <- 0 | |
| 534 for ( i in 1:max(part) ){ | |
| 535 if ( sum(part == i) == 0 ) next | |
| 536 j <- j + 1 | |
| 537 na <- append(na,i) | |
| 538 d <- x[,part == i] | |
| 539 if ( sum(part == i) == 1 ) cent <- d else cent <- apply(d,1,mean) | |
| 540 if ( j == 1 ) tmp <- data.frame(cent) else tmp <- cbind(tmp,cent) | |
| 541 } | |
| 542 names(tmp) <- paste("cl",na,sep=".") | |
| 543 if ( max(part) > 1 ) cclmo <- hclust(dist.gen(as.matrix(dist.gen(t(tmp),method=object@clusterpar$metric))),method=hmethod)$order else cclmo <- 1 | |
| 544 q <- part | |
| 545 for ( i in 1:max(part) ){ | |
| 546 q[part == na[cclmo[i]]] <- i | |
| 547 } | |
| 548 part <- q | |
| 549 di <- as.data.frame( as.matrix( dist.gen(t(object@distances)) ) ) | |
| 550 pto <- part[order(part,decreasing=FALSE)] | |
| 551 ptn <- c() | |
| 552 for ( i in 1:max(pto) ){ pt <- names(pto)[pto == i]; z <- if ( length(pt) == 1 ) pt else pt[hclust(as.dist(t(di[pt,pt])),method=hmethod)$order]; ptn <- append(ptn,z) } | |
| 553 col <- object@fcol | |
| 554 mi <- min(di,na.rm=TRUE) | |
| 555 ma <- max(di,na.rm=TRUE) | |
| 556 layout(matrix(data=c(1,3,2,4), nrow=2, ncol=2), widths=c(5,1,5,1), heights=c(5,1,1,1)) | |
| 557 ColorRamp <- colorRampPalette(brewer.pal(n = 7,name = "RdYlBu"))(100) | |
| 558 ColorLevels <- seq(mi, ma, length=length(ColorRamp)) | |
| 559 if ( mi == ma ){ | |
| 560 ColorLevels <- seq(0.99*mi, 1.01*ma, length=length(ColorRamp)) | |
| 561 } | |
| 562 par(mar = c(3,5,2.5,2)) | |
| 563 image(as.matrix(di[ptn,ptn]),col=ColorRamp,axes=FALSE) | |
| 564 abline(0,1) | |
| 565 box() | |
| 566 | |
| 567 tmp <- c() | |
| 568 for ( u in 1:max(part) ){ | |
| 569 ol <- (0:(length(part) - 1)/(length(part) - 1))[ptn %in% names(x)[part == u]] | |
| 570 points(rep(0,length(ol)),ol,col=col[cclmo[u]],pch=15,cex=.75) | |
| 571 points(ol,rep(0,length(ol)),col=col[cclmo[u]],pch=15,cex=.75) | |
| 572 tmp <- append(tmp,mean(ol)) | |
| 573 } | |
| 574 axis(1,at=tmp,lab=cclmo) | |
| 575 axis(2,at=tmp,lab=cclmo) | |
| 576 par(mar = c(3,2.5,2.5,2)) | |
| 577 image(1, ColorLevels, | |
| 578 matrix(data=ColorLevels, ncol=length(ColorLevels),nrow=1), | |
| 579 col=ColorRamp, | |
| 580 xlab="",ylab="", | |
| 581 xaxt="n") | |
| 582 layout(1) | |
| 583 return(cclmo) | |
| 584 } | |
| 585 ) | |
| 586 | |
| 587 setGeneric("comptsne", function(object,rseed=15555) standardGeneric("comptsne")) | |
| 588 | |
| 589 setMethod("comptsne", | |
| 590 signature = "SCseq", | |
| 591 definition = function(object,rseed){ | |
| 592 if ( length(object@kmeans$kpart) == 0 ) stop("run clustexp before comptsne") | |
| 593 set.seed(rseed) | |
| 594 di <- dist.gen(as.matrix(object@distances)) | |
| 595 ts <- tsne(di,k=2) | |
| 596 object@tsne <- as.data.frame(ts) | |
| 597 return(object) | |
| 598 } | |
| 599 ) | |
| 600 | |
| 601 setGeneric("plottsne", function(object,final=TRUE) standardGeneric("plottsne")) | |
| 602 | |
| 603 setMethod("plottsne", | |
| 604 signature = "SCseq", | |
| 605 definition = function(object,final){ | |
| 606 if ( length(object@tsne) == 0 ) stop("run comptsne before plottsne") | |
| 607 if ( final & length(object@cpart) == 0 ) stop("run findoutliers before plottsne") | |
| 608 if ( !final & length(object@kmeans$kpart) == 0 ) stop("run clustexp before plottsne") | |
| 609 part <- if ( final ) object@cpart else object@kmeans$kpart | |
| 610 plot(object@tsne,xlab="Dim 1",ylab="Dim 2",pch=20,cex=1.5,col="lightgrey") | |
| 611 for ( i in 1:max(part) ){ | |
| 612 if ( sum(part == i) > 0 ) text(object@tsne[part == i,1],object@tsne[part == i,2],i,col=object@fcol[i],cex=.75,font=4) | |
| 613 } | |
| 614 } | |
| 615 ) | |
| 616 | |
| 617 setGeneric("plotlabelstsne", function(object,labels=NULL) standardGeneric("plotlabelstsne")) | |
| 618 | |
| 619 setMethod("plotlabelstsne", | |
| 620 signature = "SCseq", | |
| 621 definition = function(object,labels){ | |
| 622 if ( is.null(labels ) ) labels <- names(object@ndata) | |
| 623 if ( length(object@tsne) == 0 ) stop("run comptsne before plotlabelstsne") | |
| 624 plot(object@tsne,xlab="Dim 1",ylab="Dim 2",pch=20,cex=1.5,col="lightgrey") | |
| 625 text(object@tsne[,1],object@tsne[,2],labels,cex=.5) | |
| 626 } | |
| 627 ) | |
| 628 | |
| 629 setGeneric("plotsymbolstsne", function(object,types=NULL) standardGeneric("plotsymbolstsne")) | |
| 630 | |
| 631 setMethod("plotsymbolstsne", | |
| 632 signature = "SCseq", | |
| 633 definition = function(object,types){ | |
| 634 if ( is.null(types) ) types <- names(object@fdata) | |
| 635 if ( length(object@tsne) == 0 ) stop("run comptsne before plotsymbolstsne") | |
| 636 if ( length(types) != ncol(object@fdata) ) stop("types argument has wrong length. Length has to equal to the column number of object@ndata") | |
| 637 coloc <- rainbow(length(unique(types))) | |
| 638 syms <- c() | |
| 639 plot(object@tsne,xlab="Dim 1",ylab="Dim 2",pch=20,col="grey") | |
| 640 for ( i in 1:length(unique(types)) ){ | |
| 641 f <- types == sort(unique(types))[i] | |
| 642 syms <- append( syms, ( (i-1) %% 25 ) + 1 ) | |
| 643 points(object@tsne[f,1],object@tsne[f,2],col=coloc[i],pch=( (i-1) %% 25 ) + 1,cex=1) | |
| 644 } | |
| 645 legend("topleft", legend=sort(unique(types)), col=coloc, pch=syms) | |
| 646 } | |
| 647 ) | |
| 648 | |
| 649 setGeneric("plotexptsne", function(object,g,n="",logsc=FALSE) standardGeneric("plotexptsne")) | |
| 650 | |
| 651 setMethod("plotexptsne", | |
| 652 signature = "SCseq", | |
| 653 definition = function(object,g,n="",logsc=FALSE){ | |
| 654 if ( length(object@tsne) == 0 ) stop("run comptsne before plottsne") | |
| 655 if ( length(intersect(g,rownames(object@ndata))) < length(unique(g)) ) stop("second argument does not correspond to set of rownames slot ndata of SCseq object") | |
| 656 if ( !is.numeric(logsc) & !is.logical(logsc) ) stop("argument logsc has to be logical (TRUE/FALSE)") | |
| 657 if ( n == "" ) n <- g[1] | |
| 658 l <- apply(object@ndata[g,] - .1,2,sum) + .1 | |
| 659 if (logsc) { | |
| 660 f <- l == 0 | |
| 661 l <- log(l) | |
| 662 l[f] <- NA | |
| 663 } | |
| 664 mi <- min(l,na.rm=TRUE) | |
| 665 ma <- max(l,na.rm=TRUE) | |
| 666 ColorRamp <- colorRampPalette(rev(brewer.pal(n = 7,name = "RdYlBu")))(100) | |
| 667 ColorLevels <- seq(mi, ma, length=length(ColorRamp)) | |
| 668 v <- round((l - mi)/(ma - mi)*99 + 1,0) | |
| 669 layout(matrix(data=c(1,3,2,4), nrow=2, ncol=2), widths=c(5,1,5,1), heights=c(5,1,1,1)) | |
| 670 par(mar = c(3,5,2.5,2)) | |
| 671 plot(object@tsne,xlab="Dim 1",ylab="Dim 2",main=n,pch=20,cex=0,col="grey") | |
| 672 for ( k in 1:length(v) ){ | |
| 673 points(object@tsne[k,1],object@tsne[k,2],col=ColorRamp[v[k]],pch=20,cex=1.5) | |
| 674 } | |
| 675 par(mar = c(3,2.5,2.5,2)) | |
| 676 image(1, ColorLevels, | |
| 677 matrix(data=ColorLevels, ncol=length(ColorLevels),nrow=1), | |
| 678 col=ColorRamp, | |
| 679 xlab="",ylab="", | |
| 680 xaxt="n") | |
| 681 layout(1) | |
| 682 } | |
| 683 ) |
