Mercurial > repos > bornea > dotplot_runner
comparison Dotplot_Release/pheatmap_j.R @ 0:dfa3436beb67 draft
Uploaded
| author | bornea |
|---|---|
| date | Fri, 29 Jan 2016 09:56:02 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:dfa3436beb67 |
|---|---|
| 1 lo = function(rown, coln, nrow, ncol, cellheight = NA, cellwidth = NA, treeheight_col, treeheight_row, legend, annotation, annotation_colors, annotation_legend, main, fontsize, fontsize_row, fontsize_col, ...){ | |
| 2 # Get height of colnames and length of rownames | |
| 3 if(!is.null(coln[1])){ | |
| 4 longest_coln = which.max(strwidth(coln, units = 'in')) | |
| 5 gp = list(fontsize = fontsize_col, ...) | |
| 6 coln_height = unit(1, "grobheight", textGrob(coln[longest_coln], rot = 90, gp = do.call(gpar, gp))) + unit(5, "bigpts") | |
| 7 } | |
| 8 else{ | |
| 9 coln_height = unit(5, "bigpts") | |
| 10 } | |
| 11 | |
| 12 if(!is.null(rown[1])){ | |
| 13 longest_rown = which.max(strwidth(rown, units = 'in')) | |
| 14 gp = list(fontsize = fontsize_row, ...) | |
| 15 rown_width = unit(1, "grobwidth", textGrob(rown[longest_rown], gp = do.call(gpar, gp))) + unit(10, "bigpts") | |
| 16 } | |
| 17 else{ | |
| 18 rown_width = unit(5, "bigpts") | |
| 19 } | |
| 20 | |
| 21 gp = list(fontsize = fontsize, ...) | |
| 22 # Legend position | |
| 23 if(!is.na(legend[1])){ | |
| 24 longest_break = which.max(nchar(names(legend))) | |
| 25 longest_break = unit(1.1, "grobwidth", textGrob(as.character(names(legend))[longest_break], gp = do.call(gpar, gp))) | |
| 26 title_length = unit(1.1, "grobwidth", textGrob("Scale", gp = gpar(fontface = "bold", ...))) | |
| 27 legend_width = unit(12, "bigpts") + longest_break * 1.2 | |
| 28 legend_width = max(title_length, legend_width) | |
| 29 } | |
| 30 else{ | |
| 31 legend_width = unit(0, "bigpts") | |
| 32 } | |
| 33 | |
| 34 # Set main title height | |
| 35 if(is.na(main)){ | |
| 36 main_height = unit(0, "npc") | |
| 37 } | |
| 38 else{ | |
| 39 main_height = unit(1.5, "grobheight", textGrob(main, gp = gpar(fontsize = 1.3 * fontsize, ...))) | |
| 40 } | |
| 41 | |
| 42 # Column annotations | |
| 43 if(!is.na(annotation[[1]][1])){ | |
| 44 # Column annotation height | |
| 45 annot_height = unit(ncol(annotation) * (8 + 2) + 2, "bigpts") | |
| 46 # Width of the correponding legend | |
| 47 longest_ann = which.max(nchar(as.matrix(annotation))) | |
| 48 annot_legend_width = unit(1.2, "grobwidth", textGrob(as.matrix(annotation)[longest_ann], gp = gpar(...))) + unit(12, "bigpts") | |
| 49 if(!annotation_legend){ | |
| 50 annot_legend_width = unit(0, "npc") | |
| 51 } | |
| 52 } | |
| 53 else{ | |
| 54 annot_height = unit(0, "bigpts") | |
| 55 annot_legend_width = unit(0, "bigpts") | |
| 56 } | |
| 57 | |
| 58 # Tree height | |
| 59 treeheight_col = unit(treeheight_col, "bigpts") + unit(5, "bigpts") | |
| 60 treeheight_row = unit(treeheight_row, "bigpts") + unit(5, "bigpts") | |
| 61 | |
| 62 # Set cell sizes | |
| 63 if(is.na(cellwidth)){ | |
| 64 matwidth = unit(1, "npc") - rown_width - legend_width - treeheight_row - annot_legend_width | |
| 65 } | |
| 66 else{ | |
| 67 matwidth = unit(cellwidth * ncol, "bigpts") | |
| 68 } | |
| 69 | |
| 70 if(is.na(cellheight)){ | |
| 71 matheight = unit(1, "npc") - main_height - coln_height - treeheight_col - annot_height | |
| 72 } | |
| 73 else{ | |
| 74 matheight = unit(cellheight * nrow, "bigpts") | |
| 75 } | |
| 76 | |
| 77 | |
| 78 # Produce layout() | |
| 79 pushViewport(viewport(layout = grid.layout(nrow = 5, ncol = 5, widths = unit.c(treeheight_row, matwidth, rown_width, legend_width, annot_legend_width), heights = unit.c(main_height, treeheight_col, annot_height, matheight, coln_height)), gp = do.call(gpar, gp))) | |
| 80 | |
| 81 # Get cell dimensions | |
| 82 pushViewport(vplayout(4, 2)) | |
| 83 cellwidth = convertWidth(unit(0:1, "npc"), "bigpts", valueOnly = T)[2] / ncol | |
| 84 cellheight = convertHeight(unit(0:1, "npc"), "bigpts", valueOnly = T)[2] / nrow | |
| 85 upViewport() | |
| 86 | |
| 87 # Return minimal cell dimension in bigpts to decide if borders are drawn | |
| 88 mindim = min(cellwidth, cellheight) | |
| 89 return(mindim) | |
| 90 } | |
| 91 | |
| 92 draw_dendrogram = function(hc, horizontal = T){ | |
| 93 h = hc$height / max(hc$height) / 1.05 | |
| 94 m = hc$merge | |
| 95 o = hc$order | |
| 96 n = length(o) | |
| 97 | |
| 98 m[m > 0] = n + m[m > 0] | |
| 99 m[m < 0] = abs(m[m < 0]) | |
| 100 | |
| 101 dist = matrix(0, nrow = 2 * n - 1, ncol = 2, dimnames = list(NULL, c("x", "y"))) | |
| 102 dist[1:n, 1] = 1 / n / 2 + (1 / n) * (match(1:n, o) - 1) | |
| 103 | |
| 104 for(i in 1:nrow(m)){ | |
| 105 dist[n + i, 1] = (dist[m[i, 1], 1] + dist[m[i, 2], 1]) / 2 | |
| 106 dist[n + i, 2] = h[i] | |
| 107 } | |
| 108 | |
| 109 draw_connection = function(x1, x2, y1, y2, y){ | |
| 110 grid.lines(x = c(x1, x1), y = c(y1, y)) | |
| 111 grid.lines(x = c(x2, x2), y = c(y2, y)) | |
| 112 grid.lines(x = c(x1, x2), y = c(y, y)) | |
| 113 } | |
| 114 | |
| 115 if(horizontal){ | |
| 116 for(i in 1:nrow(m)){ | |
| 117 draw_connection(dist[m[i, 1], 1], dist[m[i, 2], 1], dist[m[i, 1], 2], dist[m[i, 2], 2], h[i]) | |
| 118 } | |
| 119 } | |
| 120 | |
| 121 else{ | |
| 122 gr = rectGrob() | |
| 123 pushViewport(viewport(height = unit(1, "grobwidth", gr), width = unit(1, "grobheight", gr), angle = 90)) | |
| 124 dist[, 1] = 1 - dist[, 1] | |
| 125 for(i in 1:nrow(m)){ | |
| 126 draw_connection(dist[m[i, 1], 1], dist[m[i, 2], 1], dist[m[i, 1], 2], dist[m[i, 2], 2], h[i]) | |
| 127 } | |
| 128 upViewport() | |
| 129 } | |
| 130 } | |
| 131 | |
| 132 draw_matrix = function(matrix, border_color, border_width, fmat, fontsize_number){ | |
| 133 n = nrow(matrix) | |
| 134 m = ncol(matrix) | |
| 135 x = (1:m)/m - 1/2/m | |
| 136 y = 1 - ((1:n)/n - 1/2/n) | |
| 137 for(i in 1:m){ | |
| 138 grid.rect(x = x[i], y = y[1:n], width = 1/m, height = 1/n, gp = gpar(fill = matrix[,i], col = border_color, lwd = border_width)) | |
| 139 if(attr(fmat, "draw")){ | |
| 140 grid.text(x = x[i], y = y[1:n], label = fmat[, i], gp = gpar(col = "grey30", fontsize = fontsize_number)) | |
| 141 } | |
| 142 } | |
| 143 } | |
| 144 | |
| 145 draw_colnames = function(coln, ...){ | |
| 146 m = length(coln) | |
| 147 x = (1:m)/m - 1/2/m | |
| 148 grid.text(coln, x = x, y = unit(0.96, "npc"), just="right", rot = 90, gp = gpar(...)) | |
| 149 } | |
| 150 | |
| 151 draw_rownames = function(rown, ...){ | |
| 152 n = length(rown) | |
| 153 y = 1 - ((1:n)/n - 1/2/n) | |
| 154 grid.text(rown, x = unit(0.04, "npc"), y = y, vjust = 0.5, hjust = 0, gp = gpar(...)) | |
| 155 } | |
| 156 | |
| 157 draw_legend = function(color, breaks, legend, ...){ | |
| 158 height = min(unit(1, "npc"), unit(150, "bigpts")) | |
| 159 pushViewport(viewport(x = 0, y = unit(1, "npc"), just = c(0, 1), height = height)) | |
| 160 legend_pos = (legend - min(breaks)) / (max(breaks) - min(breaks)) | |
| 161 breaks = (breaks - min(breaks)) / (max(breaks) - min(breaks)) | |
| 162 h = breaks[-1] - breaks[-length(breaks)] | |
| 163 grid.rect(x = 0, y = breaks[-length(breaks)], width = unit(10, "bigpts"), height = h, hjust = 0, vjust = 0, gp = gpar(fill = color, col = "#FFFFFF00")) | |
| 164 grid.text(names(legend), x = unit(12, "bigpts"), y = legend_pos, hjust = 0, gp = gpar(...)) | |
| 165 upViewport() | |
| 166 } | |
| 167 | |
| 168 convert_annotations = function(annotation, annotation_colors){ | |
| 169 new = annotation | |
| 170 for(i in 1:ncol(annotation)){ | |
| 171 a = annotation[, i] | |
| 172 b = annotation_colors[[colnames(annotation)[i]]] | |
| 173 if(is.character(a) | is.factor(a)){ | |
| 174 a = as.character(a) | |
| 175 if(length(setdiff(a, names(b))) > 0){ | |
| 176 stop(sprintf("Factor levels on variable %s do not match with annotation_colors", colnames(annotation)[i])) | |
| 177 } | |
| 178 new[, i] = b[a] | |
| 179 } | |
| 180 else{ | |
| 181 a = cut(a, breaks = 100) | |
| 182 new[, i] = colorRampPalette(b)(100)[a] | |
| 183 } | |
| 184 } | |
| 185 return(as.matrix(new)) | |
| 186 } | |
| 187 | |
| 188 draw_annotations = function(converted_annotations, border_color, border_width){ | |
| 189 n = ncol(converted_annotations) | |
| 190 m = nrow(converted_annotations) | |
| 191 x = (1:m)/m - 1/2/m | |
| 192 y = cumsum(rep(8, n)) - 4 + cumsum(rep(2, n)) | |
| 193 for(i in 1:m){ | |
| 194 grid.rect(x = x[i], unit(y[1:n], "bigpts"), width = 1/m, height = unit(8, "bigpts"), gp = gpar(fill = converted_annotations[i, ], col = border_color, lwd = border_width)) | |
| 195 } | |
| 196 } | |
| 197 | |
| 198 draw_annotation_legend = function(annotation, annotation_colors, border_color, border_width, ...){ | |
| 199 y = unit(1, "npc") | |
| 200 text_height = unit(1, "grobheight", textGrob("FGH", gp = gpar(...))) | |
| 201 for(i in names(annotation_colors)){ | |
| 202 grid.text(i, x = 0, y = y, vjust = 1, hjust = 0, gp = gpar(fontface = "bold", ...)) | |
| 203 y = y - 1.5 * text_height | |
| 204 if(is.character(annotation[, i]) | is.factor(annotation[, i])){ | |
| 205 for(j in 1:length(annotation_colors[[i]])){ | |
| 206 grid.rect(x = unit(0, "npc"), y = y, hjust = 0, vjust = 1, height = text_height, width = text_height, gp = gpar(col = border_color, lwd = border_width, fill = annotation_colors[[i]][j])) | |
| 207 grid.text(names(annotation_colors[[i]])[j], x = text_height * 1.3, y = y, hjust = 0, vjust = 1, gp = gpar(...)) | |
| 208 y = y - 1.5 * text_height | |
| 209 } | |
| 210 } | |
| 211 else{ | |
| 212 yy = y - 4 * text_height + seq(0, 1, 0.02) * 4 * text_height | |
| 213 h = 4 * text_height * 0.02 | |
| 214 grid.rect(x = unit(0, "npc"), y = yy, hjust = 0, vjust = 1, height = h, width = text_height, gp = gpar(col = "#FFFFFF00", fill = colorRampPalette(annotation_colors[[i]])(50))) | |
| 215 txt = rev(range(grid.pretty(range(annotation[, i], na.rm = TRUE)))) | |
| 216 yy = y - c(0, 3) * text_height | |
| 217 grid.text(txt, x = text_height * 1.3, y = yy, hjust = 0, vjust = 1, gp = gpar(...)) | |
| 218 y = y - 4.5 * text_height | |
| 219 } | |
| 220 y = y - 1.5 * text_height | |
| 221 } | |
| 222 } | |
| 223 | |
| 224 draw_main = function(text, ...){ | |
| 225 grid.text(text, gp = gpar(fontface = "bold", ...)) | |
| 226 } | |
| 227 | |
| 228 vplayout = function(x, y){ | |
| 229 return(viewport(layout.pos.row = x, layout.pos.col = y)) | |
| 230 } | |
| 231 | |
| 232 heatmap_motor = function(matrix, border_color, border_width, cellwidth, cellheight, tree_col, tree_row, treeheight_col, treeheight_row, filename, width, height, breaks, color, legend, annotation, annotation_colors, annotation_legend, main, fontsize, fontsize_row, fontsize_col, fmat, fontsize_number, ...){ | |
| 233 grid.newpage() | |
| 234 | |
| 235 # Set layout | |
| 236 mindim = lo(coln = colnames(matrix), rown = rownames(matrix), nrow = nrow(matrix), ncol = ncol(matrix), cellwidth = cellwidth, cellheight = cellheight, treeheight_col = treeheight_col, treeheight_row = treeheight_row, legend = legend, annotation = annotation, annotation_colors = annotation_colors, annotation_legend = annotation_legend, main = main, fontsize = fontsize, fontsize_row = fontsize_row, fontsize_col = fontsize_col, ...) | |
| 237 | |
| 238 if(!is.na(filename)){ | |
| 239 pushViewport(vplayout(1:5, 1:5)) | |
| 240 | |
| 241 if(is.na(height)){ | |
| 242 height = convertHeight(unit(0:1, "npc"), "inches", valueOnly = T)[2] | |
| 243 } | |
| 244 if(is.na(width)){ | |
| 245 width = convertWidth(unit(0:1, "npc"), "inches", valueOnly = T)[2] | |
| 246 } | |
| 247 | |
| 248 # Get file type | |
| 249 r = regexpr("\\.[a-zA-Z]*$", filename) | |
| 250 if(r == -1) stop("Improper filename") | |
| 251 ending = substr(filename, r + 1, r + attr(r, "match.length")) | |
| 252 | |
| 253 f = switch(ending, | |
| 254 pdf = function(x, ...) pdf(x, ...), | |
| 255 png = function(x, ...) png(x, units = "in", res = 300, ...), | |
| 256 jpeg = function(x, ...) jpeg(x, units = "in", res = 300, ...), | |
| 257 jpg = function(x, ...) jpeg(x, units = "in", res = 300, ...), | |
| 258 tiff = function(x, ...) tiff(x, units = "in", res = 300, compression = "lzw", ...), | |
| 259 bmp = function(x, ...) bmp(x, units = "in", res = 300, ...), | |
| 260 stop("File type should be: pdf, png, bmp, jpg, tiff") | |
| 261 ) | |
| 262 | |
| 263 # print(sprintf("height:%f width:%f", height, width)) | |
| 264 f(filename, height = height, width = width) | |
| 265 heatmap_motor(matrix, cellwidth = cellwidth, cellheight = cellheight, border_color = border_color, border_width = border_width, tree_col = tree_col, tree_row = tree_row, treeheight_col = treeheight_col, treeheight_row = treeheight_row, breaks = breaks, color = color, legend = legend, annotation = annotation, annotation_colors = annotation_colors, annotation_legend = annotation_legend, filename = NA, main = main, fontsize = fontsize, fontsize_row = fontsize_row, fontsize_col = fontsize_col, fmat = fmat, fontsize_number = fontsize_number, ...) | |
| 266 dev.off() | |
| 267 upViewport() | |
| 268 return() | |
| 269 } | |
| 270 | |
| 271 # Omit border color if cell size is too small | |
| 272 if(mindim < 3) border_color = NA | |
| 273 | |
| 274 # Draw title | |
| 275 if(!is.na(main)){ | |
| 276 pushViewport(vplayout(1, 2)) | |
| 277 draw_main(main, fontsize = 1.3 * fontsize, ...) | |
| 278 upViewport() | |
| 279 } | |
| 280 | |
| 281 # Draw tree for the columns | |
| 282 if(!is.na(tree_col[[1]][1]) & treeheight_col != 0){ | |
| 283 pushViewport(vplayout(2, 2)) | |
| 284 draw_dendrogram(tree_col, horizontal = T) | |
| 285 upViewport() | |
| 286 } | |
| 287 | |
| 288 # Draw tree for the rows | |
| 289 if(!is.na(tree_row[[1]][1]) & treeheight_row != 0){ | |
| 290 pushViewport(vplayout(4, 1)) | |
| 291 draw_dendrogram(tree_row, horizontal = F) | |
| 292 upViewport() | |
| 293 } | |
| 294 | |
| 295 # Draw matrix | |
| 296 pushViewport(vplayout(4, 2)) | |
| 297 draw_matrix(matrix, border_color, border_width, fmat, fontsize_number) | |
| 298 upViewport() | |
| 299 | |
| 300 # Draw colnames | |
| 301 if(length(colnames(matrix)) != 0){ | |
| 302 pushViewport(vplayout(5, 2)) | |
| 303 pars = list(colnames(matrix), fontsize = fontsize_col, ...) | |
| 304 do.call(draw_colnames, pars) | |
| 305 upViewport() | |
| 306 } | |
| 307 | |
| 308 # Draw rownames | |
| 309 if(length(rownames(matrix)) != 0){ | |
| 310 pushViewport(vplayout(4, 3)) | |
| 311 pars = list(rownames(matrix), fontsize = fontsize_row, ...) | |
| 312 do.call(draw_rownames, pars) | |
| 313 upViewport() | |
| 314 } | |
| 315 | |
| 316 # Draw annotation tracks | |
| 317 if(!is.na(annotation[[1]][1])){ | |
| 318 pushViewport(vplayout(3, 2)) | |
| 319 converted_annotation = convert_annotations(annotation, annotation_colors) | |
| 320 draw_annotations(converted_annotation, border_color, border_width) | |
| 321 upViewport() | |
| 322 } | |
| 323 | |
| 324 # Draw annotation legend | |
| 325 if(!is.na(annotation[[1]][1]) & annotation_legend){ | |
| 326 if(length(rownames(matrix)) != 0){ | |
| 327 pushViewport(vplayout(4:5, 5)) | |
| 328 } | |
| 329 else{ | |
| 330 pushViewport(vplayout(3:5, 5)) | |
| 331 } | |
| 332 draw_annotation_legend(annotation, annotation_colors, border_color, border_width, fontsize = fontsize, ...) | |
| 333 upViewport() | |
| 334 } | |
| 335 | |
| 336 # Draw legend | |
| 337 if(!is.na(legend[1])){ | |
| 338 length(colnames(matrix)) | |
| 339 if(length(rownames(matrix)) != 0){ | |
| 340 pushViewport(vplayout(4:5, 4)) | |
| 341 } | |
| 342 else{ | |
| 343 pushViewport(vplayout(3:5, 4)) | |
| 344 } | |
| 345 draw_legend(color, breaks, legend, fontsize = fontsize, ...) | |
| 346 upViewport() | |
| 347 } | |
| 348 | |
| 349 | |
| 350 } | |
| 351 | |
| 352 generate_breaks = function(x, n, center = F){ | |
| 353 if(center){ | |
| 354 m = max(abs(c(min(x, na.rm = T), max(x, na.rm = T)))) | |
| 355 res = seq(-m, m, length.out = n + 1) | |
| 356 } | |
| 357 else{ | |
| 358 res = seq(min(x, na.rm = T), max(x, na.rm = T), length.out = n + 1) | |
| 359 } | |
| 360 | |
| 361 return(res) | |
| 362 } | |
| 363 | |
| 364 scale_vec_colours = function(x, col = rainbow(10), breaks = NA){ | |
| 365 return(col[as.numeric(cut(x, breaks = breaks, include.lowest = T))]) | |
| 366 } | |
| 367 | |
| 368 scale_colours = function(mat, col = rainbow(10), breaks = NA){ | |
| 369 mat = as.matrix(mat) | |
| 370 return(matrix(scale_vec_colours(as.vector(mat), col = col, breaks = breaks), nrow(mat), ncol(mat), dimnames = list(rownames(mat), colnames(mat)))) | |
| 371 } | |
| 372 | |
| 373 cluster_mat = function(mat, distance, method){ | |
| 374 if(!(method %in% c("ward", "single", "complete", "average", "mcquitty", "median", "centroid"))){ | |
| 375 stop("clustering method has to one form the list: 'ward', 'single', 'complete', 'average', 'mcquitty', 'median' or 'centroid'.") | |
| 376 } | |
| 377 if(!(distance[1] %in% c("correlation", "euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski")) & class(distance) != "dist"){ | |
| 378 print(!(distance[1] %in% c("correlation", "euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski")) | class(distance) != "dist") | |
| 379 stop("distance has to be a dissimilarity structure as produced by dist or one measure form the list: 'correlation', 'euclidean', 'maximum', 'manhattan', 'canberra', 'binary', 'minkowski'") | |
| 380 } | |
| 381 if(distance[1] == "correlation"){ | |
| 382 d = as.dist(1 - cor(t(mat))) | |
| 383 } | |
| 384 else{ | |
| 385 if(class(distance) == "dist"){ | |
| 386 d = distance | |
| 387 } | |
| 388 else{ | |
| 389 d = dist(mat, method = distance) | |
| 390 } | |
| 391 } | |
| 392 | |
| 393 return(hclust(d, method = method)) | |
| 394 } | |
| 395 | |
| 396 scale_rows = function(x){ | |
| 397 m = apply(x, 1, mean, na.rm = T) | |
| 398 s = apply(x, 1, sd, na.rm = T) | |
| 399 return((x - m) / s) | |
| 400 } | |
| 401 | |
| 402 scale_mat = function(mat, scale){ | |
| 403 if(!(scale %in% c("none", "row", "column"))){ | |
| 404 stop("scale argument shoud take values: 'none', 'row' or 'column'") | |
| 405 } | |
| 406 mat = switch(scale, none = mat, row = scale_rows(mat), column = t(scale_rows(t(mat)))) | |
| 407 return(mat) | |
| 408 } | |
| 409 | |
| 410 generate_annotation_colours = function(annotation, annotation_colors, drop){ | |
| 411 if(is.na(annotation_colors)[[1]][1]){ | |
| 412 annotation_colors = list() | |
| 413 } | |
| 414 count = 0 | |
| 415 for(i in 1:ncol(annotation)){ | |
| 416 if(is.character(annotation[, i]) | is.factor(annotation[, i])){ | |
| 417 if (is.factor(annotation[, i]) & !drop){ | |
| 418 count = count + length(levels(annotation[, i])) | |
| 419 } | |
| 420 else{ | |
| 421 count = count + length(unique(annotation[, i])) | |
| 422 } | |
| 423 } | |
| 424 } | |
| 425 | |
| 426 factor_colors = hsv((seq(0, 1, length.out = count + 1)[-1] + | |
| 427 0.2)%%1, 0.7, 0.95) | |
| 428 | |
| 429 set.seed(3453) | |
| 430 | |
| 431 for(i in 1:ncol(annotation)){ | |
| 432 if(!(colnames(annotation)[i] %in% names(annotation_colors))){ | |
| 433 if(is.character(annotation[, i]) | is.factor(annotation[, i])){ | |
| 434 n = length(unique(annotation[, i])) | |
| 435 if (is.factor(annotation[, i]) & !drop){ | |
| 436 n = length(levels(annotation[, i])) | |
| 437 } | |
| 438 ind = sample(1:length(factor_colors), n) | |
| 439 annotation_colors[[colnames(annotation)[i]]] = factor_colors[ind] | |
| 440 l = levels(as.factor(annotation[, i])) | |
| 441 l = l[l %in% unique(annotation[, i])] | |
| 442 if (is.factor(annotation[, i]) & !drop){ | |
| 443 l = levels(annotation[, i]) | |
| 444 } | |
| 445 names(annotation_colors[[colnames(annotation)[i]]]) = l | |
| 446 factor_colors = factor_colors[-ind] | |
| 447 } | |
| 448 else{ | |
| 449 r = runif(1) | |
| 450 annotation_colors[[colnames(annotation)[i]]] = hsv(r, c(0.1, 1), 1) | |
| 451 } | |
| 452 } | |
| 453 } | |
| 454 return(annotation_colors) | |
| 455 } | |
| 456 | |
| 457 kmeans_pheatmap = function(mat, k = min(nrow(mat), 150), sd_limit = NA, ...){ | |
| 458 # Filter data | |
| 459 if(!is.na(sd_limit)){ | |
| 460 s = apply(mat, 1, sd) | |
| 461 mat = mat[s > sd_limit, ] | |
| 462 } | |
| 463 | |
| 464 # Cluster data | |
| 465 set.seed(1245678) | |
| 466 km = kmeans(mat, k, iter.max = 100) | |
| 467 mat2 = km$centers | |
| 468 | |
| 469 # Compose rownames | |
| 470 t = table(km$cluster) | |
| 471 rownames(mat2) = sprintf("cl%s_size_%d", names(t), t) | |
| 472 | |
| 473 # Draw heatmap | |
| 474 pheatmap(mat2, ...) | |
| 475 } | |
| 476 | |
| 477 #' A function to draw clustered heatmaps. | |
| 478 #' | |
| 479 #' A function to draw clustered heatmaps where one has better control over some graphical | |
| 480 #' parameters such as cell size, etc. | |
| 481 #' | |
| 482 #' The function also allows to aggregate the rows using kmeans clustering. This is | |
| 483 #' advisable if number of rows is so big that R cannot handle their hierarchical | |
| 484 #' clustering anymore, roughly more than 1000. Instead of showing all the rows | |
| 485 #' separately one can cluster the rows in advance and show only the cluster centers. | |
| 486 #' The number of clusters can be tuned with parameter kmeans_k. | |
| 487 #' | |
| 488 #' @param mat numeric matrix of the values to be plotted. | |
| 489 #' @param color vector of colors used in heatmap. | |
| 490 #' @param kmeans_k the number of kmeans clusters to make, if we want to agggregate the | |
| 491 #' rows before drawing heatmap. If NA then the rows are not aggregated. | |
| 492 #' @param breaks a sequence of numbers that covers the range of values in mat and is one | |
| 493 #' element longer than color vector. Used for mapping values to colors. Useful, if needed | |
| 494 #' to map certain values to certain colors, to certain values. If value is NA then the | |
| 495 #' breaks are calculated automatically. | |
| 496 #' @param border_color color of cell borders on heatmap, use NA if no border should be | |
| 497 #' drawn. | |
| 498 #' @param cellwidth individual cell width in points. If left as NA, then the values | |
| 499 #' depend on the size of plotting window. | |
| 500 #' @param cellheight individual cell height in points. If left as NA, | |
| 501 #' then the values depend on the size of plotting window. | |
| 502 #' @param scale character indicating if the values should be centered and scaled in | |
| 503 #' either the row direction or the column direction, or none. Corresponding values are | |
| 504 #' \code{"row"}, \code{"column"} and \code{"none"} | |
| 505 #' @param cluster_rows boolean values determining if rows should be clustered, | |
| 506 #' @param cluster_cols boolean values determining if columns should be clustered. | |
| 507 #' @param clustering_distance_rows distance measure used in clustering rows. Possible | |
| 508 #' values are \code{"correlation"} for Pearson correlation and all the distances | |
| 509 #' supported by \code{\link{dist}}, such as \code{"euclidean"}, etc. If the value is none | |
| 510 #' of the above it is assumed that a distance matrix is provided. | |
| 511 #' @param clustering_distance_cols distance measure used in clustering columns. Possible | |
| 512 #' values the same as for clustering_distance_rows. | |
| 513 #' @param clustering_method clustering method used. Accepts the same values as | |
| 514 #' \code{\link{hclust}}. | |
| 515 #' @param treeheight_row the height of a tree for rows, if these are clustered. | |
| 516 #' Default value 50 points. | |
| 517 #' @param treeheight_col the height of a tree for columns, if these are clustered. | |
| 518 #' Default value 50 points. | |
| 519 #' @param legend logical to determine if legend should be drawn or not. | |
| 520 #' @param legend_breaks vector of breakpoints for the legend. | |
| 521 #' @param legend_labels vector of labels for the \code{legend_breaks}. | |
| 522 #' @param annotation data frame that specifies the annotations shown on top of the | |
| 523 #' columns. Each row defines the features for a specific column. The columns in the data | |
| 524 #' and rows in the annotation are matched using corresponding row and column names. Note | |
| 525 #' that color schemes takes into account if variable is continuous or discrete. | |
| 526 #' @param annotation_colors list for specifying annotation track colors manually. It is | |
| 527 #' possible to define the colors for only some of the features. Check examples for | |
| 528 #' details. | |
| 529 #' @param annotation_legend boolean value showing if the legend for annotation tracks | |
| 530 #' should be drawn. | |
| 531 #' @param drop_levels logical to determine if unused levels are also shown in the legend | |
| 532 #' @param show_rownames boolean specifying if column names are be shown. | |
| 533 #' @param show_colnames boolean specifying if column names are be shown. | |
| 534 #' @param main the title of the plot | |
| 535 #' @param fontsize base fontsize for the plot | |
| 536 #' @param fontsize_row fontsize for rownames (Default: fontsize) | |
| 537 #' @param fontsize_col fontsize for colnames (Default: fontsize) | |
| 538 #' @param display_numbers logical determining if the numeric values are also printed to | |
| 539 #' the cells. | |
| 540 #' @param number_format format strings (C printf style) of the numbers shown in cells. | |
| 541 #' For example "\code{\%.2f}" shows 2 decimal places and "\code{\%.1e}" shows exponential | |
| 542 #' notation (see more in \code{\link{sprintf}}). | |
| 543 #' @param fontsize_number fontsize of the numbers displayed in cells | |
| 544 #' @param filename file path where to save the picture. Filetype is decided by | |
| 545 #' the extension in the path. Currently following formats are supported: png, pdf, tiff, | |
| 546 #' bmp, jpeg. Even if the plot does not fit into the plotting window, the file size is | |
| 547 #' calculated so that the plot would fit there, unless specified otherwise. | |
| 548 #' @param width manual option for determining the output file width in inches. | |
| 549 #' @param height manual option for determining the output file height in inches. | |
| 550 #' @param \dots graphical parameters for the text used in plot. Parameters passed to | |
| 551 #' \code{\link{grid.text}}, see \code{\link{gpar}}. | |
| 552 #' | |
| 553 #' @return | |
| 554 #' Invisibly a list of components | |
| 555 #' \itemize{ | |
| 556 #' \item \code{tree_row} the clustering of rows as \code{\link{hclust}} object | |
| 557 #' \item \code{tree_col} the clustering of columns as \code{\link{hclust}} object | |
| 558 #' \item \code{kmeans} the kmeans clustering of rows if parameter \code{kmeans_k} was | |
| 559 #' specified | |
| 560 #' } | |
| 561 #' | |
| 562 #' @author Raivo Kolde <rkolde@@gmail.com> | |
| 563 #' @examples | |
| 564 #' # Generate some data | |
| 565 #' test = matrix(rnorm(200), 20, 10) | |
| 566 #' test[1:10, seq(1, 10, 2)] = test[1:10, seq(1, 10, 2)] + 3 | |
| 567 #' test[11:20, seq(2, 10, 2)] = test[11:20, seq(2, 10, 2)] + 2 | |
| 568 #' test[15:20, seq(2, 10, 2)] = test[15:20, seq(2, 10, 2)] + 4 | |
| 569 #' colnames(test) = paste("Test", 1:10, sep = "") | |
| 570 #' rownames(test) = paste("Gene", 1:20, sep = "") | |
| 571 #' | |
| 572 #' # Draw heatmaps | |
| 573 #' pheatmap(test) | |
| 574 #' pheatmap(test, kmeans_k = 2) | |
| 575 #' pheatmap(test, scale = "row", clustering_distance_rows = "correlation") | |
| 576 #' pheatmap(test, color = colorRampPalette(c("navy", "white", "firebrick3"))(50)) | |
| 577 #' pheatmap(test, cluster_row = FALSE) | |
| 578 #' pheatmap(test, legend = FALSE) | |
| 579 #' pheatmap(test, display_numbers = TRUE) | |
| 580 #' pheatmap(test, display_numbers = TRUE, number_format = "%.1e") | |
| 581 #' pheatmap(test, cluster_row = FALSE, legend_breaks = -1:4, legend_labels = c("0", | |
| 582 #' "1e-4", "1e-3", "1e-2", "1e-1", "1")) | |
| 583 #' pheatmap(test, cellwidth = 15, cellheight = 12, main = "Example heatmap") | |
| 584 #' pheatmap(test, cellwidth = 15, cellheight = 12, fontsize = 8, filename = "test.pdf") | |
| 585 #' | |
| 586 #' | |
| 587 #' # Generate column annotations | |
| 588 #' annotation = data.frame(Var1 = factor(1:10 %% 2 == 0, | |
| 589 #' labels = c("Class1", "Class2")), Var2 = 1:10) | |
| 590 #' annotation$Var1 = factor(annotation$Var1, levels = c("Class1", "Class2", "Class3")) | |
| 591 #' rownames(annotation) = paste("Test", 1:10, sep = "") | |
| 592 #' | |
| 593 #' pheatmap(test, annotation = annotation) | |
| 594 #' pheatmap(test, annotation = annotation, annotation_legend = FALSE) | |
| 595 #' pheatmap(test, annotation = annotation, annotation_legend = FALSE, drop_levels = FALSE) | |
| 596 #' | |
| 597 #' # Specify colors | |
| 598 #' Var1 = c("navy", "darkgreen") | |
| 599 #' names(Var1) = c("Class1", "Class2") | |
| 600 #' Var2 = c("lightgreen", "navy") | |
| 601 #' | |
| 602 #' ann_colors = list(Var1 = Var1, Var2 = Var2) | |
| 603 #' | |
| 604 #' pheatmap(test, annotation = annotation, annotation_colors = ann_colors, main = "Example") | |
| 605 #' | |
| 606 #' # Specifying clustering from distance matrix | |
| 607 #' drows = dist(test, method = "minkowski") | |
| 608 #' dcols = dist(t(test), method = "minkowski") | |
| 609 #' pheatmap(test, clustering_distance_rows = drows, clustering_distance_cols = dcols) | |
| 610 #' | |
| 611 #' @export | |
| 612 pheatmap_j = function(mat, color = colorRampPalette(rev(brewer.pal(n = 7, name = "RdYlBu")))(100), kmeans_k = NA, breaks = NA, border_color = "grey60", border_width = 1, cellwidth = NA, cellheight = NA, scale = "none", cluster_rows = TRUE, cluster_cols = TRUE, clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean", clustering_method = "complete", treeheight_row = ifelse(cluster_rows, 50, 0), treeheight_col = ifelse(cluster_cols, 50, 0), legend = TRUE, legend_breaks = NA, legend_labels = NA, annotation = NA, annotation_colors = NA, annotation_legend = TRUE, drop_levels = TRUE, show_rownames = T, show_colnames = T, main = NA, fontsize = 10, fontsize_row = fontsize, fontsize_col = fontsize, display_numbers = F, number_format = "%.2f", fontsize_number = 0.8 * fontsize, filename = NA, width = NA, height = NA, ...){ | |
| 613 | |
| 614 # Preprocess matrix | |
| 615 mat = as.matrix(mat) | |
| 616 if(scale != "none"){ | |
| 617 mat = scale_mat(mat, scale) | |
| 618 if(is.na(breaks)){ | |
| 619 breaks = generate_breaks(mat, length(color), center = T) | |
| 620 } | |
| 621 } | |
| 622 | |
| 623 | |
| 624 # Kmeans | |
| 625 if(!is.na(kmeans_k)){ | |
| 626 # Cluster data | |
| 627 km = kmeans(mat, kmeans_k, iter.max = 100) | |
| 628 mat = km$centers | |
| 629 | |
| 630 # Compose rownames | |
| 631 t = table(km$cluster) | |
| 632 rownames(mat) = sprintf("cl%s_size_%d", names(t), t) | |
| 633 } | |
| 634 else{ | |
| 635 km = NA | |
| 636 } | |
| 637 | |
| 638 # Do clustering | |
| 639 if(cluster_rows){ | |
| 640 tree_row = cluster_mat(mat, distance = clustering_distance_rows, method = clustering_method) | |
| 641 mat = mat[tree_row$order, , drop = FALSE] | |
| 642 } | |
| 643 else{ | |
| 644 tree_row = NA | |
| 645 treeheight_row = 0 | |
| 646 } | |
| 647 | |
| 648 if(cluster_cols){ | |
| 649 tree_col = cluster_mat(t(mat), distance = clustering_distance_cols, method = clustering_method) | |
| 650 mat = mat[, tree_col$order, drop = FALSE] | |
| 651 } | |
| 652 else{ | |
| 653 tree_col = NA | |
| 654 treeheight_col = 0 | |
| 655 } | |
| 656 | |
| 657 # Format numbers to be displayed in cells | |
| 658 if(display_numbers){ | |
| 659 fmat = matrix(sprintf(number_format, mat), nrow = nrow(mat), ncol = ncol(mat)) | |
| 660 attr(fmat, "draw") = TRUE | |
| 661 } | |
| 662 else{ | |
| 663 fmat = matrix(NA, nrow = nrow(mat), ncol = ncol(mat)) | |
| 664 attr(fmat, "draw") = FALSE | |
| 665 } | |
| 666 | |
| 667 | |
| 668 # Colors and scales | |
| 669 if(!is.na(legend_breaks[1]) & !is.na(legend_labels[1])){ | |
| 670 if(length(legend_breaks) != length(legend_labels)){ | |
| 671 stop("Lengths of legend_breaks and legend_labels must be the same") | |
| 672 } | |
| 673 } | |
| 674 | |
| 675 | |
| 676 if(is.na(breaks[1])){ | |
| 677 breaks = generate_breaks(as.vector(mat), length(color)) | |
| 678 } | |
| 679 if (legend & is.na(legend_breaks[1])) { | |
| 680 legend = grid.pretty(range(as.vector(breaks))) | |
| 681 names(legend) = legend | |
| 682 } | |
| 683 else if(legend & !is.na(legend_breaks[1])){ | |
| 684 legend = legend_breaks[legend_breaks >= min(breaks) & legend_breaks <= max(breaks)] | |
| 685 | |
| 686 if(!is.na(legend_labels[1])){ | |
| 687 legend_labels = legend_labels[legend_breaks >= min(breaks) & legend_breaks <= max(breaks)] | |
| 688 names(legend) = legend_labels | |
| 689 } | |
| 690 else{ | |
| 691 names(legend) = legend | |
| 692 } | |
| 693 } | |
| 694 else { | |
| 695 legend = NA | |
| 696 } | |
| 697 mat = scale_colours(mat, col = color, breaks = breaks) | |
| 698 | |
| 699 # Preparing annotation colors | |
| 700 if(!is.na(annotation[[1]][1])){ | |
| 701 annotation = annotation[colnames(mat), , drop = F] | |
| 702 annotation_colors = generate_annotation_colours(annotation, annotation_colors, drop = drop_levels) | |
| 703 } | |
| 704 | |
| 705 if(!show_rownames){ | |
| 706 rownames(mat) = NULL | |
| 707 } | |
| 708 | |
| 709 if(!show_colnames){ | |
| 710 colnames(mat) = NULL | |
| 711 } | |
| 712 | |
| 713 # Draw heatmap | |
| 714 heatmap_motor(mat, border_color = border_color, border_width = border_width, cellwidth = cellwidth, cellheight = cellheight, treeheight_col = treeheight_col, treeheight_row = treeheight_row, tree_col = tree_col, tree_row = tree_row, filename = filename, width = width, height = height, breaks = breaks, color = color, legend = legend, annotation = annotation, annotation_colors = annotation_colors, annotation_legend = annotation_legend, main = main, fontsize = fontsize, fontsize_row = fontsize_row, fontsize_col = fontsize_col, fmat = fmat, fontsize_number = fontsize_number, ...) | |
| 715 | |
| 716 invisible(list(tree_row = tree_row, tree_col = tree_col, kmeans = km)) | |
| 717 } | |
| 718 | |
| 719 |
