annotate lib/utils.R @ 0:1d1b9e1b2e2f draft

Uploaded
author petr-novak
date Thu, 19 Dec 2019 10:24:45 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1 #!/usr/bin/env Rscript
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
2 suppressPackageStartupMessages(library(DBI))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
3 suppressPackageStartupMessages(library(RSQLite))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
4
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
5 CONNECTED = FALSE
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
6 if (FALSE) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
7 ## for testing
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
8 seqdb = "/mnt/raid/spolecny/petr/RE2/comparative_test/sequences.db"
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
9 hitsortdb = "/mnt/raid/spolecny/petr/RE2/comparative_test/hitsort.db"
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
10 class_file = "/mnt/raid/users/petr/workspace/repex_tarean/databases/classification_tree.rds"
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
11 ## connect to sqlite databases
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
12 SEQDB = dbConnect(RSQLite::SQLite(), seqdb)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
13 HITSORTDB = dbConnect(RSQLite::SQLite(), hitsortdb)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
14 CLS_TREE = readRDS(class_file)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
15 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
16
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
17 connect_to_databases = function(seqdb, hitsortdb,classification_hierarchy_file = NULL){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
18 if (!CONNECTED){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
19 SEQDB <<- dbConnect(RSQLite::SQLite(), seqdb)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
20 HITSORTDB <<- dbConnect(RSQLite::SQLite(), hitsortdb)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
21 if (!is.null(classification_hierarchy_file)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
22 CLS_TREE <<- readRDS(classification_hierarchy_file)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
23 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
24 CONNECTED <<- TRUE
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
25 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
26 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
27
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
28 disconnect_database = function(){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
29 if (CONNECTED){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
30 dbDisconnect(SEQDB)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
31 dbDisconnect(HITSORTDB)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
32 CONNECTED <<- FALSE
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
33 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
34 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
35
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
36 nested2named_list = function(x){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
37 y = as.list(unlist(x[[1]]))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
38 names(y) = unlist(x[[2]])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
39 return(y)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
40 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
41
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
42 is_comparative = function(){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
43 prefix_codes = dbGetQuery(SEQDB,"SELECT * FROM prefix_codes")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
44 if (nrow(prefix_codes) == 0){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
45 return(FALSE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
46 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
47 return(TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
48 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
49 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
50
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
51 get_comparative_codes = function(){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
52 prefix_codes = dbGetQuery(SEQDB,"SELECT * FROM prefix_codes")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
53 return(prefix_codes)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
54 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
55
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
56 add_preamble = function(html_file, preamble){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
57 html_content=readLines(html_file)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
58 modified_html_content = gsub("<body>",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
59 paste("<body>\n", preamble,"\n"),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
60 html_content)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
61 cat(modified_html_content, file = html_file, sep="\n")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
62 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
63
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
64
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
65 df2html = function(df, header = NULL, sort_col = NULL, digits = 3, rounding_function=signif, decreasing = TRUE, scroling = FALSE, width = 300){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
66 if (!is.null(sort_col)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
67 df = df[order(df[,sort_col], decreasing = decreasing),]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
68 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
69 if (!is.null(digits)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
70 for (i in seq_along(df)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
71 if(is.numeric(df[,i])){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
72 df[,i] = rounding_function(df[,i], digits)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
73 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
74 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
75 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
76 if (is.null(header)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
77 h = ""
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
78 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
79 h = paste0(" <th>",header,"</th>\n", collapse="") %>%
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
80 paste0(" <tr>\n", .," </tr>\n")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
81 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
82 x = apply(df,1,function(x)paste0(" <td>",x,"</td>\n", collapse="")) %>%
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
83 paste0(" <tr>\n", .," </tr>\n", collapse = "")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
84 if (scroling){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
85 cols = paste0('<col width="',rep(round(100/ncol(df)),ncol(df)),'%">\n',collapse ="")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
86 height = min(200, 22 * nrow(df))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
87 out = paste0(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
88 '<table cellspacing="0" cellpadding="0" border="0" width="',width,'">\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
89 ' <tr>\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
90 ' <td>\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
91 ' <table cellspacing="0" cellpadding="1" border="1" width="', width,'" >\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
92 cols,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
93 h,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
94 ' </table>\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
95 ' </td>\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
96 ' </tr>\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
97 ' <tr>\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
98 ' <td>\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
99 ' <div style="width:',width,'px; height:',height,'px; overflow:auto;">\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
100 ' <table cellspacing="0" cellpadding="1" border="1" width="',width,'" >\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
101 cols,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
102 x,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
103 ' </table>\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
104 ' </div>\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
105 ' </td>\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
106 ' </tr>\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
107 '</table>\n'
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
108 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
109
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
110 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
111 out = paste ("<table>\n", h,x, "</table>\n")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
112 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
113 return(out)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
114 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
115
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
116 start_html = function(filename, header){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
117 cat(header, file = filename)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
118 html_writer = function(content, fn=HTML, ...){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
119 fn(content, append = TRUE, file = filename, ...)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
120 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
121 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
122
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
123 preformatted = function(x){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
124 ## make preformatted html text
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
125 return(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
126 paste(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
127 "<pre>\n",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
128 x,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
129 "</pre>"
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
130 ,sep="")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
131 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
132 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
133
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
134
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
135 summary_histogram = function(fn, N_clustering, N_omit=0, size_adjusted=NULL, top_clusters_prop){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
136 ## assume connection do databases
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
137 communities = dbGetQuery(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
138 HITSORTDB,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
139 "SELECT DISTINCT cluster, size, supercluster, supercluster_size FROM communities ORDER BY supercluster"
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
140 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
141 if (N_omit != 0){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
142 ## adjust communities and cluster sizes:
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
143 cluster_to_adjust = which(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
144 communities$size[order(communities$cluster)][1:length(size_adjusted)] != size_adjusted
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
145 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
146 ## keep original value:
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
147 communities$size_original = communities$size
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
148 superclusters_to_adjust = unique(communities$supercluster[communities$cluster %in% cluster_to_adjust])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
149 for (cl in cluster_to_adjust){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
150 communities[communities$cluster == cl,'size'] = size_adjusted[cl]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
151 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
152 for (cl in superclusters_to_adjust){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
153 communities[communities$supercluster == cl,'supercluster_size'] =
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
154 sum(communities[communities$supercluster == cl,'size'])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
155 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
156 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
157 cluster_to_adjust=NULL
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
158 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
159 singlets = N_clustering - sum(communities$size)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
160
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
161 supercluster_size = sort(unique(communities[, c('supercluster', 'supercluster_size')])$supercluster_size, decreasing = TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
162
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
163
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
164 clid2size = sort(communities$size, decreasing = TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
165
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
166 cluster_id = split(communities$cluster, communities$supercluster)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
167 cluster_id_sort = lapply(cluster_id, function(x)x[order(clid2size[x], decreasing = FALSE)])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
168
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
169 cluster_size_unsorted = split(communities$size, communities$supercluster)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
170 cluster_size_sort = lapply(cluster_size_unsorted, function(x) (sort(x)))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
171 ## reorder by size of superclusters
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
172 cluster_size_sort_sort = cluster_size_sort[order(sapply(cluster_size_sort, sum), decreasing = TRUE)]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
173 cluster_id_sort_sort = cluster_id_sort[order(sapply(cluster_size_sort, sum), decreasing = TRUE)]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
174
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
175
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
176 Nmax = max(sapply(cluster_size_sort_sort, length))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
177 M = cbind(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
178 sapply(cluster_size_sort_sort, function(x)y = c(x, rep(0, 1 + Nmax - length(x)))),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
179 c(1, rep(0, Nmax))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
180 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
181
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
182 Mid = cbind(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
183 sapply(cluster_id_sort_sort, function(x)y = c(x, rep(0, 1 + Nmax - length(x)))),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
184 c(1, rep(0, Nmax))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
185 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
186
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
187 recolor = matrix(ifelse(Mid %in% cluster_to_adjust,TRUE,FALSE), ncol=ncol(Mid))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
188 indices = which(recolor, arr.ind = TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
189
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
190
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
191 png(fn, width = 1200, height = 700, pointsize = 20)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
192 barplot(M,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
193 width = c(supercluster_size, singlets),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
194 space = 0, ylim = c(0, max(supercluster_size) * 1.2),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
195 col = c("#CCCCCC"), names.arg = rep("", ncol(M)))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
196 plot(0,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
197 xlim = c(0, sum(c(supercluster_size, singlets))),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
198 ylim = c(0, max(supercluster_size) * 1.2),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
199 type = "n", yaxs = 'i', axes = FALSE, xlab = "", ylab = "")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
200
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
201 rect(0, 0,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
202 sum(supercluster_size),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
203 max(supercluster_size) * 1.2,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
204 col = "#0000FF10")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
205
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
206 rect(sum(supercluster_size), 0,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
207 sum(supercluster_size) + singlets,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
208 max(supercluster_size) * 1.2,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
209 col = "#FFAAFF10")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
210
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
211 barplot(M,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
212 width = c(supercluster_size, singlets),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
213 space = 0, ylim = c(0, max(supercluster_size) * 1.2),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
214 col = "#AAAAAA", names.arg = rep("", ncol(M)), add = TRUE,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
215 xlab = "Proportion of reads [%]", ylab = "Number of reads",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
216 main = paste(N_clustering, "reads total"))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
217
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
218
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
219 for (i in seq_along(indices[,1])){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
220 y1 = sum(M[1:indices[i,'row'],indices[i,'col']])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
221 x1 = sum(M[,1:indices[i,'col']])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
222 if(indices[i,'row'] == 1){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
223 y0=0
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
224 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
225 y0 = sum(M[1:(indices[i,'row']-1),indices[i,'col']])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
226 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
227 if (indices[i,'col']==1){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
228 x0=0
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
229 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
230 x0 = sum(M[,1:(indices[i,'col']-1)])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
231 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
232 rect(x0,y0,x1,y1, col="#88FF88")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
233 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
234 abline(v=top_clusters_prop * sum(c(supercluster_size, singlets)), col="#00000088", lwd=3, lty=3)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
235
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
236 text(sum(supercluster_size) / 2,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
237 max(supercluster_size) * 1.05,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
238 labels = paste0(sum(supercluster_size), " reads in\n",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
239 length(supercluster_size), " supeclusters (", nrow(communities), " clusters)")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
240 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
241
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
242
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
243 text(sum(supercluster_size) + singlets / 2,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
244 max(supercluster_size) * 1.05,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
245 labels = paste(singlets, "singlets"))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
246
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
247 axis(1,at=seq(0,N_clustering,length.out=11),label=seq(0,100,by=10))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
248 dev.off()
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
249 clustering_info = list(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
250 Number_of_reads_in_clusters = sum(supercluster_size),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
251 Number_of_clusters = nrow(communities),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
252 Number_of_superclusters = length(supercluster_size),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
253 Number_of_singlets = singlets
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
254 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
255 return(clustering_info)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
256 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
257
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
258
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
259 rectMap=function(x,scale.by='row',col=1,xlab="",ylab="",grid=TRUE,axis_pos=c(1,4),cexx=NULL,cexy=NULL){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
260 if (scale.by=='row'){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
261 #x=(x)/rowSums(x)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
262 x=(x)/apply(x,1,max)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
263 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
264 if (scale.by=='column'){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
265 x=t(t(x)/apply(x,2,max))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
266 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
267 nc=ncol(x)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
268 nr=nrow(x)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
269 coords=expand.grid(1:nr,1:nc)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
270 plot(coords[,1],coords[,2],type='n',axes=F,xlim=range(coords[,1])+c(-.5,.5),ylim=range(coords[,2])+c(-.5,.5),xlab=xlab,ylab=ylab)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
271 axis(axis_pos[1],at=1:nr,labels=rownames(x),lty=0,tick=FALSE,line=0,cex.axis=0.5/log10(nr))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
272 axis(axis_pos[2],at=1:nc,labels=colnames(x),lty=0,tick=FALSE,las=2,line=0 ,hadj=0, cex.axis=0.7)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
273 axis(2,at=1:nc,labels=colnames(x),lty=0,tick=FALSE,las=2,line=0 ,hadj=1, cex.axis=0.7)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
274
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
275 mtext(side = 1, "Cluster id", las=1, line = 3, cex = 0.5)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
276 line = 1.5 + log10(nr)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
277 mtext(side = 2, "Proportions of individual samples", las =0, line = line, cex = 0.5)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
278 s=c(x)/2 # to get it proportional
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
279 w = c(x)/2
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
280 rect(coords[,1]-0.5,coords[,2]-s,coords[,1]+0.5,coords[,2]+s,col=col,border=NA)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
281 if (grid){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
282 abline(v=0:(nr)+.5,h=0:(nc)+.5,lty=2,col="#60606030")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
283 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
284 box(col="#60606030",lty=2)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
285 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
286
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
287 plot_rect_map = function(read_counts,cluster_annotation, output_file,Xcoef=1,Ycoef=1){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
288 counts = read.table(read_counts,header=TRUE,as.is=TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
289 annot = read.table(cluster_annotation, sep="\t",header=FALSE,as.is=TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
290 N = nrow(annot)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
291 colnames(annot) = c("cluster", "Automatic.classification")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
292 annot$number.of.reads = rowSums(counts[1 : nrow(annot) ,-1])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
293 unique_repeats = names(sort(table(c(annot$Automatic.classification,rep('nd',N))),decreasing = TRUE))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
294
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
295 M = as.matrix(counts[1:N,-(1:2)])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
296 rownames(M) = paste0("CL",rownames(M))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
297 Mn1=(M)/apply(M,1,max)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
298 Mn2=M/max(M)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
299 Mn2=M/apply(M,1,sum)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
300
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
301 ord1 = hclust(dist(Mn1),method = "ward.D")$order
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
302 ord2 = hclust(dist(t(Mn2)))$order
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
303 wdth = (400 + N*10 ) * Xcoef
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
304 hgt = (600 + ncol(M)*50) * Ycoef
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
305 ptsize = round((wdth*hgt)^(1/4))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
306 png(output_file, width=wdth,height=hgt, pointsize = ptsize) # was 50
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
307 ploting_area_width = 3 + log10(N)*3
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
308 ploting_area_sides = 1
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
309 layout(matrix(c(4,2,3,4,1,3),ncol=3,byrow = TRUE),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
310 width=c(ploting_area_sides,ploting_area_width,ploting_area_sides),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
311 height=c(3,ncol(M)*0.5))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
312 par(xaxs='i', yaxs = 'i')
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
313 par(las=2,mar=c(4,0,0,0),cex.axis=0.5)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
314 rectMap(Mn2[ord1,ord2],scale.by='none',col=1, grid=TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
315 par(las=2,mar=c(1,0,1,0), mgp = c(2,0.5,0))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
316 barplot(annot$number.of.reads[ord1], col = 1)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
317 mtext(side = 2, "Cluster size", las = 3, line = 2, cex = 0.5)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
318 par(mar=c(0,0,10,0))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
319 plot.new()
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
320 st = dev.off()
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
321 ## calculate coordinated if boxes to create hyperlink
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
322 X0 = wdth/(ploting_area_sides * 2 + ploting_area_width)* ploting_area_sides
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
323 X1 = wdth/(ploting_area_sides * 2 + ploting_area_width)*(ploting_area_sides + ploting_area_width)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
324 L = round(seq(X0,X1, length.out = N + 1)[1:N])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
325 R = round(seq(X0,X1, length.out = N + 1)[2:(N + 1)])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
326 cn = rownames(Mn2[ord1,ord2])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
327 cluster_links = paste0(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
328 "seqclust/clustering/clusters/dir_CL",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
329 sprintf("%04d", as.integer(substring(cn,3 ))),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
330 "/index.html")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
331 coords = paste0(L, ",", 1, ",", R, ",", hgt)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
332 clustermap = paste0(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
333 '\n<map name="clustermap"> \n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
334 paste0(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
335 '<area shape="rect"\n coords="',coords, '"\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
336 ' href="', cluster_links, '"\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
337 ' title="', cn, '"/>\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
338 collapse = ""),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
339 "</map>\n")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
340 return(clustermap)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
341 }