annotate lib/create_annotation.R @ 0:1d1b9e1b2e2f draft

Uploaded
author petr-novak
date Thu, 19 Dec 2019 10:24:45 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1 #!/usr/bin/env Rscript
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
2 Sys.setlocale("LC_CTYPE", "en_US.UTF-8") # this is necessary for handling unicode characters (data.tree package)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
3 suppressPackageStartupMessages(library(data.tree))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
4 suppressPackageStartupMessages(library(stringr))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
5 suppressPackageStartupMessages(library(R2HTML))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
6 suppressPackageStartupMessages(library(hwriter))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
7 suppressPackageStartupMessages(library(DT))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
8 suppressPackageStartupMessages(library(tools))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
9 suppressPackageStartupMessages(library(scales))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
10 suppressPackageStartupMessages(library(igraph))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
11 suppressPackageStartupMessages(library(plotrix))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
12 suppressPackageStartupMessages(library(png))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
13
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
14 source("htmlheader.R")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
15 source("config.R") # load tandem ranks info
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
16 source("utils.R")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
17 DT_OPTIONS = options = list(pageLength = 1000, lengthMenu = c(10,50,100,1000,5000,10000))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
18 WD = getwd() # to get script directory when run from Rserve
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
19 HTMLHEADER = htmlheader ## header (character) loaded from htmlheader.R
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
20 htmlheader = gsub("Superclusters summary","TAREAN summary", htmlheader)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
21
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
22 evaluate_LTR_detection = function(f){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
23 NO_LTR=NULL
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
24 if (length(readLines(f)) == 11 ){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
25 return(NO_LTR)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
26 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
27 df=read.table(f,as.is=TRUE,sep="\t", skip=11,fill=TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
28 if (ncol(df) != 23){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
29 #df is smaller if no pbs is detected!
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
30 return(NO_LTR)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
31 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
32 df=df[!df$V13 == "",,drop=FALSE]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
33 if (nrow(df)==0){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
34 return(NO_LTR)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
35 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
36 # criteria:
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
37 df_part=df[df$V15 >=12 & df$V20 == 23 & df$V21<df$V20,,drop=FALSE]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
38 if (nrow(df_part) == 0){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
39 return(NO_LTR)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
40 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
41 PBS_type = gsub("_","",(str_extract_all(df_part$V13, pattern="_([A-Z][a-z]{2})", simplify=TRUE))) %>%
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
42 paste(collapse=" ")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
43 return(PBS_type)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
44 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
45
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
46
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
47
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
48 ## annotate superclusters
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
49 select_reads_id = function(index, search_type = c("cluster","supercluster")) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
50 ## select read if base on the supecluster index need database connection
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
51 ## HITSORTDB!
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
52 search_type = match.arg(search_type)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
53 x = dbGetQuery(HITSORTDB,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
54 paste0("SELECT vertexname FROM vertices WHERE vertexindex IN ",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
55 "(SELECT vertexindex FROM communities ",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
56 "WHERE ", search_type,"=\"", index,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
57 "\")"))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
58 return(x$vertexname)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
59 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
60
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
61
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
62 get_reads_annotation = function(reads_id) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
63 ## select annotation from tables in SEQDB which has name in format *_database
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
64 annot_types = grep("_database", dbListTables(SEQDB), value = TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
65 annot = list()
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
66 for (i in annot_types) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
67 query = paste0("SELECT * FROM ", i, " WHERE name IN (", paste0("\"", reads_id,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
68 "\"", collapse = ", "), ")")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
69 annot[[i]] = dbGetQuery(SEQDB, query)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
70 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
71 return(annot)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
72 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
73
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
74 supercluster_size = function(supercluster) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
75 x = dbGetQuery(HITSORTDB, paste0("SELECT count(*) FROM vertices WHERE vertexindex IN ",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
76 "(SELECT vertexindex FROM communities ", "WHERE supercluster=\"", supercluster,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
77 "\")"))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
78 return(x$"count(*)")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
79 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
80
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
81
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
82 cluster_annotation = function(cluster, search_type = c("cluster", "supercluster")){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
83 ## searcheither for cluster or supercluster annotation
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
84 ## read annotation from sqlite databases database is access though SEQDB (sequence
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
85 ## annotation) and HITSORTDB - clustering information
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
86 search_type = match.arg(search_type)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
87 reads_id = select_reads_id(cluster, search_type)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
88 annot = get_reads_annotation(reads_id)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
89 return(annot)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
90 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
91
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
92 get_tarean_info = function(cluster, search_type = c("cluster", "supercluster")){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
93 search_type = match.arg(search_type)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
94 if (search_type == "cluster") {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
95 search_type = "[index]"
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
96 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
97 tarean_info = dbGetQuery(HITSORTDB,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
98 paste0(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
99 "SELECT [index], supercluster, satellite_probability, tandem_rank, size_real, satellite FROM cluster_info WHERE ",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
100 search_type, " = ", cluster))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
101 nhits = sum(tarean_info$size_real[tarean_info$tandem_rank %in% 1:2])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
102 proportion = nhits/sum(tarean_info$size_real)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
103 return( list(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
104 nhits = nhits,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
105 proportion = proportion,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
106 tarean_info = tarean_info)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
107 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
108
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
109 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
110
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
111 get_ltr_info = function(supercluster){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
112 ltr_info = dbGetQuery(HITSORTDB,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
113 paste0(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
114 "SELECT [index], supercluster, ltr_detection, size_real FROM cluster_info WHERE ",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
115 "supercluster", " = ", supercluster))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
116 return(ltr_info)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
117 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
118
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
119 summarize_annotation = function(annot, size = NULL){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
120 db_id = do.call(rbind, annot)$db_id
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
121 cl_string = gsub("^.+/","",gsub(":.+$", "", gsub("^.+#", "", db_id)))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
122 weight = do.call(rbind, annot)$bitscore
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
123 domain = str_replace(str_extract(str_replace(db_id, "^.+#", ""), ":.+"), ":",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
124 "")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
125 domain[is.na(domain)] = ""
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
126 domain_table = table(cl_string, domain)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
127 dt = as.data.frame(domain_table)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
128 ## remove no count
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
129 dt = dt[dt$Freq != 0, ,drop = FALSE]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
130 rownames(dt) = paste(dt$cl_string,dt$domain)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
131 if (!is.null(size)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
132 dt$proportion = dt$Freq / size
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
133
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
134 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
135 return(dt)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
136 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
137
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
138 annot2colors = function(annot, ids, base_color = "#00000050"){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
139 annot_table = do.call(rbind, annot)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
140 domains = str_replace(str_extract(str_replace(annot_table$db_id, "^.+#", ""), ":.+"), ":","")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
141 other_annot = str_replace(annot_table$db_id, "^.+#", "")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
142 complete_annot = ifelse(is.na(domains), other_annot, domains)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
143 names(complete_annot) = annot_table$name
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
144 unique_complete_annot = names (table(complete_annot) %>% sort(decreasing = TRUE))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
145 color_key = unique_colors[seq_along(unique_complete_annot)]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
146 names(color_key) = unique_complete_annot
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
147 color_table = rep(base_color, length(ids))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
148 names(color_table) = ids
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
149 color_table[names(complete_annot)] = color_key[complete_annot]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
150 return(list( color_table = color_table, legend = color_key))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
151 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
152
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
153
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
154 read_annotation_to_tree = function(supercluster, TREE_TEMPLATE = CLS_TREE) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
155 ## CLS_TREE is datatree containing 'taxonomy' information of repeats, attribute of
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
156 ## each node/ leave if filled up from annotation annotation is added to leaves
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
157 ## only!! Inner node are NA or NULL
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
158 annot0 = cluster_annotation(supercluster, search_type = "supercluster")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
159 ## keep only built in annotation
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
160 annot = list(protein_databse = annot0$protein_database, dna_database = annot0$dna_database)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
161 annot$dna_database$bitscore = annot$dna_database$bitscore / 2 #DNA bitscore corection, blastx - more weight
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
162 annot = do.call(rbind, annot)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
163 ## remove duplicated hits, keep best
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
164 annot_clean = annot[order(annot$bitscore, decreasing = TRUE), ]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
165 annot_clean = annot_clean[!duplicated(annot_clean$name), ]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
166 tarean_info = get_tarean_info(supercluster, search_type = "supercluster")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
167 ltr_info = get_ltr_info(supercluster)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
168 db_id = annot_clean$db_id
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
169 cl_string = gsub(":.+$", "", gsub("^.+#", "", db_id))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
170 weight = annot_clean$bitscore
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
171 domain = str_replace(str_extract(str_replace(db_id, "^.+#", ""), ":.+"), ":",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
172 "")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
173 domain[is.na(domain)] = "NA"
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
174 mean_weight_table = by(weight, INDICES = list(cl_string, domain), FUN = function(x) signif(mean(x)))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
175 mean_weight_table[is.na(mean_weight_table)] = 0
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
176 total_weight_table = by(weight, INDICES = list(cl_string, domain), FUN = sum)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
177 total_weight_table[is.na(total_weight_table)] = 0
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
178 cl_table = table(cl_string)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
179 domain_table = table(cl_string, domain)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
180 cls_tree = Clone(TREE_TEMPLATE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
181 cls_tree$size = supercluster_size(supercluster)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
182 cls_tree$ltr_info = ltr_info
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
183 for (i in cls_tree$leaves) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
184 if (i$full_name %in% names(cl_table)) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
185 i$nhits = cl_table[[i$full_name]]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
186 i$domains = domain_table[i$full_name, ]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
187 names(i$domains) = colnames(domain_table)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
188 i$mean_weight = mean_weight_table[i$full_name, ]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
189 i$total_weight = total_weight_table[i$full_name, ]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
190 i$proportion = signif(i$nhits/cls_tree$size, 3)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
191 } else {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
192 if (i$name == "satellite"){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
193 i$nhits = tarean_info$nhits
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
194 i$tandem_rank = tarean_info$tarean_info$tandem_rank
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
195 i$proportion = tarean_info$proportion
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
196 i$tarean_info = tarean_info$tarean_info
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
197 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
198 i$proportion = 0
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
199 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
200 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
201 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
202 ## create domain string for printing:
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
203 for (i in Traverse(cls_tree)) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
204 if (is.null(i$domains)) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
205 i$features = ""
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
206 } else if (sum(i$domains) == 0) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
207 i$features = ""
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
208 } else {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
209 dom = i$domains[!names(i$domains) == "NA"]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
210 i$features = gsub(" *\\(\\)", "", pasteDomains(dom))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
211 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
212 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
213 ## add LTR info:
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
214 if (any(!cls_tree$ltr_info$ltr_detection %in% "None")){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
215 tr = FindNode(cls_tree, "LTR")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
216 tr$features = "LTR/PBS"
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
217 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
218 return(cls_tree)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
219 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
220
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
221 pasteDomains = function(dom){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
222 d = dom[dom != 0]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
223 if (length(d) == 0){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
224 return("")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
225 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
226 paste0(d, " (", names(d), ")", sep = ", ", collapse="")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
227 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
228
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
229 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
230
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
231 rescale = function(x, newrange) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
232 # taken from plotrix package
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
233 if (missing(x) | missing(newrange)) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
234 usage.string <- paste("Usage: rescale(x,newrange)\n", "\twhere x is a numeric object and newrange is the new min and max\n",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
235 sep = "", collapse = "")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
236 stop(usage.string)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
237 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
238 if (is.numeric(x) && is.numeric(newrange)) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
239 xna <- is.na(x)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
240 if (all(xna))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
241 return(x)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
242 if (any(xna))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
243 xrange <- range(x[!xna]) else xrange <- range(x)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
244 if (xrange[1] == xrange[2])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
245 return(x)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
246 mfac <- (newrange[2] - newrange[1])/(xrange[2] - xrange[1])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
247 return(newrange[1] + (x - xrange[1]) * mfac)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
248 } else {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
249 warning("Only numeric objects can be rescaled")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
250 return(x)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
251 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
252 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
253
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
254 add_value_to_nodes = function(tr, value_names = c("nhits", "domains", "total_weight",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
255 "proportion")) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
256 ## propagate values from leaves to nodes, assume that nodes are not defined
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
257 ## (either NA or NULL) leaves coud be be either numeric of list, if list values
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
258 ## are summed up based on the names
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
259 for (vn in value_names) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
260 for (i in Traverse(tr)) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
261 w = add_leaves_value(i$leaves, vn)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
262 i[[vn]] = w
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
263 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
264 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
265 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
266
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
267 add_leaves_value = function(x, name) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
268 ## check if annotation is multivalue or single value, propagates values from
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
269 ## leaves to root
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
270 n = unique(unlist(lapply(lapply(x, "[[", name), names)))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
271 if (is.null(n)) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
272 ## no names given, we can sum everything to one value
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
273 total = sum(unlist(sapply(x, "[[", name)), na.rm = TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
274 return(total)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
275 } else {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
276 total = numeric(length(n))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
277 names(total) = n
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
278 xvals = lapply(x, "[[", name)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
279 N = 0
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
280 for (i in xvals) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
281 for (j in names(i)) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
282 total[j] = total[j] + i[j]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
283 N = N + 1
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
284 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
285 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
286 return(total)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
287 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
288 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
289
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
290
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
291 trmap = function(tr, xl = 0, yb = 0, xr = 1, yt = 1, first = TRUE, vertical = TRUE) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
292 # treemap for data.tree plotting - experimental
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
293 if (first) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
294 plot(xl:xr, yb:yt, type = "n", axes = FALSE, xlab = "", ylab = "")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
295 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
296 if (tr$nhits > 0) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
297 width = 2 * (6 - tr$level)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
298 rect(xl, yb, xr, yt, col = "#00008805", lwd = width, border = "#00000080")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
299 if (tr$level != 1) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
300 text(x = (xl + xr)/2, y = (yb + yt)/2, labels = tr$name, cex = width/4)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
301 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
302 } else {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
303 return(NULL)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
304 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
305 Nchildren = length(tr$children)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
306 cat("children:", tr$name, tr$level, "\n")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
307 if (Nchildren == 0) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
308 return(NULL)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
309 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
310 sizes = sapply(tr$children, "[[", "nhits")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
311 rng = c(sizes, 0) / sum(sizes)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
312 if (vertical) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
313 ## x does not change
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
314 xl2 = rep(xl, Nchildren)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
315 xr2 = rep(xr, Nchildren)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
316 yb2 = rescale(rng, c(yb, yt))[1:(Nchildren + 1)]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
317 yt2 = rescale(rng, c(yb, yt))[-1]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
318 } else {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
319 yb2 = rep(yb, Nchildren)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
320 yt2 = rep(yt, Nchildren)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
321 xr2 = rescale(rng, c(xl, xr))[1:(Nchildren + 1)]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
322 xl2 = rescale(rng, c(xl, xr))[-1]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
323 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
324 for (i in 1:Nchildren) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
325 trmap(tr$children[[i]], xl2[i], yb2[i], xr2[i], yt2[i], first = FALSE, vertical = !vertical)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
326 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
327
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
328 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
329
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
330 filter_tree = function(tr) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
331 ## keep only nodes with positive nhits values
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
332 ## must be used on trees where leaves were already propagated
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
333 ## using add_values_to_nodes
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
334 tr_filtered = Clone(tr)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
335 Prune(tr_filtered, function(x) x$nhits > 0 | containLTR(x))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
336 return(tr_filtered)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
337 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
338
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
339 containLTR = function(tr){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
340 for (i in Traverse(tr)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
341 if (i$features == "LTR/PBS"){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
342 return(TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
343 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
344 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
345 return(FALSE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
346 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
347
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
348 filter_tree2 = function(tr) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
349 tr_filtered = Clone(tr)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
350 Prune(tr_filtered, function(x) x$parent$nhits > 0)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
351 return(tr_filtered)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
352 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
353
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
354 ## this is for testing purposesm in futurem each node will have its function
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
355 ## named find_best_hit, it will work like decisin tree.
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
356 ## functions will be set manually based on training data
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
357 formatWidth = function(x, w){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
358 if (length (dim(x)) > 1){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
359 for (i in seq_along(x)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
360 delta = nchar(x[,i], type="bytes") - nchar(x[,i], type="char")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
361 ## delta correction is neccessary for utf-8 correct formating
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
362 x[,i] = sprintf(paste0("%-",w[i] + delta,"s"), x[,i])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
363 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
364 return(x)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
365 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
366 delta = nchar(x, type="bytes") - nchar(x, type="char")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
367 sprintf(paste0("%",w + delta,"s"),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
368 x) %>% return
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
369 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
370 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
371
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
372
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
373 find_best_hit_repeat = function(cls_tree){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
374 ## three children:
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
375 ## repeat
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
376 ## |--rDNA
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
377 ## |--satellite this need special handling, rank 1 or 2
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
378 ## |--mobile_element
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
379 ##
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
380 ## in this case we require that satellite has proportion 0.95
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
381 ## params of good hit
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
382 min_prop_of_positive = 0.7
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
383 min_prop_second_best = 0.9
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
384 min_proportion_x_nhits = 2.5 # based on 0.05 x 50
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
385 min_satellite_proportion = 0.95
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
386 if (isLeaf(cls_tree)) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
387 return(cls_tree)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
388 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
389 cond1 = cls_tree$nhits * cls_tree$proportion < min_proportion_x_nhits
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
390 if (cond1){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
391 return(cls_tree)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
392 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
393 nhits = sapply(cls_tree$children, "[[", "nhits")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
394 all_hits = cls_tree$root$nhits
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
395 name_of_best_hit = cls_tree$children[[which.max(nhits)]]$name
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
396 best_hit_child = cls_tree$children[[which.max(nhits)]]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
397 second_max_hits = ifelse(length(nhits) == 1, 0, max(nhits[-which.max(nhits)]))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
398 cond2 = max(nhits) / sum(nhits) > min_prop_of_positive
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
399 cond3 = max(nhits) / (second_max_hits + max(nhits)) > min_prop_second_best
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
400 cond_satellite = best_hit_child$proportion > min_satellite_proportion & name_of_best_hit == "satellite"
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
401 cond_other = ! name_of_best_hit == "satellite"
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
402 cat(cond2, cond3, cond_satellite, cond_other, "\n")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
403 if (cond2 & cond3 & (cond_satellite | cond_other)) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
404 ## clear case satellite or other
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
405 if ("find_best_hit" %in% names(best_hit_child)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
406 ## custom rules for node is defined
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
407 best_hit = best_hit_child$find_best_hit(cls_tree$children[[which.max(nhits)]])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
408 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
409 # use generic rules
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
410 best_hit = find_best_hit(cls_tree$children[[which.max(nhits)]])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
411 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
412 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
413 ## do more specific tests for rDNA, or mobile element
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
414 ## rDNA o mobile_elements must be above threshold!
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
415 cond_sat_rank = any(cls_tree$satellite$tandem_rank == 2) & cls_tree$satellite$nhits != 0
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
416 cond_rdna = cls_tree$rDNA$nhits * cls_tree$rDNA$proportion > min_proportion_x_nhits
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
417 cond_mobile = cls_tree$mobile_element$nhits * cls_tree$mobile_element$proportion > min_proportion_x_nhits
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
418 cat(cond_sat_rank, "\n")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
419 cat(cond_rdna,"\n")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
420 cat(cond_mobile, "\n")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
421 if (cond_sat_rank & (cond_rdna | cond_mobile) ){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
422 cls_tree$satellite$nhits = 0
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
423 best_hit = find_best_hit(cls_tree)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
424 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
425 return(cls_tree)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
426 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
427 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
428 return(best_hit)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
429 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
430
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
431 find_best_hit = function(cls_tree){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
432 ## general params of good hit
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
433 min_prop_of_positive = 0.7
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
434 min_prop_second_best = 0.8
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
435 min_proportion_x_nhits = 2.5 # based on 0.05 x 50
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
436 if (isLeaf(cls_tree)) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
437 return(cls_tree)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
438 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
439
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
440
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
441 cond1 = cls_tree$nhits * cls_tree$proportion < min_proportion_x_nhits
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
442
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
443 if (cond1){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
444 ## return if proportions is lower than threshold
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
445 return(cls_tree)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
446 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
447
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
448 nhits = sapply(cls_tree$children, "[[", "nhits")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
449 best_hit_child = cls_tree$children[[which.max(nhits)]]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
450 ## more special cases:
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
451 second_max_hits = ifelse(length(nhits) == 1, 0, max(nhits[-which.max(nhits)]))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
452 cond2 = max(nhits) / sum(nhits) > min_prop_of_positive
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
453 cond3 = max(nhits) / (second_max_hits + max(nhits)) > min_prop_second_best
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
454 if (cond2 & cond3) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
455 if ("find_best_hit" %in% names(best_hit_child)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
456 ## custom rules for node is defined
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
457 best_hit = best_hit_child$find_best_hit(cls_tree$children[[which.max(nhits)]])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
458 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
459 # use generic rules
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
460 best_hit = find_best_hit(cls_tree$children[[which.max(nhits)]])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
461 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
462 } else {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
463 return(cls_tree)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
464 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
465 return(best_hit)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
466 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
467
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
468
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
469 get_annotation_groups = function(tr){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
470 tr_all = Clone(tr)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
471 Prune(tr_all, pruneFun=function(...)FALSE) ## prune everithing -> keep root
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
472 tr_all$name = "Unclassified repeat (No evidence)"
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
473 tr_contamination = Clone(tr)$contamination
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
474 tr_organelle = Clone(tr)$organelle
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
475 tr_repeat = Clone(tr)$"repeat"
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
476 tr_repeat$name = "Unclassified_repeat (conflicting evidences)"
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
477 return(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
478 list(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
479 tr_repeat = tr_repeat,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
480 tr_organelle = tr_organelle,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
481 tr_all = tr_all,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
482 tr_contamination = tr_contamination
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
483 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
484 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
485 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
486
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
487
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
488
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
489 format_tree = function(cls_tree, ...) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
490 df = ToDataFrameTree(cls_tree, ...)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
491 ## try to get rid off unicode characters
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
492 df[,1] = gsub("\U00B0", "'", df[,1], fixed = TRUE, useBytes = TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
493 df[,1] = gsub("\U00A6", "|", df[,1], fixed = TRUE, useBytes = TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
494 colnames(df)[1] = " " ## originally levelName
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
495 if ("proportion" %in% colnames(df)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
496 df$proportion = signif(df$proportion,2)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
497 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
498 if ("Proportion[%]" %in% colnames(df)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
499 df$"Proportion[%]" = round(df$"Proportion[%]", 2)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
500 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
501 ## format header
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
502 w1 = apply(df, 2, function(x) max(nchar(x)))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
503 w2 = nchar(colnames(df))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
504 # use whatever with is bigger for formating
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
505 w = ifelse(w1 > w2, w1, w2)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
506
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
507 out = character()
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
508 header = mapply(FUN = function(x, y) formatWidth(x, w = y), colnames(df), w) %>%
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
509 paste0(collapse=" | ")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
510 hr_line = gsub(".","-",header)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
511 # create output
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
512 # classification lines
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
513 class_lines = formatWidth(df, w) %>%
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
514 apply(1, FUN = function(x) paste0(x,collapse = " | ")) %>%
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
515 paste(collapse = "\n")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
516 paste(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
517 header,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
518 hr_line,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
519 class_lines,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
520 sep="\n") %>% return
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
521 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
522
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
523
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
524 make_final_annotation_template = function(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
525 annot_attributes = list(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
526 Nreads = 0,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
527 Nclusters = 0,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
528 Nsuperclusters = 0,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
529 "Proportion[%]" = 0,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
530 cluster_list = c())
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
531 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
532 {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
533 ct = Clone(CLS_TREE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
534 for (i in Traverse(ct)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
535 for (j in names(annot_attributes)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
536 i[[j]] = annot_attributes[[j]]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
537 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
538 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
539 return(ct)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
540 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
541
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
542
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
543 common_ancestor = function(tr1, tr2){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
544 a1 = tr1$Get('name', traversal = "ancestor")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
545 a2 = tr2$Get('name', traversal = "ancestor")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
546 ancestor = intersect(a1,a2)[1]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
547 return(ancestor)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
548 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
549
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
550 create_all_superclusters_report = function(max_supercluster = 100,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
551 paths,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
552 libdir,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
553 superclusters_dir,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
554 seqdb, hitsortdb,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
555 classification_hierarchy_file,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
556 HTML_LINKS)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
557 {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
558 ## connect to sqlite databases
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
559 ## seqdb and hitsortdb are path to sqlite files
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
560 HTML_LINKS = nested2named_list(HTML_LINKS)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
561 paths = nested2named_list(paths)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
562 report_file = paths[['supercluster_report_html']]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
563 ## create SEQDB, HTISORTDB and CLS_TREE
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
564 connect_to_databases(seqdb, hitsortdb, classification_hierarchy_file)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
565
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
566 ## append specific classication rules
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
567 CLS_TREE$"repeat"$find_best_hit = find_best_hit_repeat
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
568
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
569 ###
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
570 cat("Superclusters summary\n", file = report_file)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
571 cat("---------------------\n\n", file = report_file, append = TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
572 empty = rep(NA,max_supercluster)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
573 SC_table = data.frame(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
574 Supercluster = 1 : max_supercluster, "Number of reads" = empty, Automatic_annotation = empty,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
575 Similarity_hits = empty, TAREAN_annotation = empty,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
576 Clusters = empty, stringsAsFactors = FALSE, check.names = FALSE
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
577 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
578 SC_csv = SC_table # table as spreadsheet - no html formatings
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
579 final_cluster_annotation = make_final_annotation_template()
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
580
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
581 for (sc in 1 : max_supercluster) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
582 cat("supercluster: ",sc,"\n")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
583 cls_tree = read_annotation_to_tree(sc)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
584 sc_summary = get_supercluster_summary(sc)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
585 add_value_to_nodes(cls_tree)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
586 cls_tree_filtered = filter_tree(cls_tree)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
587 best_hit = find_best_hit(cls_tree)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
588 ## exception to decision tree put here.
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
589 ## All -> class I ?
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
590 if (best_hit$name == "All"){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
591 ## check LTR
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
592 if (any(!(unique(cls_tree$ltr_info$ltr_detection) %in% "None"))){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
593 ## if LTR found - move classification to Class I
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
594
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
595 LTR = FindNode(best_hit, "LTR")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
596 nhits_without_class_I = best_hit$nhits - LTR$nhits
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
597 prop_without_class_I = best_hit$proportion - LTR$proportion
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
598 cond3 = nhits_without_class_I * prop_without_class_I < 1
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
599 cond2 = best_hit$nhits * best_hit$proportion < 0.5 # other hits weak
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
600 cond1 = LTR$nhits >= (0.95 * best_hit$nhits) # hits are pre in sub class_I
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
601
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
602 if (cond1 | cond2 | cond3){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
603 best_hit = LTR
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
604 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
605 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
606 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
607 ## check is conflict os best hit with LTR/PBS exists
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
608 if (any(!(unique(cls_tree$ltr_info$ltr_detection) %in% "None"))){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
609 LTR = FindNode(cls_tree, "LTR")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
610 ca_name = common_ancestor(LTR, best_hit)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
611 if (ca_name != "LTR"){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
612 ## reclassify
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
613 best_hit = FindNode(cls_tree, ca_name)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
614 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
615 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
616 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
617
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
618 best_hit_name = best_hit$name
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
619 best_hit_path = paste(best_hit$path, collapse = "/")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
620 ## add best hit do database:
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
621 sqlcmd = paste0(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
622 "UPDATE cluster_info SET supercluster_best_hit = \"", best_hit_path, "\" ",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
623 " WHERE supercluster = ", sc
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
624 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
625 dbExecute(HITSORTDB, sqlcmd)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
626 # add annotation annotation summary stored in final_cluster_annotation - summing up
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
627 best_hit_node = FindNode(final_cluster_annotation, best_hit_name)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
628 best_hit_node$Nsuperclusters = best_hit_node$Nsuperclusters + 1
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
629 best_hit_node$Nclusters = best_hit_node$Nclusters + sc_summary$Nclusters
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
630 best_hit_node$Nreads = best_hit_node$Nreads + sc_summary$Nreads
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
631 best_hit_node$"Proportion[%]" = best_hit_node$"Proportion[%]" + sc_summary$proportion*100
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
632 best_hit_node$cluster_list = append(best_hit_node$cluster_list, sc_summary$cluster_list)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
633 best_hit_label = ifelse(best_hit_name == "All", "NA", best_hit_name)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
634 SC_csv$Automatic_annotation[sc] = best_hit_label
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
635 SC_csv$"Number of reads"[sc] = SC_table$"Number of reads"[sc] = cls_tree$size
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
636 SC_csv$Similarity_hits[sc] = format_tree(cls_tree_filtered,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
637 "nhits", "proportion", "features")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
638 SC_table$Similarity_hits[sc] = format_tree(cls_tree_filtered,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
639 "nhits", "proportion", "features") %>%
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
640 preformatted
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
641
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
642 SC_table$Automatic_annotation[sc] = best_hit_label
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
643
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
644 G = get_supercluster_graph(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
645 sc=sc, seqdb = seqdb,hitsortdb = hitsortdb,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
646 classification_hierarchy_file = classification_hierarchy_file
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
647 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
648 ## append tarean annotation
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
649 clist = paste(V(G)$name, collapse =",")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
650 cl_rank = dbGetQuery(HITSORTDB,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
651 paste("SELECT [index], tandem_rank FROM cluster_info WHERE [index] IN (",clist,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
652 ") AND tandem_rank IN (1,2)"))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
653 ## add information about TR clusters is any
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
654 if (nrow(cl_rank) > 0){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
655 tarean_annot = paste(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
656 cl_rank$index, "-",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
657 RANKS_TANDEM[cl_rank$tandem_rank]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
658 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
659 SC_csv$TAREAN_annotation[sc] = paste(tarean_annot,collapse="\n")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
660 SC_table$TAREAN_annotation[sc] = mapply(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
661 hwrite, tarean_annot,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
662 link = sprintf(HTML_LINKS$ROOT_TO_TAREAN,cl_rank$index)) %>% paste(collapse="<br>")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
663 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
664 ## add links to clusters
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
665 SC_csv$Clusters[sc] = paste((V(G)$name), collapse = ",")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
666 links = mapply(hwrite, V(G)$name, link = sprintf(HTML_LINKS$ROOT_TO_CLUSTER, as.integer(V(G)$name)))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
667 SC_table$Clusters[sc] = paste(links, collapse =", ")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
668 create_single_supercluster_report(G, superclusters_dir)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
669 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
670
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
671
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
672
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
673 ## add html links
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
674 SC_table$Supercluster = mapply(hwrite, SC_table$Supercluster,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
675 link = sprintf(HTML_LINKS$ROOT_TO_SUPERCLUSTER,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
676 SC_table$Supercluster))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
677
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
678
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
679
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
680 write.table(SC_csv, file = paths[["superclusters_csv_summary"]],
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
681 sep = "\t", row.names = FALSE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
682
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
683
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
684 DT_instance = datatable(SC_table, escape = FALSE, options = DT_OPTIONS)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
685 saveWidget(DT_instance, file = normalizePath(report_file),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
686 normalizePath(libdir), selfcontained = FALSE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
687 add_preamble(normalizePath(report_file),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
688 preamble='<h2>Supercluster annotation</h2> <p><a href="documentation.html#superclust"> For table legend see documentation. <a> </p>')
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
689
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
690 annot_groups = get_annotation_groups(final_cluster_annotation)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
691 saveRDS(object=annot_groups, file = paths[['repeat_annotation_summary_rds']])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
692 final_cluster_annotation_formated = paste(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
693 sapply(annot_groups, format_tree, 'Proportion[%]', 'Nsuperclusters', 'Nclusters', "Nreads"),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
694 collapse = "\n\n\n"
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
695 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
696 ## TODO add singleton counts, total counts and extra text - csv output
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
697 ## export annotation summary
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
698 html_summary = start_html(paths[['summarized_annotation_html']], gsub("PAGE_TITLE", "Repeat Annotation Summary", HTMLHEADER))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
699 html_summary("Repeat annotation summary", HTML.title, HR=2)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
700 html_summary("This table summarizes the automatic annotations of superclusters that should be verified and manually corrected if necessary. Thus, the table should not be used as the final output of the analysis without critical evaluation.", HTML.title, HR=3)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
701 html_summary(preformatted(final_cluster_annotation_formated), cat)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
702
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
703
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
704
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
705 return()
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
706 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
707 ## for testing
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
708 if (FALSE){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
709 create_supercluster_report(1:100, report_file, seqdb, hitsortdb, class_file)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
710 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
711
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
712 create_single_supercluster_report = function(G, superclusters_dir){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
713 sc_dir = paste0(superclusters_dir, "/dir_SC",sprintf("%04d", G$name))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
714 htmlfile = paste0(sc_dir, "/index.html")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
715 dir.create(sc_dir)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
716 title = paste("Supercluster no. ",G$name)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
717 htmlheader = gsub("PAGE_TITLE", title, HTMLHEADER)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
718 cat(htmlheader, file = htmlfile)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
719 file.copy(paste0(WD,"/style1.css"), sc_dir)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
720 HTML.title(title, file = htmlfile)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
721 HTML("Simmilarity hits (only hits with proportion above 0.1% in at least one cluster are shown) ", file = htmlfile)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
722 img_file = paste0(sc_dir,"/SC",sprintf("%0d",G$name), ".png")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
723 png(filename = img_file, width = 1400, height=1200)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
724 coords = plot_supercluster(G = G)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
725 dev.off()
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
726 ## basename - make link relative
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
727 href = paste0("../../clusters/dir_CL",sprintf("%04d", as.numeric(V(G)$name)),"/index.html")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
728 html_insert_image(basename(img_file), htmlfile,coords=coords, href = href)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
729
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
730 if (is_comparative()){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
731 HTML("Comparative analysis", file = htmlfile)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
732 img_file = paste0(sc_dir,"/SC",sprintf("%0d",G$name), "comparative.png")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
733 png(filename = img_file, width = 1400, height=1200)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
734 coords = plot_supercluster(G = G, "comparative")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
735 mtext("comparative analysis")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
736 dev.off()
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
737 ## basename - make link relative
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
738 href = paste0("../../clusters/dir_CL",sprintf("%04d", as.numeric(V(G)$name)),"/index.html")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
739 html_insert_image(basename(img_file), htmlfile,coords=coords, href = href)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
740
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
741 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
742 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
743
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
744 html_insert_image = function(img_file, htmlfile, coords=NULL, href=NULL) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
745 img_tag = sprintf('<p> <img src="%s" usemap="#clustermap" > \n</p>\n<br>\n', img_file)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
746 cat(img_tag, file = htmlfile, append = TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
747 if (!is.null(coords)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
748 formated_links = character()
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
749 for (i in seq_along(href)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
750 formated_links[i] = sprintf('<area shape="circle" coords="%f,%f,%f" href="%s" >\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
751 coords[i,1],coords[i,2],coords[,3],href[i])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
752 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
753 cat('<map name="clustermap" >\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
754 formated_links,"</map>\n", file=htmlfile, append = TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
755
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
756 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
757 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
758
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
759 html_insert_floating_image = function(img_file,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
760 htmlfile,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
761 width=NULL,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
762 title= "",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
763 footer = ""
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
764 ){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
765 if (is.null(width)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
766 width=""
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
767 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
768 tag = paste(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
769 '<div class="floating_img">\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
770 ' <p>', title,'</p>\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
771 ' <a href=',img_file, ">",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
772 ' <img src="',img_file, '" alt="image" width="',width,'">\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
773 ' </a>',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
774 ' <p> ',footer,'</p>\n',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
775 '</div>\n'
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
776 ,sep = "")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
777 cat(tag, file = htmlfile, append = TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
778 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
779
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
780 get_supercluster_graph = function(sc, seqdb, hitsortdb, classification_hierarchy_file){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
781 ## sc - superclusted index
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
782 ## seqdb, hitsortdb - path to sqlite databases
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
783 ## classificationn_hierarchy_file - path data.tree rds file
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
784 ## SIZE of image
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
785 SIZE = 1000
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
786 connect_to_databases(seqdb, hitsortdb, classification_hierarchy_file)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
787 clusters = dbGetQuery(HITSORTDB,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
788 paste0("SELECT cluster, size FROM superclusters WHERE supercluster=",sc)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
789 ) %>% as.data.frame
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
790 ## string for sql query:
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
791 cluster_list = paste0( "(", paste0(clusters$cluster, collapse = ","),")")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
792 supercluster_ncol = dbGetQuery(HITSORTDB,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
793 paste("SELECT c1,c2,w, k FROM cluster_mate_bond where c1 IN ",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
794 cluster_list, " AND c2 IN ", cluster_list, "AND k > 0.1"
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
795 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
796 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
797 if (nrow(supercluster_ncol)>0){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
798 ## at least two clusters
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
799 G = graph.data.frame(supercluster_ncol, directed=FALSE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
800 L = layout_with_kk(G)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
801 ## layout is rescaled for easier linking html
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
802 L[,1] = scales::rescale(L[,1], to = c(1,SIZE) )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
803 L[,2] = scales::rescale(L[,2], to = c(1,SIZE) )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
804 G$L = L
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
805 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
806 ## only one cluster in supercluster
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
807 G = graph.full(n = 1)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
808 V(G)$name = as.character(clusters$cluster)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
809 G$L = matrix(c(SIZE/2, SIZE/2), ncol=2)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
810 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
811 G = set_vertex_attr(G, "label", value = paste0("CL",names(V(G))))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
812 G$name=sc ## names is supercluster
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
813 # create annotation of individual clusters, will be attached as node attribudes
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
814 annot = get_cluster_annotation_summary(clusters)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
815 ## annotation of nodes(clusters)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
816 G = set_vertex_attr(G, "annotation", value = annot$clusters[names(V(G))])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
817 G = set_vertex_attr(G, "size",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
818 value = clusters$size[match(names(V(G)), as.character(clusters$cluster))])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
819 G$annotation = annot$supercluster
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
820 G$clusters = clusters
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
821 # TODO if comparative analysis - add proportion of species
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
822 if (is_comparative()){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
823 comparative_counts = get_cluster_comparative_counts(cluster_list)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
824 NACluster = comparative_counts$supercluster
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
825 # some clusters do not have comparative data - they are bellow threshold!
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
826 NACluster[,] = NA
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
827 counts_adjusted = comparative_counts$clusters[names(V(G))]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
828 for (i in names(V(G))){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
829 if(is.null(counts_adjusted[[i]])){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
830 ## get count from database
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
831 seqid = dbGetQuery(HITSORTDB,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
832 paste(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
833 "SELECT vertexname FROM vertex_cluster where cluster = ",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
834 i)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
835 )$vertexname
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
836 codes = get_comparative_codes()$prefix
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
837 x = table(factor(substring(seqid, 1,nchar(codes[1])), levels = codes))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
838 counts_adjusted[[i]] = data.frame(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
839 Freq = as.numeric(x),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
840 proportion = as.numeric(x/sum(x)),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
841 row.names = names(x))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
842
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
843 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
844 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
845
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
846 G = set_vertex_attr(G, "comparative_counts",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
847 value = counts_adjusted[V(G)$name]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
848 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
849 # for whole supercluster
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
850 G$comparative = comparative_counts$supercluster
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
851 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
852
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
853 return(G)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
854 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
855
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
856 get_cluster_comparative_counts = function(cluster_list){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
857 counts = dbGetQuery(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
858 HITSORTDB,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
859 paste0("SELECT * FROM comparative_counts WHERE clusterindex IN",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
860 cluster_list
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
861 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
862 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
863 comparative_counts = apply(counts, 1, function(x)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
864 y = data.frame(Freq = x[-1], proportion = x[-1]/sum(x[-1]))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
865 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
866 names(comparative_counts) = counts$clusterindex
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
867 total_counts = data.frame(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
868 Freq = colSums(counts[,-1]),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
869 proportion = colSums(counts[,-1])/sum(counts[,-1]))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
870 return(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
871 list(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
872 clusters = comparative_counts,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
873 supercluster = total_counts
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
874 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
875 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
876
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
877 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
878
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
879 get_cluster_annotation_summary = function(clusters){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
880 ## clusters - table of clusters, col: cluster, size
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
881 annot_list = apply(clusters,1,FUN = function(x)summarize_annotation(cluster_annotation(x[1]),x[2]))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
882 ## if empty - not annotation at all
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
883 if (sum(sapply(annot_list, nrow)) == 0){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
884 return(list (clusters = NULL,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
885 superclusters = NULL
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
886 ))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
887 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
888 names(annot_list) = as.character(clusters$cluster)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
889 annot_all = do.call(rbind,annot_list)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
890 total = by(annot_all$Freq,INDICES=with(annot_all, paste(cl_string,domain)), FUN=sum, simplify=TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
891 total_df = data.frame(Freq = c(total), proportion = c(total) /sum(clusters$size))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
892 ## for ploting it need to contain all annot categories as in s upercluster
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
893 annot_list_full = list()
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
894 for (i in seq_along(annot_list)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
895 annot_list_full[[i]] = total_df
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
896 for (j in rownames(total_df)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
897 if (!j %in% rownames(annot_list[[i]])){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
898 annot_list_full[[i]][j,c('Freq','proportion')] = c(0,0)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
899 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
900 annot_list_full[[i]][j,c('Freq','proportion')] = annot_list[[i]][j,c('Freq','proportion')]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
901 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
902 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
903 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
904 names(annot_list_full) = names(annot_list)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
905 return(list (clusters = annot_list_full,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
906 supercluster = total_df
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
907 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
908 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
909 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
910
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
911 plot_supercluster = function(G,color_scheme=c("annotation","comparative")){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
912 ## create plot in coords y: 1-1200, x: 1 - 1400
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
913 ## this is fixed for href to work in exact image areas!
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
914 color_scheme = match.arg(color_scheme)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
915 LIMS0 = matrix(c (1,1000,1,1000), ncol = 2)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
916 OFFSET_H = 100; OFFSET_W = 200
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
917 lims = LIMS0 + c(0,OFFSET_W * 2, 0, OFFSET_H * 2)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
918 ## use full range
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
919 par(mar=c(0,0,0,0),xaxs="i", yaxs="i")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
920 ## proportion of repeats
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
921 if (color_scheme == "annotation"){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
922 prop = sapply (V(G)$annotation, FUN = function(x)x$proportion)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
923 brv = c("#BBBBBB",unique_colors)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
924 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
925 prop = sapply (V(G)$comparative_counts, FUN = function(x)x$proportion)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
926 brv = unique_colors
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
927 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
928 ## handle special cases - single cluster with annot or single annotation group
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
929 if (is.null(dim(prop))){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
930 prop=matrix(prop, nrow = 1)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
931 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
932 if (length(prop)==0){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
933 ## special case - not annotation at all
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
934 prop = matrix(rep(1,vcount(G)),ncol = vcount(G), dimnames=list("NAN", NULL))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
935 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
936 if (color_scheme == "annotation"){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
937 rownames(prop) = rownames(G$annotation)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
938 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
939 rownames(prop) = rownames(G$comparative)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
940 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
941 ## reorder by prop
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
942 if (color_scheme =="annotation"){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
943 prop = prop[order(prop[,1], decreasing = TRUE),,drop=FALSE]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
944 NAN = 1 - colSums(prop)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
945 prop = rbind(NAN, prop)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
946 brv = c("#BBBBBB",unique_colors)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
947 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
948 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
949 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
950 L = G$L + c(OFFSET_H)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
951 ## for href - necessary convert coordinater - image is flipped vertically
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
952 include = rowSums(prop>0.001, na.rm = TRUE)>=1
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
953 include[1] = TRUE
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
954 prop = prop[include, , drop=FALSE]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
955 plot(0,0,type='n', , xlim = lims[,1], ylim = lims[,2])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
956 plot_edges(G, L, lwd = 8)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
957 RADIUS = radius_size(G)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
958 coords = cbind(G$L[,1,drop=FALSE] + 100, 1100 - G$L[,2,drop=FALSE], RADIUS)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
959 pieScatter(L[,1], L[,2], t(prop), col = brv,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
960 radius = RADIUS, new.plot = FALSE, border=NA)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
961 ## add ledend
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
962 legend("topright", legend = rownames(prop),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
963 col = brv, pch = 15, cex= 1.5, pt.cex= 3)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
964 ## add cluster labels
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
965 text(x = L[,1], y = L[,2], labels = paste(V(G)$label,"\n",V(G)$size))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
966 return(coords)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
967 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
968
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
969
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
970 radius_size = function(G){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
971 if (vcount(G) == 1) RADIUS=120
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
972 if (vcount(G) == 2) RADIUS=50
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
973 if (vcount(G) %in% 3:8) RADIUS=40
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
974 if (vcount(G) > 8) RADIUS=30
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
975 return(RADIUS)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
976 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
977
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
978
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
979 plot_edges = function(G,L,col="#33000040", lwd = 1){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
980 e = get.edgelist(G, names = F)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
981 X0 = L[e[, 1], 1]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
982 Y0 = L[e[, 1], 2]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
983 X1 = L[e[, 2], 1]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
984 Y1 = L[e[, 2], 2]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
985 segments(X0, Y0, X1, Y1, lwd = lwd,col = col)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
986 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
987
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
988
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
989 pieScatter=function(x, y, prop,radius=1, col=NULL, edges=100, new.plot = TRUE, ...){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
990 if (new.plot){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
991 plot(x,y,type='n')
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
992 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
993 for (i in seq_along(x)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
994 p=prop[i,]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
995 if (length(radius)==1){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
996 r=radius
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
997 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
998 r=radius[i]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
999 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1000 include = p != 0
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1001 floating.pie(x[i], y[i],p[include],
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1002 col=col[include], radius=r,edges=50,...)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1003 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1004 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1005
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1006 unique_colors = c("#00FF00", "#0000FF", "#FF0000",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1007 "#FFA6FE", "#FFDB66", "#006401", "#010067", "#95003A",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1008 "#007DB5", "#FF00F6", "#FFEEE8", "#774D00", "#90FB92", "#01FFFE",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1009 "#0076FF", "#D5FF00", "#FF937E", "#6A826C", "#FF029D",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1010 "#FE8900", "#7A4782", "#7E2DD2", "#85A900", "#FF0056",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1011 "#A42400", "#00AE7E", "#683D3B", "#BDC6FF", "#263400",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1012 "#BDD393", "#00B917", "#9E008E", "#001544", "#C28C9F",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1013 "#FF74A3", "#01D0FF", "#004754", "#E56FFE", "#788231",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1014 "#0E4CA1", "#91D0CB", "#BE9970", "#968AE8", "#BB8800",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1015 "#43002C", "#DEFF74", "#00FFC6", "#FFE502", "#620E00",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1016 "#008F9C", "#98FF52", "#7544B1", "#B500FF", "#00FF78",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1017 "#FF6E41", "#005F39", "#6B6882", "#5FAD4E", "#A75740",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1018 "#A5FFD2", "#FFB167", "#009BFF", "#E85EBE")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1019
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1020 if (FALSE){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1021 ## For testing
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1022 for ( i in 1:100){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1023 G = get_supercluster_graph(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1024 sc=i, seqdb = seqdb,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1025 hitsortdb = hitsortdb, classification_hierarchy_file = class_file
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1026 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1027 png(paste0("/mnt/raid/users/petr/tmp/test.figure_",i,".png"), width = 1400, height=1000)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1028 plot_supercluster(G)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1029 dev.off()
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1030 print(i)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1031 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1032
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1033 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1034
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1035 plotg = function(GG,LL,wlim=NULL,...){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1036 ## quick ploting function
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1037 e = get.edgelist(GG, names=F)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1038 w = E(GG)$weight
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1039 if (!is.null(wlim)) {e = e[w > wlim,]; w = w[w > wlim]}
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1040 X0 = LL[e[,1],1]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1041 Y0 = LL[e[,1],2]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1042 X1 = LL[e[,2],1]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1043 Y1 = LL[e[,2],2]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1044 plot(range(LL[,1]),range(LL[,2]),xlab="",ylab="",axes=FALSE,type="n",...)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1045 brv = 'grey'
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1046 segments(X0,Y0,X1,Y1,lwd=.5,col=brv)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1047 points(LL,pch=18,cex=.8,...)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1048 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1049
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1050
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1051
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1052
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1053 get_cluster_info = function(index){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1054 ## must be connected to hitsort database ( HITSORT)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1055 x = dbGetQuery(HITSORTDB,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1056 paste0("SELECT * FROM cluster_info WHERE [index]=",index))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1057 return(as.list(x))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1058 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1059
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1060 get_supercluster_info = function(sc){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1061 ## must be connected to hitsort database ( HITSORT)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1062 x = dbGetQuery(HITSORTDB,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1063 paste0("SELECT * FROM cluster_info WHERE supercluster=",sc))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1064 return(x)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1065 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1066
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1067 get_supercluster_summary = function(sc){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1068 info = get_supercluster_info(sc)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1069 Nclusters = nrow(info)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1070 Nreads = supercluster_size(sc)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1071 N = dbGetQuery(SEQDB, "SELECT count(*) from sequences")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1072 proportion = Nreads/N
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1073 cluster_list = info$index
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1074 return(list(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1075 Nreads = Nreads,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1076 Nclusters = Nclusters,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1077 proportion = proportion,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1078 cluster_list = cluster_list
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1079 ))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1080 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1081
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1082
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1083 get_cluster_connection_info = function(index, search_type=c("pair", "similarity")){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1084 search_type = match.arg(search_type)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1085 if (search_type == "pair"){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1086 x = dbGetQuery(HITSORTDB,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1087 sprintf(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1088 "SELECT * FROM cluster_mate_connections WHERE c1==%d OR c2=%d",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1089 index, index)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1090 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1091 if (nrow(x) == 0 ){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1092 ## no connections
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1093 return(NULL)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1094 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1095 other_cl = ifelse(x$c1 == index, x$c2, x$c1)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1096 out = data.frame(cl = other_cl, N = x$N)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1097 out$ids = unlist(strsplit(x$ids, split = ", "))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1098 k = dbGetQuery(HITSORTDB,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1099 sprintf(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1100 "SELECT * FROM cluster_mate_bond WHERE c1==%d OR c2=%d",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1101 index, index)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1102 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1103 if (k$c1 != x$c1 || k$c2 !=x$c2){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1104 ## tables not in same order
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1105 stop("tables cluster_mate_bond and cluster_mate_connection are not in the same order")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1106 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1107 out$k = k$k
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1108 return(out)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1109 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1110 x = dbGetQuery(HITSORTDB,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1111 sprintf(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1112 "SELECT * FROM cluster_connections WHERE c1==%d OR c2=%d",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1113 index, index)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1114 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1115 if (nrow(x) == 0 ){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1116 ## no connections
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1117 return(NULL)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1118 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1119 other_cl = ifelse(x$c1 == index, x$c2, x$c1)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1120 out = data.frame(cl = other_cl, N = x$"count(*)")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1121 return(out)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1122 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1123 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1124
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1125
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1126 format_clinfo=function(x){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1127 include = c(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1128 "size",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1129 "size_real",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1130 "ecount",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1131 "supercluster",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1132 "annotations_summary",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1133 "ltr_detection",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1134 "pair_completeness",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1135 "TR_score",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1136 "TR_monomer_length",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1137 "loop_index",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1138 "satellite_probability",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1139 "TR_consensus",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1140 "tandem_rank",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1141 "orientation_score"
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1142 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1143 new_names = c(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1144 "TR_consensus" = "TAREAN consensus",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1145 "tandem_rank" = "TAREAN_annotation",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1146 "ltr_detection" = "PBS/LTR",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1147 'size' = 'number of reads in the graph',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1148 'size_real' = 'the total number of reads in the cluster',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1149 'ecount' = "number of edges of the graph:",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1150 "annotations_summary" = "similarity based annotation"
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1151 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1152 values = unlist(x[include]) %>% gsub("\n","<br>", .)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1153 include = replace(include, match(names(new_names),include), new_names)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1154 names(values) = include
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1155 ## replace tandem rank with annotation description
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1156 values["TAREAN_annotation"] = RANKS_TANDEM[values["TAREAN_annotation"]]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1157 ## create HTML table without header
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1158 desc = df2html(data.frame(names(values), values))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1159 return(desc)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1160 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1161
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1162 create_cluster_report = function(index,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1163 seqdb, hitsortdb,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1164 classification_hierarchy_file,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1165 HTML_LINKS){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1166 ## index - index of cluster, integer
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1167 ## seqdb, hitsortdb - sqlite databases
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1168 ## classification_hierarchy_file - rds file with classification
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1169 ## other information about cluster is collected from HITSORTDB
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1170 connect_to_databases(seqdb, hitsortdb, classification_hierarchy_file)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1171 ## basic info about cluster
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1172 clinfo = get_cluster_info(index)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1173 HTML_LINKS = nested2named_list(HTML_LINKS)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1174 PNGWIDTH = 1000 # this must be kept so than link in png work correctly
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1175 PNGHEIGHT = 1000
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1176 LEGENDWIDTH = 300
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1177 PS = 30 ## pointsize for plotting
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1178 MAX_N = 10 ## to show mates or similarities connections
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1179
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1180 #################################################################################
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1181 ## create HTML title:
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1182 title = paste("Cluster no. ",index)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1183 htmlheader = gsub("PAGE_TITLE", title, HTMLHEADER)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1184 cat(htmlheader, file = clinfo$html_report_main)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1185 file.copy(paste0(WD,"/style1.css"), clinfo$dir)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1186 HTML.title(title, file = clinfo$html_report_main)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1187 ## print basic info to HTML header:
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1188 ## make link back to cluster table
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1189 hwrite("Go back to cluster table <br><br>\n", link=HTML_LINKS$CLUSTER_TO_CLUSTER_TABLE) %>%
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1190 cat(file = clinfo$html_report_main, append =TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1191 ## create ling to corresponding supercluster
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1192 cat("Cluster is part of ",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1193 hwrite (paste("supercluster: ", clinfo$supercluster),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1194 link=sprintf(HTML_LINKS$CLUSTER_TO_SUPERCLUSTER, clinfo$supercluster)),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1195 "\n<br><br>\n",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1196 file = clinfo$html_report_main, append =TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1197 HTML.title("Cluster characteristics:", HR = 3, file = clinfo$html_report_main)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1198 cat(format_clinfo(clinfo), file = clinfo$html_report_main, append =TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1199 #################################################################################
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1200 ## add link to image with graph layout with domains
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1201 fs = list(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1202 graph_domains = paste0(clinfo$html_report_files,"/graph_domains.png"),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1203 graph_mates = paste0(clinfo$html_report_files,"/graph_mates%04d_%04d.png"),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1204 graph_base = paste0(clinfo$html_report_files,"/graph_base.png"),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1205 graph_comparative = paste0(clinfo$html_report_files,"/graph_comparative.png")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1206 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1207 fs_relative = list(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1208 graph_domains = paste0(basename(clinfo$html_report_files),"/graph_domains.png"),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1209 graph_comparative = paste0(basename(clinfo$html_report_files),"/graph_comparative.png"),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1210 graph_mates = paste0(basename(clinfo$html_report_files),"/graph_mates%04d_%04d.png")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1211 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1212
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1213 dir.create(clinfo$html_report_files)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1214 load(clinfo$graph_file)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1215 annot = cluster_annotation(index)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1216 annot_summary = summarize_annotation(annot, clinfo$size_real)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1217 vertex_colors = annot2colors(annot,ids = V(GL$G)$name)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1218
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1219 color_proportions = sort(table(vertex_colors$color_table)) / clinfo$size_real
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1220 ## hits with smaller proportion are not shown!
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1221 exclude_colors = names(color_proportions)[color_proportions < 0.001]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1222 vertex_colors$color_table[vertex_colors$color_table %in% exclude_colors] = "#00000050"
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1223 vertex_colors$legend = vertex_colors$legend[!vertex_colors$legend %in% exclude_colors]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1224
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1225 if (is_comparative()){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1226 comp_codes = get_comparative_codes()$prefix
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1227 colors = unique_colors[seq_along(comp_codes)]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1228 names(colors) = comp_codes
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1229 species = substring(V(GL$G)$name,1, nchar(comp_codes[1]))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1230 ## create image with highlighted domains
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1231 png(fs$graph_comparative, width = PNGWIDTH + LEGENDWIDTH, height = PNGHEIGHT, pointsize = PS)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1232 layout(matrix(1:2, ncol = 2), width = c(10, 3))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1233 plotg(GL$G,GL$L, col = colors[species])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1234 par(mar = c(0, 0, 0, 0))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1235 plot.new()
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1236 legend("topleft", col=colors,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1237 legend = names(colors),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1238 pch = 15, cex = 0.7)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1239 dev.off()
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1240 HTML.title("comparative analysis:", HR=4, file = clinfo$html_report_main)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1241 html_insert_image(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1242 img_file = fs_relative$graph_comparative,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1243 htmlfile = clinfo$html_report_main)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1244 ## comparative analysis - calculate statistics:
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1245 V1V2 = substring(get.edgelist(GL$G),1, nchar(comp_codes[1]))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1246 C_observed = table(data.frame(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1247 V1=factor(V1V2[,1],levels = comp_codes),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1248 V2=factor(V1V2[,2],levels = comp_codes)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1249 ))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1250 C_observed = as.data.frame.matrix(C_observed + t(C_observed))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1251 P_observed = as.data.frame.matrix(C_observed/sum(C_observed))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1252 Edge_propotions = table(factor(V1V2, levels = comp_codes))/(length(V1V2))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1253 P_expected = matrix(Edge_propotions,ncol=1) %*% matrix(Edge_propotions, nrow=1)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1254
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1255 spec_counts = df2html(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1256 data.frame(table(factor(species, levels=comp_codes))),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1257 header = c("Species", "Read count")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1258 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1259
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1260 edge_count = df2html(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1261 data.frame(species = comp_codes, (C_observed/2)[,]),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1262 header = c("", comp_codes)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1263 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1264
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1265 P_OE = df2html(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1266 data.frame(species=comp_codes,(P_observed/P_expected[,])),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1267 header = c("", comp_codes)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1268 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1269
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1270 HTML.title("Comparative analysis - species read counts:", HR =5, file = clinfo$html_report_main)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1271 cat(spec_counts,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1272 file = clinfo$html_report_main, append =TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1273
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1274 # show connection only if more than one species in cluster
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1275 if (length(unique(species))>1){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1276 HTML.title("comparative analysis - number of edges between species:",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1277 HR =5, file = clinfo$html_report_main)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1278 cat(edge_count,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1279 file = clinfo$html_report_main, append = TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1280
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1281 HTML.title("comparative analysis - observed/expected number of edges between species",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1282 HR =5, file = clinfo$html_report_main)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1283 cat(P_OE,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1284 file = clinfo$html_report_main, append = TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1285 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1286
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1287 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1288
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1289
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1290 ## create image with highlighted domains
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1291 png(fs$graph_domains, width = PNGWIDTH + LEGENDWIDTH, height = PNGHEIGHT, pointsize = PS)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1292 layout(matrix(1:2, ncol = 2), width = c(10, 3))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1293 plotg(GL$G,GL$L, col = vertex_colors$color_table)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1294 domains_detected = length(vertex_colors$legend) > 0
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1295 par(mar = c(0, 0, 0, 0))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1296 plot.new()
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1297 if (domains_detected){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1298 # domains found
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1299 legend("topleft", col=vertex_colors$legend,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1300 legend = names(vertex_colors$legend),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1301 pch = 15, cex = 0.7)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1302 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1303 dev.off()
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1304
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1305 HTML.title("protein domains:", HR=4, file = clinfo$html_report_main)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1306 if (!domains_detected){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1307 HTML("No protein domains detected", file = clinfo$html_report_main)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1308 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1309 HTML("protein domains:", HR=4, file = clinfo$html_report_main)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1310 html_insert_image(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1311 img_file = fs_relative$graph_domains,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1312 htmlfile = clinfo$html_report_main)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1313
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1314 #############################################################################
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1315 if (nrow(annot_summary) == 0){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1316 HTML.title("Reads annotation summary", HR = 3, file = clinfo$html_report_main)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1317 HTML("No similarity hits to repeat databases found", file = clinfo$html_report_main)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1318 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1319 HTML(annot_summary, file = clinfo$html_report_main, align = "left")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1320 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1321
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1322
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1323 ## similarity and mate cluster
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1324 mate_clusters = get_cluster_connection_info(index, search_type="pair")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1325 similar_clusters = get_cluster_connection_info(index, search_type="similarity")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1326 ## report mate and similarity clusters
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1327 if (!is.null(similar_clusters)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1328 HTML.title("clusters with similarity:", file =clinfo$html_report_main, HR = 3)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1329 cat(df2html(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1330 similar_clusters,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1331 header=c("Cluster","Number of similarity hits"),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1332 sort_col = "N", scroling = TRUE
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1333 ),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1334 file =clinfo$html_report_main, append=TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1335 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1336 if (!is.null(mate_clusters)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1337 HTML.title("clusters connected through mates:", file =clinfo$html_report_main, HR = 3)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1338 cat(df2html(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1339 mate_clusters[,c('cl','N','k')],
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1340 header = c('Cluster','Number of shared<br> read pairs','k'),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1341 sort_col = "N", scroling = TRUE
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1342 ),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1343 file = clinfo$html_report_main,append = TRUE
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1344 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1345
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1346 ## create base graph images - it will serve as background for
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1347 ## mate clusters plots
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1348 png(fs$graph_base,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1349 width = PNGWIDTH, height = PNGHEIGHT, pointsize = PS)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1350 par(mar=c(0,0,0,0),xaxs="i",yaxs="i")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1351 plotg(GL$G,GL$L, col = "#00000050")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1352 dev.off()
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1353 ## load base as raster image
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1354 base_image = readPNG(fs$graph_base)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1355
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1356 for (i in order(mate_clusters$N, decreasing = TRUE)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1357 mate_ids = unlist(strsplit(mate_clusters$ids[[i]],split=","))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1358 ## print only graph above MAX_N mates
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1359 if (length(mate_ids) < MAX_N){ # TODO - use constant
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1360 next
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1361 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1362 png(sprintf(fs$graph_mates,index, mate_clusters$cl[i]),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1363 width = PNGWIDTH, height = PNGHEIGHT, pointsize = PS)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1364 color_mate = gsub(".$","",V(GL$G)$name) %in% mate_ids %>%
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1365 ifelse("#FF0000FF", "#000000AA")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1366 par(mar=c(0,0,0,0),xaxs="i",yaxs="i")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1367 plot(range(GL$L[,1]), range(GL$L[,2]), type = "n", xlab = "", ylab = "", axes = FALSE,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1368 main = paste0("CL",index," ----> CL",mate_clusters$cl[i]))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1369 rasterImage(base_image,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1370 range(GL$L[,1])[1], range(GL$L[,2])[1],
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1371 range(GL$L[,1])[2], range(GL$L[,2])[2]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1372 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1373 points(GL$L[,1:2], col = color_mate,pch=18,cex=.8)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1374 dev.off()
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1375 title = paste0("CL",index," ----> CL",mate_clusters$cl[i])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1376 footer = paste0("No. of shared pairs: :", mate_clusters$N[i])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1377 html_insert_floating_image(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1378 img_file = sprintf(fs_relative$graph_mates, index, mate_clusters$cl[i]),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1379 htmlfile = clinfo$html_report_main, width = 200,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1380 title = title, footer = footer
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1381 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1382 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1383 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1384 }