annotate lib/reporting.R @ 0:1d1b9e1b2e2f draft

Uploaded
author petr-novak
date Thu, 19 Dec 2019 10:24:45 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
1 #!/usr/bin/env Rscript
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
2 library(R2HTML)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
3 library(hwriter)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
4 library(DT)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
5 library(tools)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
6
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
7 source("htmlheader.R")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
8 source("config.R") # load TANDEM_RANKS
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
9 source("utils.R")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
# Options handed to DT::datatable() for the cluster table: default page
# length plus the page-size choices offered in the length menu.
DT_OPTIONS <- list(
  pageLength = 1000,
  lengthMenu = c(10, 50, 100, 1000, 5000, 10000)
)

# Report headers: the PAGE_TITLE placeholder in `htmlheader` is replaced by
# the title of each report (`htmlheader` is presumably defined by
# htmlheader.R, which is sourced above - confirm).
HTMLHEADER_TAREAN <- gsub("PAGE_TITLE", "TAREAN summary", htmlheader)
HTMLHEADER_INDEX <- gsub("PAGE_TITLE", "Clustering summary", htmlheader)

# Working directory captured at load time;
# needed to get the script directory when run from Rserve.
WD <- getwd()
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
15
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
# Turn internal column names into human-readable HTML table headers.
#
# Steps, in order:
#   1. known column names are replaced by their display labels,
#   2. a leading "TR_" prefix is stripped from any remaining name,
#   3. every "_" becomes an HTML line break ("<br>"),
#   4. the first character is upper-cased via capitalize().
#
# Args:
#   df: a data.frame (or any object supporting colnames()/colnames<-).
# Returns:
#   `df` with only its column names changed.
reformat_header <- function(df) {
  # internal name -> display label ("_" marks where a line break will go)
  labels <- c(
    "TR_score" = "TAREAN k-mer_coverage",
    "vcount" = "|V|",
    "ecount" = "|E|",
    "Genome_Proportion[%]" = "Proportion[%]",
    "Proportion_Adjusted[%]" = "Proportion adjusted[%]",
    "supercluster" = "Super_cluster",
    "size_real" = "Number of reads",
    "TR_monomer_length" = "Consensus_length",
    "TR_consensus" = "Consensus",
    "pbs_score" = "PBS score",
    "ltr_detection" = "LTR detection",
    "kmer_analysis" = "TAREAN k-mer analysis",
    "annotations_summary" = "Similarity_hits_[above 0.1%]",
    "annotations_summary_custom" = "Similarity_hits_to_custom_database",
    "loop_index" = "connected_component_index C",
    "pair_completeness" = "pair_completeness_index_P"
  )

  H <- colnames(df)
  hit <- match(H, names(labels))
  known <- !is.na(hit)
  H[known] <- unname(labels[hit[known]])

  # The prefix must be stripped BEFORE underscores are converted: once "_"
  # has become "<br>" the pattern "TR_" can no longer match anything.
  # Anchored to the start so names such as "LTR_x" are left alone.
  H <- sub("^TR_", "", H)
  H <- gsub("_", "<br>", H, fixed = TRUE)
  H <- capitalize(H)

  colnames(df) <- H
  df
}
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
42
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
# Prepare the values of a data.frame for display in an HTML table.
#
# Character columns get their newlines replaced by "<br>"; double
# ("numeric") columns are rounded to 3 significant digits. All other
# columns - including integer columns, whose class is "integer" - are
# returned unchanged.
#
# Args:
#   df: a data.frame.
# Returns:
#   `df` with the reformatted columns.
reformat4html <- function(df) {
  for (n in colnames(df)) {
    column <- df[[n]]
    # inherits() gives a single TRUE/FALSE even when a column carries several
    # classes (e.g. POSIXct, ordered factor); `class(x) == "..."` would give
    # a vector there, which `if` rejects with an error since R 4.2.
    if (inherits(column, "character")) {
      df[[n]] <- gsub("\n", "<br>", column, fixed = TRUE)
    } else if (inherits(column, "numeric")) {
      df[[n]] <- signif(column, 3)
    }
  }
  df
}
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
54
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
# Upper-case the first character of every string in a vector.
#
# Args:
#   s: a character vector (other types are coerced with as.character()).
# Returns:
#   A character vector of the same length; the rest of each string is left
#   untouched, "" stays "" and NA stays NA.
capitalize <- function(s) {
  s <- as.character(s)
  out <- paste0(toupper(substring(s, 1, 1)), substring(s, 2))
  # paste0() would otherwise turn a missing value into a literal string
  out[is.na(s)] <- NA_character_
  out
}
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
60
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
61
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
62 create_main_reports = function(paths, N_clustering, N_input,N_omit, merge_threshold,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
63 paired, consensus_files, custom_db, tarean_mode,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
64 HTML_LINKS, pipeline_version_info, max_memory,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
65 max_number_reads_for_clustering, mincln){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
66 ## this create main html index and also tarean report ##
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
67 ## index and tarean html reports are created always
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
68 ## extract all paths and directories
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
69 HTML_LINKS = nested2named_list(HTML_LINKS)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
70 paths = nested2named_list(paths)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
71 csvfile = paths[['clusters_info']]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
72 clusters_summary_csv = paths[['clusters_summary_csv']]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
73 profrep_classification_csv = paths[['profrep_classification_csv']]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
74 htmlfile = paths[["tarean_report_html"]]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
75 html_report_dt = paths[["cluster_report_html"]]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
76 main_report = paths[["main_report_html"]]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
77 summarized_annnotation_html = paths[["summarized_annotation_html"]]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
78 libdir = paths[['libdir']]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
79 clusters_dir = paths[["clusters__relative"]]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
80 superclusters_dir = paths[['superclusters__relative']]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
81 seqdb = paths[['sequences_db']]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
82 hitsortdb = paths[['hitsort_db']]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
83 connect_to_databases(seqdb, hitsortdb)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
84 dfraw = read.table(csvfile, as.is=TRUE, header=TRUE, sep="\t", na.strings = c('None','NA'))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
85 # table must be updated
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
86 dfraw$supercluster_best_hit = dbGetQuery(HITSORTDB, "SELECT supercluster_best_hit FROM cluster_info")[, 1]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
87 ## columns to use
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
88 selected_cols = c("index", "size_real","size_adjusted", "vcount","ecount",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
89 "loop_index", "pair_completeness",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
90 'satellite_probability','satellite',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
91 'TR_score','pbs_score','ltr_detection', 'TR_monomer_length',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
92 'TR_consensus', "annotations_summary", "supercluster", 'tandem_rank',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
93 'supercluster_best_hit')
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
94
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
95 ## some columns are added (like Graph_layout, clusters,...)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
96 ## columns for html report
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
97 selected_cols_tarean = c(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
98 "Cluster",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
99 "Proportion[%]",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
100 "Proportion_Adjusted[%]",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
101 "size_real",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
102 'satellite_probability',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
103 'TR_monomer_length',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
104 'TR_consensus',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
105 'Graph_layout',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
106 'kmer_analysis',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
107 "loop_index",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
108 "pair_completeness",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
109 'TR_score',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
110 "vcount",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
111 "ecount",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
112 'pbs_score',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
113 "annotations_summary"
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
114 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
115 selected_cols_main = c(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
116 "Cluster",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
117 "supercluster",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
118 "Proportion[%]",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
119 "Proportion_Adjusted[%]",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
120 "size_real",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
121 'Graph_layout',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
122 "annotations_summary",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
123 'ltr_detection',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
124 'satellite_probability',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
125 'TAREAN_annotation',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
126 'TR_monomer_length',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
127 'TR_consensus',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
128 'kmer_analysis',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
129 "loop_index",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
130 "pair_completeness",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
131 'TR_score',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
132 "ecount",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
133 "vcount"
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
134 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
135
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
136 if (custom_db){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
137 selected_cols_main = c(selected_cols_main, "annotations_summary_custom")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
138 selected_cols_tarean = c(selected_cols_tarean, "annotations_summary_custom")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
139 selected_cols = c(selected_cols, "annotations_summary_custom")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
140 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
141 if (is_comparative()){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
142 prefix_codes = dbGetQuery(SEQDB, "SELECT * FROM prefix_codes")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
143 species_counts = dbGetQuery(HITSORTDB, "SELECT * FROM comparative_counts")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
144 superclusters = dbGetQuery(HITSORTDB,paste(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
145 "SELECT supercluster, cluster FROM superclusters WHERE cluster <=",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
146 nrow(species_counts))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
147 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
148 species_counts = merge(superclusters, species_counts, by.x = "cluster", by.y = "clusterindex")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
149 ## include commented header with total counts:
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
150 cat("# Total counts:\t\t", paste(prefix_codes$N, collapse="\t"),"\n#\n",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
151 sep="",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
152 file = paths[['comparative_analysis_counts_csv']])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
153
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
154 write.table(species_counts, file = paths[['comparative_analysis_counts_csv']],
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
155 sep = "\t", col.names = TRUE, row.names = FALSE, append=TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
156 species_counts_formated = apply(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
157 species_counts[, prefix_codes$prefix, drop = FALSE],
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
158 1, function(x) paste(prefix_codes$prefix, ":", x, "\n",sep='', collapse=""))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
159 dfraw$species_counts = species_counts_formated[1:nrow(dfraw)]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
160 selected_cols = c(selected_cols, "species_counts")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
161 selected_cols_main = c(selected_cols_main, "species_counts")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
162 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
163
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
164
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
165 df_report = dfraw[,selected_cols]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
166 ## describe tandem ranks:
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
167 df_report$TAREAN_annotation = RANKS_TANDEM[as.character(df_report$tandem_rank)]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
168 ## remove Cluster_similarity_hits
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
169 df_report_csv = reformat_df_report(df_report)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
170 df_report_csv = df_report_csv[,!colnames(df_report_csv) %in% "Cluster_similarity_hits"]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
171 df_report_csv$Final_annotation=""
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
172
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
173 ## make table for profrep classification
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
174 write.table(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
175 reformat_df_to_profrep_classification(df_report), file = profrep_classification_csv,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
176 sep = "\t", col.names = FALSE, row.names = FALSE, quote = FALSE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
177
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
178 df_report$"kmer_analysis" = ifelse(dfraw$putative_tandem, hwrite("report", link = dfraw$html_tarean), "N/A")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
179 df_report$"Graph_layout" = hwriteImage(dfraw$image_file_tmb, link = dfraw$image_file, table = FALSE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
180 df_report$Cluster = paste0("CL", df_report$index)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
181 df_report$"Proportion[%]" = signif (100 * df_report$size_real / N_clustering, 2)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
182 df_report$"Proportion_Adjusted[%]" = signif (100 * df_report$size_adjusted / N_clustering, 2)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
183 if (!tarean_mode){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
184
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
185 df_report$Cluster=sapply(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
186 df_report$index,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
187 function(x) hwrite(x, link = sprintf("%s/dir_CL%04d/index.html", clusters_dir, x)))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
188
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
189 df_report$supercluster = sapply(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
190 df_report$supercluster,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
191 function(x) hwrite(x, link = sprintf("%s/dir_SC%04d/index.html", superclusters_dir, x)))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
192 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
193 ## TAREAN report
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
194 ## copy the style sheet and the documentation page next to the TAREAN report so it can link to them
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
195 file.copy(paste0(WD,"/style1.css"), dirname(htmlfile))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
196 file.copy(paste0(WD,"/documentation.html"), dirname(htmlfile))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
197
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
198 tarean_html = start_html(htmlfile, HTMLHEADER_TAREAN)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
199 tarean_html("Tandem Repeat Analyzer", HTML.title, HR=1)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
200 tarean_html = start_html(htmlfile, HTMLHEADER_TAREAN)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
201 tarean_html('Run statistics:', HTML.title, HR=2)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
202 tarean_html(paste("Number of input reads:", N_input ))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
203 tarean_html(paste("Number of analyzed reads:", N_clustering))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
204 tarean_html(paste("Cluster merging:",ifelse(merge_threshold == 0,"No", "Yes")))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
205
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
206 ## export links to consensus sequences in fasta files
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
207 tarean_html("Consensus files - fasta format:", HTML.title, HR=2)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
208 for (i in TANDEM_RANKS[TANDEM_RANKS != 0]){ ## no consensus for rank 0
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
209 if (!is.null (consensus_files[[i]])){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
210 N = sum(dfraw$tandem_rank == TANDEM_RANKS[i])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
211 name_string = paste(names(TANDEM_RANKS)[i]," - total ", N, "found" )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
212 tarean_html(paste("<p>",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
213 hwrite(name_string, download = name_string,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
214 link = basename(consensus_files[[i]][[1]])),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
215 "<br>\n"))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
216
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
217 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
218 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
219 ## print link to documentation ##
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
220 tarean_html("Documentation", HTML.title, HR=2)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
221 tarean_html(paste('<p> For the explanation of TAREAN output see',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
222 ' <a href="documentation.html#tra" > the help section </a> <p>'))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
223 ## HOW TO CITE section)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
224
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
225 ## PRINT TABLES WITH CLUSTERS
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
226 for (n in names(TANDEM_RANKS)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
227 tarean_html(n, HTML.title, HR=2)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
228 inc <- dfraw$tandem_rank == TANDEM_RANKS[n]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
229 if (sum(inc > 0)){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
230 tarean_html(reformat4html(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
231 reformat_header(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
232 df_report[inc, selected_cols_tarean ,drop=FALSE]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
233 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
234 ),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
235 align = "left", digits = 3)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
236 }else{
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
237 tarean_html("not found")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
238 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
239 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
240
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
241 ## export table with all cluster
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
242 cat("",file = html_report_dt)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
243
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
244 DT_instance = df_report[,selected_cols_main, drop = FALSE] %>%
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
245 reformat_header %>% reformat4html %>% datatable(escape = FALSE, options = DT_OPTIONS) %>%
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
246 formatStyle(columns = seq_along(selected_cols), "font-size" = "12px") %>%
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
247 formatStyle(columns = "Similarity<br>hits<br>[above 0.1%]", "min-width" = "500px")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
248
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
249 saveWidget(DT_instance, file = normalizePath(html_report_dt),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
250 libdir=normalizePath(libdir) , selfcontained = FALSE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
251
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
252 add_preamble(normalizePath(html_report_dt),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
253 preamble='<h2>Cluster annotation</h2> <p><a href="documentation.html#clust"> For table legend see documentation. <a> </p>')
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
254
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
255 ## Main page - Clustering info - global information about clustering
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
256 top_clusters_prop = sum(df_report$size_real)/N_clustering
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
257 clustering_info = summary_histogram(fn = paths[["summary_histogram"]], N_clustering, N_omit, df_report$size_adjusted,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
258 top_clusters_prop)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
259 index_html = start_html(main_report, HTMLHEADER_INDEX)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
260 index_html("Clustering Summary", HTML.title, HR = 1)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
261
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
262 index_html(paste0('<a href="',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
263 paths[['summary_histogram__relative']],
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
264 '"> <img src="', paths[['summary_histogram__relative']],
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
265 '" width="700" border="1" >',
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
266 ' </a>'), cat)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
267 index_html('<p> <b> Graphical summary of the clustering results. </b> Bars represent superclusters, with their heights and widths corresponding to the numbers of reads in the superclusters (y-axis) and to their proportions in all analyzed reads (x-axis), respectively. Rectangles inside the supercluster bars represent individual clusters. If the filtering of abundant satellites was performed, the affected clusters are shown in green, and their sizes correspond to the adjusted values. Blue and pink background panels show proportions of reads that were clustered and remained single, respectively. Top clusters are on the left of the dotted line. </p><hr><br><br>',cat)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
268
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
269 index_html('Run information:', HTML.title, HR = 2)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
270 index_html(paste("Number of input reads:", N_input ))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
271 index_html(paste("Number of analyzed reads:", N_clustering))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
272 if (N_omit != 0){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
273 index_html(paste("Number of reads removed by automatic filtering of abundant putative satellites:", N_omit))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
274 index_html(paste("Number of remaining reads after filtering of abundant satellites:", N_clustering - N_omit ))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
275 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
276
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
277 index_html(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
278 paste(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
279 "Proportion of reads in top clusters :",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
280 signif(100 * sum(df_report$size_real)/N_clustering,2),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
281 "%"
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
282 ))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
283 index_html(paste("Cluster merging:",ifelse(merge_threshold == 0,"No", "Yes")))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
284 index_html(paste("Paired-end reads:",ifelse(paired, "Yes", "No")))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
285 index_html("Available analyses:", HTML.title, HR=2)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
286 index_html(paste("<p>",hwrite("Tandem repeat analysis", link = HTML_LINKS$INDEX_TO_TAREAN),"</p>"),cat)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
287
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
288 if (!tarean_mode){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
289 index_html(paste("<p>", hwrite("Cluster annotation", link = HTML_LINKS$INDEX_TO_CLUSTER_REPORT),"</p>"),cat)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
290 index_html(paste("<p>", hwrite("Supercluster annotation",
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
291 link = HTML_LINKS$INDEX_TO_SUPERCLUSTER_REPORT),"</p>"),cat)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
292 index_html(paste("<p>", hwrite("Repeat annotation summary", link = HTML_LINKS$INDEX_TO_SUMMARIZED_ANNOTATION),"</p>"),cat)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
293 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
294
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
295 if (is_comparative()) {
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
296 tryCatch({
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
297 imagemap = plot_rect_map(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
298 read_counts = paths[['comparative_analysis_counts_csv']],
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
299 cluster_annotation = paths[['profrep_classification_csv']],
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
300 output_file = paths[['comparative_summary_map']]
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
301 )},
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
302 error = function(err){
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
303 print(paste("error while plotting ", err))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
304 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
305 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
306
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
307 HTML.title("Comparative analysis - Total number of reads in clustering analysis", file = main_report)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
308 index_html(df2html(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
309 prefix_codes,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
310 header = c("Code", "Total read count"), rounding_function = round),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
311 cat
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
312 )
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
313 HTML.title("Comparative analysis - Number of reads in individual clusters", file = main_report)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
314
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
315 index_html(paste0('<img src="', paths[['comparative_summary_map__relative']],
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
316 '" usemap ="#clustermap" border="2">'), cat)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
317
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
318 index_html(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
319 "Bar plot on top shows the size of individual clusters. Size of the rectangles in lower panel is proportional to the number of reads in a cluster for each species. Clusters and species were sorted using hierarchical clustering. Bars and rectangles in the plot are hyperlinked to the individual cluster reports.")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
320 index_html(imagemap)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
321 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
322
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
323 how2cite = readLines(paths[["how_to_cite"]])
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
324
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
325 index_html(how2cite, cat, sep="\n")
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
326 index_html("<br><hr>", cat)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
327 index_html('Details:', HTML.title, HR = 3)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
328 index_html(pipeline_version_info %>% preformatted, cat)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
329 index_html(paste0("Minimal number of reads in cluster to be considered top cluster : ", mincln))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
330 index_html(paste0("Reserved Memory : ", round(max_memory/(1024*1024)), "G"))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
331 index_html(paste0("Maximum number of processable reads with the reserved memory : ", max_number_reads_for_clustering))
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
332
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
333
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
334 ## export to csv
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
335 clustering_info$Number_of_analyzed_reads = N_clustering
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
336 write.table(t(as.data.frame(clustering_info)),
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
337 file = clusters_summary_csv, sep="\t", col.names = FALSE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
338 cat("\n", file = clusters_summary_csv, append = TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
339 write.table(
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
340 df_report_csv, file = clusters_summary_csv,
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
341 sep = "\t", col.names = TRUE, row.names = FALSE, quote = TRUE, append=TRUE)
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
342 }
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
343
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
## Placeholder function: prints a fixed marker string.
## Takes no arguments; the value of the print() call (the string itself,
## invisibly) is the function's result.
dummy_function = function(){
  marker = "dummy function"
  print(marker)
}
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
347
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
## Build the concise table that is written to the CSV summary.
##
## Selects a fixed set of columns from df_report, in a fixed order, and
## renames them for export. The optional column "annotations_summary_custom"
## is appended (under the same name) when it is present in df_report.
##
## Args:
##   df_report: data.frame containing at least the columns index,
##     supercluster, size_real, size_adjusted, supercluster_best_hit,
##     TAREAN_annotation and annotations_summary.
## Returns:
##   data.frame with columns Cluster, Supercluster, Size, Size_adjusted,
##   Automatic_annotation, TAREAN_annotation, Cluster_similarity_hits and,
##   if available, annotations_summary_custom.
## Raises:
##   an error naming the missing column(s) when a required column is absent.
reformat_df_report = function(df_report){
  ## single source of truth: output name = source column, in export order
  column_map = c(
    Cluster = "index",
    Supercluster = "supercluster",
    Size = "size_real",
    Size_adjusted = "size_adjusted",
    Automatic_annotation = "supercluster_best_hit",
    TAREAN_annotation = "TAREAN_annotation",
    Cluster_similarity_hits = "annotations_summary"
  )
  ## the custom annotation column is optional and keeps its own name
  if ("annotations_summary_custom" %in% colnames(df_report)){
    column_map = c(column_map,
                   annotations_summary_custom = "annotations_summary_custom")
  }
  missing_cols = setdiff(column_map, colnames(df_report))
  if (length(missing_cols) > 0){
    stop("df_report is missing required column(s): ",
         paste(missing_cols, collapse = ", "))
  }
  ## columns that are not exported (e.g. TR_consensus, tandem_rank) are
  ## simply not selected; no need to touch them beforehand
  df_out = df_report[, unname(column_map), drop = FALSE]
  colnames(df_out) = names(column_map)
  return(df_out)
}
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
378
1d1b9e1b2e2f Uploaded
petr-novak
parents:
diff changeset
## Convert cluster annotations into a two-column classification table.
##
## Takes the per-cluster best hit (df_report$supercluster_best_hit), turns
## the "/" level separator into "|", restores the "/" inside lineage names
## that are written with "_" in the input (e.g. Ty1_copia -> Ty1/copia),
## strips the leading "All" root level, and labels clusters that end up
## with an empty classification as "unknown_CL<index>".
##
## Args:
##   df_report: data.frame with columns index and supercluster_best_hit.
## Returns:
##   data.frame with columns Cluster (df_report$index) and classification
##   (character).
reformat_df_to_profrep_classification = function(df_report){
  CL = df_report$index
  best_hit = df_report$supercluster_best_hit
  ## literal (fixed = TRUE) replacements, applied in this order
  replacement = list(
    c("/", "|"),
    c("Ty1_copia", "Ty1/copia"),
    c("Ty3_gypsy", "Ty3/gypsy"),
    c("TatIV_Ogre", "TatIV/Ogre"),
    c("Ogre_Tat", "Ogre/Tat"),
    c("EnSpm_CACTA", "EnSpm/CACTA"),
    c("MuDR_Mutator", "MuDR/Mutator"),
    c("PIF_Harbinger", "PIF/Harbinger"),
    ## pattern must be the underscore form like the rules above: every "/"
    ## has already become "|" by the first rule, so a "Tc1/Mariner" pattern
    ## could never match and the rule would be a no-op
    c("Tc1_Mariner", "Tc1/Mariner"),
    c("All|", "")
  )
  for (rule in replacement){
    best_hit = gsub(rule[1], rule[2], best_hit, fixed = TRUE)
  }
  ## a hit that consists of the root level only has no "All|" to strip;
  ## remove the bare leading "All" with an anchored regular expression
  best_hit = gsub("^All", "", best_hit, fixed = FALSE)
  best_hit = ifelse(best_hit == "", paste0("unknown_CL", CL), best_hit)
  output = data.frame(Cluster = CL, classification = best_hit,
                      stringsAsFactors = FALSE)
  return(output)
}