Mercurial > repos > dlalgroup > pmids_to_pubtator_matrix
comparison simtext_app.R @ 0:3f4adc85ba5d draft
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
author | dlalgroup |
---|---|
date | Thu, 24 Sep 2020 02:01:50 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:3f4adc85ba5d |
---|---|
1 #!/usr/bin/env Rscript | |
2 ### SimText App ### | |
3 # | |
4 # The tool enables the exploration of data generated by text_to_wordmatrix or pmids_to_pubtator_matrix in a locally run ShinyApp. Features are word clouds for each initial search query, dimension reduction and hierarchical clustering of the binary matrix, and a table with words and their frequency among the search queries. | |
5 # | |
6 # Input: | |
7 # | |
8 # 1) Input 1: | |
9 # Tab-delimited table with | |
10 # - column with search queries starting with "ID_", e.g. "ID_gene" if initial search queries were genes | |
11 # - column(s) with grouping factor(s) to compare pre-existing categories of the initial search queries with the grouping based on text. The column names should start with "GROUPING_". If the column name is "GROUPING_disorder", "disorder" will be shown as a grouping variable in the app. | |
12 # 2) Input 2: | |
13 # Output of text_to_wordmatrix or pmids_to_pubtator_matrix, or binary matrix. | |
14 # | |
15 # optional arguments: | |
16 # -h, --help show help message | |
17 # -i INPUT, --input INPUT input file name. add path if file is not in working directory | |
18 # -m MATRIX, --matrix MATRIX matrix file name. add path if file is not in working directory | |
19 # -p PORT, --port PORT specify port, otherwise randomly selected | |
20 # | |
21 #Output: | |
22 #Shiny app with word clouds, dimensionality reduction plot, dendrogram of hierarchical clustering and table with words and their frequency among the entities. | |
23 # | |
24 #Packages | |
25 | |
26 if ( '--install_packages' %in% commandArgs()) { | |
27 print('Installing packages') | |
28 if (!require('shiny')) install.packages('shiny', repo="http://cran.rstudio.com/"); | |
29 if (!require('plotly')) install.packages('plotly', repo="http://cran.rstudio.com/"); | |
30 if (!require('DT')) install.packages('DT', repo="http://cran.rstudio.com/"); | |
31 if (!require('shinycssloaders')) install.packages('shinycssloaders', repo="http://cran.rstudio.com/"); | |
32 if (!require('shinythemes')) install.packages('shinythemes', repo="http://cran.rstudio.com/"); | |
33 if (!require('tableHTML')) install.packages('tableHTML', repo="http://cran.rstudio.com/"); | |
34 if (!require('argparse')) install.packages('argparse', repo="http://cran.rstudio.com/"); | |
35 if (!require('PubMedWordcloud')) install.packages('PubMedWordcloud', repo="http://cran.rstudio.com/"); | |
36 if (!require('ggplot2')) install.packages('ggplot2', repo="http://cran.rstudio.com/"); | |
37 if (!require('stringr')) install.packages('stringr', repo="http://cran.rstudio.com/"); | |
38 if (!require('tidyr')) install.packages('tidyr', repo="http://cran.rstudio.com/"); | |
39 if (!require('magrittr')) install.packages('magrittr', repo="http://cran.rstudio.com/"); | |
40 if (!require('plyr')) install.packages('plyr', repo="http://cran.rstudio.com/"); | |
41 if (!require('ggpubr')) install.packages('ggpubr', repo="http://cran.rstudio.com/"); | |
42 if (!require('rafalib')) install.packages('rafalib', repo="http://cran.rstudio.com/"); | |
43 if (!require('RColorBrewer')) install.packages('RColorBrewer', repo="http://cran.rstudio.com/"); | |
44 if (!require('dendextend')) install.packages('dendextend', repo="http://cran.rstudio.com/"); | |
45 if (!require('Rtsne')) install.packages('Rtsne', repo="http://cran.rstudio.com/"); | |
46 if (!require('umap')) install.packages('umap', repo="http://cran.rstudio.com/"); | |
47 } | |
48 | |
49 suppressPackageStartupMessages(library("shiny")) | |
50 suppressPackageStartupMessages(library("plotly")) | |
51 suppressPackageStartupMessages(library("DT")) | |
52 suppressPackageStartupMessages(library("shinycssloaders")) | |
53 suppressPackageStartupMessages(library("shinythemes")) | |
54 suppressPackageStartupMessages(library("tableHTML")) | |
55 suppressPackageStartupMessages(library("argparse")) | |
56 suppressPackageStartupMessages(library("PubMedWordcloud")) | |
57 suppressPackageStartupMessages(library("ggplot2")) | |
58 suppressPackageStartupMessages(library("stringr")) | |
59 suppressPackageStartupMessages(library("tidyr")) | |
60 suppressPackageStartupMessages(library("magrittr")) | |
61 suppressPackageStartupMessages(library("plyr")) | |
62 suppressPackageStartupMessages(library("ggpubr")) | |
63 suppressPackageStartupMessages(library("rafalib")) | |
64 suppressPackageStartupMessages(library("RColorBrewer")) | |
65 suppressPackageStartupMessages(library("dendextend")) | |
66 suppressPackageStartupMessages(library("Rtsne")) | |
67 suppressPackageStartupMessages(library("umap")) | |
68 | |
69 #command arguments | |
70 parser <- ArgumentParser() | |
71 parser$add_argument("-i", "--input", | |
72 help = "input file name. add path if file is not in working directory") | |
73 parser$add_argument("-m", "--matrix", default= NULL, | |
74 help = "matrix file name. add path if file is not in working directory") | |
75 parser$add_argument("--host", default=NULL, | |
76 help="Specify host") | |
77 parser$add_argument("-p", "--port", type="integer", default=NULL, | |
78 help="Specify port, otherwise randomly select") | |
79 parser$add_argument("--install_packages", action="store_true", default=FALSE, | |
80 help="If you want to auto install missing required packages.") | |
81 args <- parser$parse_args() | |
82 | |
83 # Set host | |
84 if(!is.null(args$host)){ | |
85 options(shiny.host = args$host) | |
86 } | |
87 | |
88 # Set port | |
89 if(!is.null(args$port)){ | |
90 options(shiny.port = args$port) | |
91 } | |
92 | |
93 #load data | |
94 data = read.delim(args$input, stringsAsFactors=FALSE) | |
95 index_grouping = grep("GROUPING_", names(data)) | |
96 names(data)[index_grouping] = sub(".*_", "",names(data)[index_grouping]) | |
97 colindex_id = grep("^ID_", names(data)) | |
98 | |
99 matrix = read.delim(args$matrix, check.names = FALSE, header = TRUE, sep='\t') | |
100 matrix = (as.matrix(matrix)>0) *1 #transform matrix to binary matrix | |
101 | |
102 ##### UI ###### | |
103 ui <- shinyUI(fluidPage( | |
104 navbarPage(theme = shinytheme("flatly"), id = "inTabset",selected = "panel1", | |
105 title = "SimText", | |
106 tabPanel("Home", value = "panel1", | |
107 tabPanel("Results", value = "panel1", | |
108 fluidRow(width=12, offset=0, | |
109 column(width = 4, style = "padding-right: 0px", | |
110 wellPanel(h5(strong("ID of interest")), | |
111 style = "background-color:white; | |
112 border-bottom: 2px solid #EEEEEE; | |
113 border-top-color: white; | |
114 border-right-color: white; | |
115 border-left-color: white; | |
116 box-shadow: 0px 0px 0px white; | |
117 padding:3px; | |
118 width: 100%"), | |
119 selectInput('ID', 'Select ID:', paste0(data[[colindex_id]]," (",seq(1,length(data[[colindex_id]])),")"))), | |
120 column(width = 3, style = "padding-right: 0px", | |
121 wellPanel(h5(strong("Color settings")), | |
122 style = "background-color:white; | |
123 border-bottom: 2px solid #EEEEEE; | |
124 border-top-color: white; | |
125 border-right-color: white; | |
126 border-left-color: white; | |
127 box-shadow: 0px 0px 0px white; | |
128 padding:3px; | |
129 width: 100%"), | |
130 radioButtons('colour', 'Color by:', c("Grouping variable", "Individual word")), | |
131 selectInput("colour_select", "Select:", choices=c(names(data)[index_grouping]))) | |
132 ), | |
133 fluidRow(width = 12, offset = 0, | |
134 column(width = 4, #style = "height:650px;", | |
135 wellPanel(textOutput("ID"), | |
136 style = "background-color: #333333; | |
137 color: white; | |
138 border-top-color: #333333; | |
139 border-left-color: #333333; | |
140 border-right-color: #333333; | |
141 box-shadow: 3px 3px 3px #d8d8d8; | |
142 margin-bottom: 0px; | |
143 padding:5px"), | |
144 wellPanel( | |
145 fluidRow( | |
146 column(width = 4, | |
147 numericInput('fontsize', 'Font size:',value = 7, min=1, max=50)), | |
148 column(width = 4, | |
149 numericInput('nword', 'Word number:',value = 50, min=1, max=100)), | |
150 column(width = 12, | |
151 withSpinner(plotOutput("WordcloudPlot",height= "325px"))), | |
152 column(width = 12, | |
153 downloadLink("downloadWordcloud", "Download"))), | |
154 style = "background-color: #ffffff; | |
155 border-bottom-color: #333333; | |
156 border-left-color: #333333; | |
157 height: 470px; | |
158 border-right-color: #333333; | |
159 box-shadow: 3px 3px 3px #d8d8d8; | |
160 margin-top: 0px"), | |
161 wellPanel(textOutput("Table"), | |
162 style = "background-color: #333333; | |
163 color: white; | |
164 border-top-color: #333333; | |
165 border-left-color: #333333; | |
166 border-right-color: #333333; | |
167 box-shadow: 3px 3px 3px #d8d8d8; | |
168 margin-bottom: 0px; | |
169 padding:5px"), | |
170 wellPanel(withSpinner(DT::dataTableOutput("datatable", height= "150px")), | |
171 style = "background-color: #ffffff; | |
172 border-bottom-color: #333333; | |
173 border-left-color: #333333; | |
174 border-right-color: #333333; | |
175 height: 175px; | |
176 box-shadow: 3px 3px 3px #d8d8d8; | |
177 margin-top: 0px") | |
178 ), | |
179 column(width = 8, #style='padding:0px;', | |
180 wellPanel("T-SNE plot of wordmatrix", | |
181 style = "background-color: #333333; | |
182 color: white; | |
183 border-top-color: #333333; | |
184 border-left-color: #333333; | |
185 border-right-color: #333333; | |
186 box-shadow: 3px 3px 3px #d8d8d8; | |
187 margin-bottom: 0px; | |
188 padding:5px"), | |
189 wellPanel( | |
190 fluidRow( | |
191 column(width = 2, | |
192 radioButtons('method', 'Method:',choices=c("t-SNE","UMAP"))), | |
193 column(width = 2, | |
194 numericInput('perplexity', 'Perplexity:',value = 2, min=1, max=nrow(data)-1)), | |
195 column(width = 2, | |
196 radioButtons('label', 'Labels:',choices=c("Index","IDs"))), | |
197 column(width = 2, | |
198 numericInput('labelsize', 'Label size:',value = 12, min=1, max=30)), | |
199 column(width = 8, style='padding:0px;', | |
200 withSpinner(plotlyOutput("TsnePlot",height=550))), | |
201 column(width = 4, style='padding:0px;', | |
202 withSpinner(plotOutput("TsnePlot_legend",height=550))), | |
203 column(width=2, | |
204 downloadLink("downloadPlotdata",label = "Download data"))), | |
205 style = "background-color: white; | |
206 border-bottom-color: #333333; | |
207 border-left-color: #333333; | |
208 border-right-color: #333333; | |
209 box-shadow: 3px 3px 3px #d8d8d8; | |
210 margin-top: 0px" | |
211 #height=575px | |
212 ))), | |
213 fluidRow(column(width = 12, | |
214 wellPanel("Hierarchical clustering of wordmatrix", | |
215 style = "background-color: #333333; | |
216 color: white; | |
217 border-top-color: #333333; | |
218 border-left-color: #333333; | |
219 border-right-color: #333333; | |
220 box-shadow: 3px 3px 3px #d8d8d8; | |
221 margin-bottom: 0px; | |
222 padding:5px") | |
223 , | |
224 wellPanel( | |
225 fluidRow( | |
226 column(width = 2, | |
227 radioButtons('hcmethod', 'Method:',choices=c("ward.D2","average","complete","single"))), | |
228 column(width = 2, | |
229 numericInput('labelsize_hc', 'Label size:', value = 8, min=1, max=30)) | |
230 ), | |
231 fluidRow( | |
232 column(width = 9, | |
233 withSpinner(plotOutput("hclust"))), | |
234 column(width = 3, | |
235 withSpinner(plotOutput("hclust_legend"))) | |
236 ), | |
237 style = "background-color: #ffffff; | |
238 border-bottom-color: #333333; | |
239 border-left-color: #333333; | |
240 border-right-color: #333333; | |
241 box-shadow: 3px 3px 3px #d8d8d8; | |
242 margin-top: 0px") | |
243 , | |
244 verbatimTextOutput("test") | |
245 )) | |
246 )) | |
247 | |
248 # , | |
249 #tabPanel("About", value = "panel2", h3("")) | |
250 | |
251 ))) | |
252 | |
253 | |
254 | |
255 | |
256 ###### SERVER ###### | |
257 server <- function(input, output, session) { | |
258 | |
259 ##### Global ##### | |
260 IDs = reactive(paste0(data[[colindex_id]]," (",seq(1,length(data[[colindex_id]])),")")) | |
261 index_ID = reactive({which(IDs() == input$ID)}) | |
262 | |
263 ##### Wordcloud plot and download ###### | |
264 | |
265 output$ID <- renderText({ | |
266 paste("Wordcloud of",data[[colindex_id]][index_ID()]) | |
267 }) | |
268 | |
269 output$WordcloudPlot <- renderPlot({ | |
270 ID_matrix = matrix[index_ID(),] | |
271 ID_matrix = data.frame(word= as.character(names(ID_matrix)), freq= ID_matrix) | |
272 colnames(ID_matrix) = c("word", "freq") | |
273 ID_matrix = ID_matrix[ID_matrix$freq == 1,] | |
274 | |
275 plotWordCloud(ID_matrix, | |
276 max.words = min(nrow(ID_matrix),input$nword), | |
277 scale= c(input$fontsize/10, input$fontsize/10), | |
278 colors= brewer.pal(8,"Greys")[4:8]) | |
279 }) | |
280 | |
281 output$downloadWordcloud <- downloadHandler( | |
282 filename = function() { | |
283 paste0(paste0("Wordcloudof",data[[colindex_id]][index_ID()]),".pdf", sep="") | |
284 }, | |
285 content = function(file) { | |
286 ID_matrix = matrix[index_ID(),] | |
287 ID_matrix = data.frame(word= names(ID_matrix), freq= ID_matrix) | |
288 colnames(ID_matrix) = c("word", "freq") | |
289 ID_matrix = ID_matrix[ID_matrix$freq == 1,] | |
290 | |
291 pdf(file) | |
292 plotWordCloud(ID_matrix, | |
293 max.words = min(max(nrow(ID_matrix)),input$nword), | |
294 scale= c(input$fontsize/10, input$fontsize/10), | |
295 colors= brewer.pal(8,"Greys")[4:8]) | |
296 dev.off() | |
297 } | |
298 ) | |
299 | |
300 ##### Table ##### | |
301 output$Table <- renderText({ | |
302 paste("Most occuring words among IDs") | |
303 }) | |
304 | |
305 output$datatable <- DT::renderDataTable({ | |
306 | |
307 colsum_data= data.frame(word=colnames(matrix), freq=colSums(matrix)) | |
308 colsum_data = colsum_data[order(colsum_data$freq, decreasing = T),] | |
309 colnames(colsum_data) = c("Word", paste0("IDs (total n=", nrow(matrix),")")) | |
310 | |
311 DT::datatable(colsum_data, | |
312 extensions = c("Buttons"), | |
313 rownames = F, | |
314 fillContainer = T, | |
315 escape=FALSE, | |
316 options = list(dom = "t", | |
317 scrollY = min(nrow(colsum_data),500), | |
318 scrollX= TRUE, | |
319 scroller = TRUE, | |
320 autoWidth = TRUE, | |
321 pageLength = nrow(colsum_data), | |
322 columnDefs = list(list(className = 'dt-center', targets = "_all"), | |
323 list(width = '50%', targets = "_all"))) | |
324 ) | |
325 }) | |
326 | |
327 ##### Colour/Grouping ##### | |
328 | |
329 outVar <- reactive({ | |
330 if(input$colour == "Grouping variable"){ | |
331 return(names(data)[index_grouping]) | |
332 } else { | |
333 return(colnames(matrix)) | |
334 } | |
335 }) | |
336 | |
337 observe({ | |
338 updateSelectInput(session, "colour_select", choices = outVar())}) | |
339 | |
340 colour_choice = reactive({ | |
341 if(input$colour == "Grouping variable"){ | |
342 return(as.factor(data[,input$colour_select])) | |
343 } else { | |
344 matrix = as.data.frame(matrix) | |
345 colour_byword = matrix[[input$colour_select]] | |
346 colour_byword = ifelse(colour_byword > 0,"Selected word associated with ID","Selected word not associated with ID") | |
347 return(as.factor(colour_byword)) | |
348 } | |
349 }) | |
350 | |
351 color_palette = reactive({palette=c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", | |
352 "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6", "#6A3D9A", | |
353 "#00AFBB", "#E7B800", "#FC4E07", "#999999", "#E69F00", | |
354 "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00") | |
355 return( palette[1:length(levels(colour_choice()))] ) | |
356 }) | |
357 | |
358 ##### Dimension reduction plot and download ##### | |
359 | |
360 data.dimred = reactive({ | |
361 if (input$method == "t-SNE"){ | |
362 tsne_result <- Rtsne(matrix, perplexity = input$perplexity, check_duplicates=F) | |
363 data["X_Coord"] = tsne_result$Y[,1] | |
364 data["Y_Coord"] = tsne_result$Y[,2] | |
365 return(data) | |
366 } else if (input$method == "UMAP"){ | |
367 umap_result = umap(matrix) | |
368 data["X_Coord"] = umap_result$layout[,1] | |
369 data["Y_Coord"] = umap_result$layout[,2] | |
370 return(data) | |
371 } | |
372 }) | |
373 | |
374 output$TsnePlot <- renderPlotly({ | |
375 | |
376 if (input$label == "Index") { | |
377 labeling = as.character(seq(1,nrow(data))) | |
378 } else if (input$label == "IDs") { | |
379 labeling= as.character(data[[colindex_id]]) | |
380 } | |
381 | |
382 p = plot_ly(colors = color_palette()) %>% | |
383 add_trace(type="scatter", | |
384 mode = 'markers', | |
385 x = data.dimred()$X_Coord[index_ID()], | |
386 y = data.dimred()$Y_Coord[index_ID()], | |
387 opacity=0.15, | |
388 marker = list( | |
389 color = "grey", | |
390 size = 80)) %>% | |
391 add_trace(x=data.dimred()$X_Coord, | |
392 y=data.dimred()$Y_Coord, | |
393 type="scatter", | |
394 mode="text", | |
395 text= labeling, | |
396 textfont = list(size= input$labelsize), | |
397 color = factor(colour_choice())) %>% | |
398 add_trace(x=data.dimred()$X_Coord, | |
399 y=data.dimred()$Y_Coord, | |
400 type="scatter", | |
401 mode="markers", | |
402 opacity=0, | |
403 text= paste0( "ID: ",data[[colindex_id]], "\n", | |
404 "Index: ",seq(1,nrow(data)), "\n", | |
405 "Grouping: ", paste(data[,index_grouping])), | |
406 hoverinfo = "text", | |
407 color = factor(colour_choice())) %>% | |
408 layout(showlegend = FALSE, | |
409 yaxis= list(title = "", | |
410 zeroline = FALSE, | |
411 linecolor = toRGB("black"), | |
412 linewidth = 1, | |
413 showticklabels = FALSE, | |
414 showgrid = FALSE), | |
415 xaxis = list(title = "", | |
416 zeroline = FALSE, | |
417 linecolor = toRGB("black"), | |
418 linewidth = 1, | |
419 showticklabels = FALSE, | |
420 showgrid = FALSE), | |
421 autosize = T) %>% | |
422 config(modeBarButtonsToRemove = c("zoomIn2d", "zoomOut2d", "hoverClosestGeo", "hoverClosestGl2d", "toImage", | |
423 "hoverClosestCartesian", "lasso2d", "select2d", "resetScale2d", | |
424 "hoverCompareCartesian", "hoverClosestPie", "toggleSpikelines"), displaylogo = FALSE) %>% | |
425 style(hoverinfo = "none", traces = c(1,2)) | |
426 | |
427 p | |
428 }) | |
429 | |
430 #legend of plotly plot by ggplot | |
431 | |
432 output$TsnePlot_legend <- renderPlot({ | |
433 p = ggplot(data, aes(x=1, y=1)) + | |
434 geom_text(aes(label=seq(1,nrow(data)), colour=factor(colour_choice())), | |
435 size=3.5, fontface = "bold") + | |
436 theme_classic()+ | |
437 scale_color_manual(values = color_palette())+ | |
438 theme(legend.title = element_blank())+ | |
439 theme(legend.position = "right")+ | |
440 theme(legend.text=element_text(size=9)) | |
441 leg <- get_legend(p) | |
442 as_ggplot(leg) | |
443 }) | |
444 | |
445 output$downloadPlotdata <- downloadHandler( | |
446 filename = function() { | |
447 paste0(input$method,"_coordinates.csv") | |
448 }, | |
449 content = function(file) { | |
450 write.csv(data.dimred(), file, row.names = F) | |
451 } | |
452 ) | |
453 | |
454 ##### Hierarchical clustering ####### | |
455 | |
456 output$hclust <- renderPlot({ | |
457 set.seed(42) | |
458 clustering=hclust(dist(matrix), method=input$hcmethod) | |
459 par(oma=c(3,3,3,3)) | |
460 palette(color_palette()) | |
461 par(mar = rep(0, 4)) | |
462 myplclust(clustering, | |
463 labels=paste(data[[colindex_id]]), | |
464 lab.col=as.fumeric(as.character(colour_choice()), levels = sort(unique(as.character(colour_choice())))), | |
465 cex=as.numeric(input$labelsize_hc/10), | |
466 main="", | |
467 yaxt="n", | |
468 ylab= "") | |
469 }) | |
470 | |
471 #legend | |
472 output$hclust_legend <- renderPlot({ | |
473 p = ggplot(data, aes(x=1, y=1)) + | |
474 geom_text(aes(label=seq(1,nrow(data)), colour=factor(colour_choice())), fontface = "bold") + | |
475 theme_classic()+ | |
476 scale_color_manual(values = color_palette())+ | |
477 theme(legend.title = element_blank())+ | |
478 theme(legend.position = "right")+ | |
479 theme(legend.text=element_text(size=9)) | |
480 leg <- get_legend(p) | |
481 as_ggplot(leg) | |
482 }) | |
483 | |
484 | |
485 ##### Test field for development ###### | |
486 #output$test <- renderPrint({ | |
487 #}) | |
488 | |
489 } | |
490 | |
491 ###### APP ###### | |
492 shinyApp(ui, server) |