comparison simtext_app.R @ 0:34ed44f3f85c draft

"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
author dlalgroup
date Thu, 24 Sep 2020 02:17:05 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:34ed44f3f85c
1 #!/usr/bin/env Rscript
2 ### SimText App ###
3 #
4 # The tool enables the exploration of data generated by text_to_wordmatrix or pmids_to_pubtator_matrix in a locally run ShinyApp. Features are word clouds for each initial search query, dimension reduction and hierarchical clustering of the binary matrix, and a table with words and their frequency among the search queries.
5 #
6 # Input:
7 #
8 # 1) Input 1:
9 # Tab-delimited table with
10 # - column with search queries starting with "ID_", e.g. "ID_gene" if initial search queries were genes
11 # - column(s) with grouping factor(s) to compare pre-existing categories of the initial search queries with the grouping based on text. The column names should start with "GROUPING_". If the column name is "GROUPING_disorder", "disorder" will be shown as a grouping variable in the app.
12 # 2) Input 2:
13 # Output of text_to_wordmatrix or pmids_to_pubtator_matrix, or binary matrix.
14 #
15 # optional arguments:
16 # -h, --help show help message
17 # -i INPUT, --input INPUT input file name. add path if file is not in working directory
18 # -m MATRIX, --matrix MATRIX matrix file name. add path if file is not in working directory
19 # -p PORT, --port PORT specify port, otherwise randomly selected
20 #
21 #Output:
22 #Shiny app with word clouds, dimensionality reduction plot, dendrogram of hierarchical clustering and table with words and their frequency among the entities.
23 #
24 #Packages
25
26 if ( '--install_packages' %in% commandArgs()) {
27 print('Installing packages')
28 if (!require('shiny')) install.packages('shiny', repo="http://cran.rstudio.com/");
29 if (!require('plotly')) install.packages('plotly', repo="http://cran.rstudio.com/");
30 if (!require('DT')) install.packages('DT', repo="http://cran.rstudio.com/");
31 if (!require('shinycssloaders')) install.packages('shinycssloaders', repo="http://cran.rstudio.com/");
32 if (!require('shinythemes')) install.packages('shinythemes', repo="http://cran.rstudio.com/");
33 if (!require('tableHTML')) install.packages('tableHTML', repo="http://cran.rstudio.com/");
34 if (!require('argparse')) install.packages('argparse', repo="http://cran.rstudio.com/");
35 if (!require('PubMedWordcloud')) install.packages('PubMedWordcloud', repo="http://cran.rstudio.com/");
36 if (!require('ggplot2')) install.packages('ggplot2', repo="http://cran.rstudio.com/");
37 if (!require('stringr')) install.packages('stringr', repo="http://cran.rstudio.com/");
38 if (!require('tidyr')) install.packages('tidyr', repo="http://cran.rstudio.com/");
39 if (!require('magrittr')) install.packages('magrittr', repo="http://cran.rstudio.com/");
40 if (!require('plyr')) install.packages('plyr', repo="http://cran.rstudio.com/");
41 if (!require('ggpubr')) install.packages('ggpubr', repo="http://cran.rstudio.com/");
42 if (!require('rafalib')) install.packages('rafalib', repo="http://cran.rstudio.com/");
43 if (!require('RColorBrewer')) install.packages('RColorBrewer', repo="http://cran.rstudio.com/");
44 if (!require('dendextend')) install.packages('dendextend', repo="http://cran.rstudio.com/");
45 if (!require('Rtsne')) install.packages('Rtsne', repo="http://cran.rstudio.com/");
46 if (!require('umap')) install.packages('umap', repo="http://cran.rstudio.com/");
47 }
48
49 suppressPackageStartupMessages(library("shiny"))
50 suppressPackageStartupMessages(library("plotly"))
51 suppressPackageStartupMessages(library("DT"))
52 suppressPackageStartupMessages(library("shinycssloaders"))
53 suppressPackageStartupMessages(library("shinythemes"))
54 suppressPackageStartupMessages(library("tableHTML"))
55 suppressPackageStartupMessages(library("argparse"))
56 suppressPackageStartupMessages(library("PubMedWordcloud"))
57 suppressPackageStartupMessages(library("ggplot2"))
58 suppressPackageStartupMessages(library("stringr"))
59 suppressPackageStartupMessages(library("tidyr"))
60 suppressPackageStartupMessages(library("magrittr"))
61 suppressPackageStartupMessages(library("plyr"))
62 suppressPackageStartupMessages(library("ggpubr"))
63 suppressPackageStartupMessages(library("rafalib"))
64 suppressPackageStartupMessages(library("RColorBrewer"))
65 suppressPackageStartupMessages(library("dendextend"))
66 suppressPackageStartupMessages(library("Rtsne"))
67 suppressPackageStartupMessages(library("umap"))
68
69 #command arguments
70 parser <- ArgumentParser()
71 parser$add_argument("-i", "--input",
72 help = "input file name. add path if file is not in working directory")
73 parser$add_argument("-m", "--matrix", default= NULL,
74 help = "matrix file name. add path if file is not in working directory")
75 parser$add_argument("--host", default=NULL,
76 help="Specify host")
77 parser$add_argument("-p", "--port", type="integer", default=NULL,
78 help="Specify port, otherwise randomly select")
79 parser$add_argument("--install_packages", action="store_true", default=FALSE,
80 help="If you want to auto install missing required packages.")
81 args <- parser$parse_args()
82
83 # Set host
84 if(!is.null(args$host)){
85 options(shiny.host = args$host)
86 }
87
88 # Set port
89 if(!is.null(args$port)){
90 options(shiny.port = args$port)
91 }
92
93 #load data
94 data = read.delim(args$input, stringsAsFactors=FALSE)
95 index_grouping = grep("GROUPING_", names(data))
96 names(data)[index_grouping] = sub(".*_", "",names(data)[index_grouping])
97 colindex_id = grep("^ID_", names(data))
98
99 matrix = read.delim(args$matrix, check.names = FALSE, header = TRUE, sep='\t')
100 matrix = (as.matrix(matrix)>0) *1 #transform matrix to binary matrix
101
102 ##### UI ######
103 ui <- shinyUI(fluidPage(
104 navbarPage(theme = shinytheme("flatly"), id = "inTabset",selected = "panel1",
105 title = "SimText",
106 tabPanel("Home", value = "panel1",
107 tabPanel("Results", value = "panel1",
108 fluidRow(width=12, offset=0,
109 column(width = 4, style = "padding-right: 0px",
110 wellPanel(h5(strong("ID of interest")),
111 style = "background-color:white;
112 border-bottom: 2px solid #EEEEEE;
113 border-top-color: white;
114 border-right-color: white;
115 border-left-color: white;
116 box-shadow: 0px 0px 0px white;
117 padding:3px;
118 width: 100%"),
119 selectInput('ID', 'Select ID:', paste0(data[[colindex_id]]," (",seq(1,length(data[[colindex_id]])),")"))),
120 column(width = 3, style = "padding-right: 0px",
121 wellPanel(h5(strong("Color settings")),
122 style = "background-color:white;
123 border-bottom: 2px solid #EEEEEE;
124 border-top-color: white;
125 border-right-color: white;
126 border-left-color: white;
127 box-shadow: 0px 0px 0px white;
128 padding:3px;
129 width: 100%"),
130 radioButtons('colour', 'Color by:', c("Grouping variable", "Individual word")),
131 selectInput("colour_select", "Select:", choices=c(names(data)[index_grouping])))
132 ),
133 fluidRow(width = 12, offset = 0,
134 column(width = 4, #style = "height:650px;",
135 wellPanel(textOutput("ID"),
136 style = "background-color: #333333;
137 color: white;
138 border-top-color: #333333;
139 border-left-color: #333333;
140 border-right-color: #333333;
141 box-shadow: 3px 3px 3px #d8d8d8;
142 margin-bottom: 0px;
143 padding:5px"),
144 wellPanel(
145 fluidRow(
146 column(width = 4,
147 numericInput('fontsize', 'Font size:',value = 7, min=1, max=50)),
148 column(width = 4,
149 numericInput('nword', 'Word number:',value = 50, min=1, max=100)),
150 column(width = 12,
151 withSpinner(plotOutput("WordcloudPlot",height= "325px"))),
152 column(width = 12,
153 downloadLink("downloadWordcloud", "Download"))),
154 style = "background-color: #ffffff;
155 border-bottom-color: #333333;
156 border-left-color: #333333;
157 height: 470px;
158 border-right-color: #333333;
159 box-shadow: 3px 3px 3px #d8d8d8;
160 margin-top: 0px"),
161 wellPanel(textOutput("Table"),
162 style = "background-color: #333333;
163 color: white;
164 border-top-color: #333333;
165 border-left-color: #333333;
166 border-right-color: #333333;
167 box-shadow: 3px 3px 3px #d8d8d8;
168 margin-bottom: 0px;
169 padding:5px"),
170 wellPanel(withSpinner(DT::dataTableOutput("datatable", height= "150px")),
171 style = "background-color: #ffffff;
172 border-bottom-color: #333333;
173 border-left-color: #333333;
174 border-right-color: #333333;
175 height: 175px;
176 box-shadow: 3px 3px 3px #d8d8d8;
177 margin-top: 0px")
178 ),
179 column(width = 8, #style='padding:0px;',
180 wellPanel("T-SNE plot of wordmatrix",
181 style = "background-color: #333333;
182 color: white;
183 border-top-color: #333333;
184 border-left-color: #333333;
185 border-right-color: #333333;
186 box-shadow: 3px 3px 3px #d8d8d8;
187 margin-bottom: 0px;
188 padding:5px"),
189 wellPanel(
190 fluidRow(
191 column(width = 2,
192 radioButtons('method', 'Method:',choices=c("t-SNE","UMAP"))),
193 column(width = 2,
194 numericInput('perplexity', 'Perplexity:',value = 2, min=1, max=nrow(data)-1)),
195 column(width = 2,
196 radioButtons('label', 'Labels:',choices=c("Index","IDs"))),
197 column(width = 2,
198 numericInput('labelsize', 'Label size:',value = 12, min=1, max=30)),
199 column(width = 8, style='padding:0px;',
200 withSpinner(plotlyOutput("TsnePlot",height=550))),
201 column(width = 4, style='padding:0px;',
202 withSpinner(plotOutput("TsnePlot_legend",height=550))),
203 column(width=2,
204 downloadLink("downloadPlotdata",label = "Download data"))),
205 style = "background-color: white;
206 border-bottom-color: #333333;
207 border-left-color: #333333;
208 border-right-color: #333333;
209 box-shadow: 3px 3px 3px #d8d8d8;
210 margin-top: 0px"
211 #height=575px
212 ))),
213 fluidRow(column(width = 12,
214 wellPanel("Hierarchical clustering of wordmatrix",
215 style = "background-color: #333333;
216 color: white;
217 border-top-color: #333333;
218 border-left-color: #333333;
219 border-right-color: #333333;
220 box-shadow: 3px 3px 3px #d8d8d8;
221 margin-bottom: 0px;
222 padding:5px")
223 ,
224 wellPanel(
225 fluidRow(
226 column(width = 2,
227 radioButtons('hcmethod', 'Method:',choices=c("ward.D2","average","complete","single"))),
228 column(width = 2,
229 numericInput('labelsize_hc', 'Label size:', value = 8, min=1, max=30))
230 ),
231 fluidRow(
232 column(width = 9,
233 withSpinner(plotOutput("hclust"))),
234 column(width = 3,
235 withSpinner(plotOutput("hclust_legend")))
236 ),
237 style = "background-color: #ffffff;
238 border-bottom-color: #333333;
239 border-left-color: #333333;
240 border-right-color: #333333;
241 box-shadow: 3px 3px 3px #d8d8d8;
242 margin-top: 0px")
243 ,
244 verbatimTextOutput("test")
245 ))
246 ))
247
248 # ,
249 #tabPanel("About", value = "panel2", h3(""))
250
251 )))
252
253
254
255
256 ###### SERVER ######
257 server <- function(input, output, session) {
258
259 ##### Global #####
260 IDs = reactive(paste0(data[[colindex_id]]," (",seq(1,length(data[[colindex_id]])),")"))
261 index_ID = reactive({which(IDs() == input$ID)})
262
263 ##### Wordcloud plot and download ######
264
265 output$ID <- renderText({
266 paste("Wordcloud of",data[[colindex_id]][index_ID()])
267 })
268
269 output$WordcloudPlot <- renderPlot({
270 ID_matrix = matrix[index_ID(),]
271 ID_matrix = data.frame(word= as.character(names(ID_matrix)), freq= ID_matrix)
272 colnames(ID_matrix) = c("word", "freq")
273 ID_matrix = ID_matrix[ID_matrix$freq == 1,]
274
275 plotWordCloud(ID_matrix,
276 max.words = min(nrow(ID_matrix),input$nword),
277 scale= c(input$fontsize/10, input$fontsize/10),
278 colors= brewer.pal(8,"Greys")[4:8])
279 })
280
281 output$downloadWordcloud <- downloadHandler(
282 filename = function() {
283 paste0(paste0("Wordcloudof",data[[colindex_id]][index_ID()]),".pdf", sep="")
284 },
285 content = function(file) {
286 ID_matrix = matrix[index_ID(),]
287 ID_matrix = data.frame(word= names(ID_matrix), freq= ID_matrix)
288 colnames(ID_matrix) = c("word", "freq")
289 ID_matrix = ID_matrix[ID_matrix$freq == 1,]
290
291 pdf(file)
292 plotWordCloud(ID_matrix,
293 max.words = min(max(nrow(ID_matrix)),input$nword),
294 scale= c(input$fontsize/10, input$fontsize/10),
295 colors= brewer.pal(8,"Greys")[4:8])
296 dev.off()
297 }
298 )
299
300 ##### Table #####
301 output$Table <- renderText({
302 paste("Most occuring words among IDs")
303 })
304
305 output$datatable <- DT::renderDataTable({
306
307 colsum_data= data.frame(word=colnames(matrix), freq=colSums(matrix))
308 colsum_data = colsum_data[order(colsum_data$freq, decreasing = T),]
309 colnames(colsum_data) = c("Word", paste0("IDs (total n=", nrow(matrix),")"))
310
311 DT::datatable(colsum_data,
312 extensions = c("Buttons"),
313 rownames = F,
314 fillContainer = T,
315 escape=FALSE,
316 options = list(dom = "t",
317 scrollY = min(nrow(colsum_data),500),
318 scrollX= TRUE,
319 scroller = TRUE,
320 autoWidth = TRUE,
321 pageLength = nrow(colsum_data),
322 columnDefs = list(list(className = 'dt-center', targets = "_all"),
323 list(width = '50%', targets = "_all")))
324 )
325 })
326
327 ##### Colour/Grouping #####
328
329 outVar <- reactive({
330 if(input$colour == "Grouping variable"){
331 return(names(data)[index_grouping])
332 } else {
333 return(colnames(matrix))
334 }
335 })
336
337 observe({
338 updateSelectInput(session, "colour_select", choices = outVar())})
339
340 colour_choice = reactive({
341 if(input$colour == "Grouping variable"){
342 return(as.factor(data[,input$colour_select]))
343 } else {
344 matrix = as.data.frame(matrix)
345 colour_byword = matrix[[input$colour_select]]
346 colour_byword = ifelse(colour_byword > 0,"Selected word associated with ID","Selected word not associated with ID")
347 return(as.factor(colour_byword))
348 }
349 })
350
351 color_palette = reactive({palette=c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99",
352 "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6", "#6A3D9A",
353 "#00AFBB", "#E7B800", "#FC4E07", "#999999", "#E69F00",
354 "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00")
355 return( palette[1:length(levels(colour_choice()))] )
356 })
357
358 ##### Dimension reduction plot and download #####
359
360 data.dimred = reactive({
361 if (input$method == "t-SNE"){
362 tsne_result <- Rtsne(matrix, perplexity = input$perplexity, check_duplicates=F)
363 data["X_Coord"] = tsne_result$Y[,1]
364 data["Y_Coord"] = tsne_result$Y[,2]
365 return(data)
366 } else if (input$method == "UMAP"){
367 umap_result = umap(matrix)
368 data["X_Coord"] = umap_result$layout[,1]
369 data["Y_Coord"] = umap_result$layout[,2]
370 return(data)
371 }
372 })
373
374 output$TsnePlot <- renderPlotly({
375
376 if (input$label == "Index") {
377 labeling = as.character(seq(1,nrow(data)))
378 } else if (input$label == "IDs") {
379 labeling= as.character(data[[colindex_id]])
380 }
381
382 p = plot_ly(colors = color_palette()) %>%
383 add_trace(type="scatter",
384 mode = 'markers',
385 x = data.dimred()$X_Coord[index_ID()],
386 y = data.dimred()$Y_Coord[index_ID()],
387 opacity=0.15,
388 marker = list(
389 color = "grey",
390 size = 80)) %>%
391 add_trace(x=data.dimred()$X_Coord,
392 y=data.dimred()$Y_Coord,
393 type="scatter",
394 mode="text",
395 text= labeling,
396 textfont = list(size= input$labelsize),
397 color = factor(colour_choice())) %>%
398 add_trace(x=data.dimred()$X_Coord,
399 y=data.dimred()$Y_Coord,
400 type="scatter",
401 mode="markers",
402 opacity=0,
403 text= paste0( "ID: ",data[[colindex_id]], "\n",
404 "Index: ",seq(1,nrow(data)), "\n",
405 "Grouping: ", paste(data[,index_grouping])),
406 hoverinfo = "text",
407 color = factor(colour_choice())) %>%
408 layout(showlegend = FALSE,
409 yaxis= list(title = "",
410 zeroline = FALSE,
411 linecolor = toRGB("black"),
412 linewidth = 1,
413 showticklabels = FALSE,
414 showgrid = FALSE),
415 xaxis = list(title = "",
416 zeroline = FALSE,
417 linecolor = toRGB("black"),
418 linewidth = 1,
419 showticklabels = FALSE,
420 showgrid = FALSE),
421 autosize = T) %>%
422 config(modeBarButtonsToRemove = c("zoomIn2d", "zoomOut2d", "hoverClosestGeo", "hoverClosestGl2d", "toImage",
423 "hoverClosestCartesian", "lasso2d", "select2d", "resetScale2d",
424 "hoverCompareCartesian", "hoverClosestPie", "toggleSpikelines"), displaylogo = FALSE) %>%
425 style(hoverinfo = "none", traces = c(1,2))
426
427 p
428 })
429
430 #legend of plotly plot by ggplot
431
432 output$TsnePlot_legend <- renderPlot({
433 p = ggplot(data, aes(x=1, y=1)) +
434 geom_text(aes(label=seq(1,nrow(data)), colour=factor(colour_choice())),
435 size=3.5, fontface = "bold") +
436 theme_classic()+
437 scale_color_manual(values = color_palette())+
438 theme(legend.title = element_blank())+
439 theme(legend.position = "right")+
440 theme(legend.text=element_text(size=9))
441 leg <- get_legend(p)
442 as_ggplot(leg)
443 })
444
445 output$downloadPlotdata <- downloadHandler(
446 filename = function() {
447 paste0(input$method,"_coordinates.csv")
448 },
449 content = function(file) {
450 write.csv(data.dimred(), file, row.names = F)
451 }
452 )
453
454 ##### Hierarchical clustering #######
455
456 output$hclust <- renderPlot({
457 set.seed(42)
458 clustering=hclust(dist(matrix), method=input$hcmethod)
459 par(oma=c(3,3,3,3))
460 palette(color_palette())
461 par(mar = rep(0, 4))
462 myplclust(clustering,
463 labels=paste(data[[colindex_id]]),
464 lab.col=as.fumeric(as.character(colour_choice()), levels = sort(unique(as.character(colour_choice())))),
465 cex=as.numeric(input$labelsize_hc/10),
466 main="",
467 yaxt="n",
468 ylab= "")
469 })
470
471 #legend
472 output$hclust_legend <- renderPlot({
473 p = ggplot(data, aes(x=1, y=1)) +
474 geom_text(aes(label=seq(1,nrow(data)), colour=factor(colour_choice())), fontface = "bold") +
475 theme_classic()+
476 scale_color_manual(values = color_palette())+
477 theme(legend.title = element_blank())+
478 theme(legend.position = "right")+
479 theme(legend.text=element_text(size=9))
480 leg <- get_legend(p)
481 as_ggplot(leg)
482 })
483
484
485 ##### Test field for development ######
486 #output$test <- renderPrint({
487 #})
488
489 }
490
491 ###### APP ######
492 shinyApp(ui, server)