Mercurial > repos > dlalgroup > text_to_wordmatrix
diff simtext_app.R @ 0:dd696b179eb7 draft
"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
| author | dlalgroup | 
|---|---|
| date | Thu, 24 Sep 2020 02:58:53 +0000 | 
| parents | |
| children | 
line wrap: on
 line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/simtext_app.R Thu Sep 24 02:58:53 2020 +0000 @@ -0,0 +1,492 @@ +#!/usr/bin/env Rscript +### SimText App ### +# +# The tool enables the exploration of data generated by text_to_wordmatrix or pmids_to_pubtator_matrix in a locally run ShinyApp. Features are word clouds for each initial search query, dimension reduction and hierarchical clustering of the binary matrix, and a table with words and their frequency among the search queries. +# +# Input: +# +# 1) Input 1: +# Tab-delimited table with +# - column with search queries starting with "ID_", e.g. "ID_gene" if initial search queries were genes +# - column(s) with grouping factor(s) to compare pre-existing categories of the initial search queries with the grouping based on text. The column names should start with "GROUPING_". If the column name is "GROUPING_disorder", "disorder" will be shown as a grouping variable in the app. +# 2) Input 2: +# Output of text_to_wordmatrix or pmids_to_pubtator_matrix, or binary matrix. +# +# optional arguments: +# -h, --help show help message +# -i INPUT, --input INPUT input file name. add path if file is not in working directory +# -m MATRIX, --matrix MATRIX matrix file name. add path if file is not in working directory +# -p PORT, --port PORT specify port, otherwise randomly selected +# +#Output: +#Shiny app with word clouds, dimensionality reduction plot, dendrogram of hierarchical clustering and table with words and their frequency among the entities. +# +#Packages + +if ( '--install_packages' %in% commandArgs()) { + print('Installing packages') + if (!require('shiny')) install.packages('shiny', repo="http://cran.rstudio.com/"); + if (!require('plotly')) install.packages('plotly', repo="http://cran.rstudio.com/"); + if (!require('DT')) install.packages('DT', repo="http://cran.rstudio.com/"); + if (!require('shinycssloaders')) install.packages('shinycssloaders', repo="http://cran.rstudio.com/"); + if (!require('shinythemes')) install.packages('shinythemes', repo="http://cran.rstudio.com/"); + if (!require('tableHTML')) install.packages('tableHTML', repo="http://cran.rstudio.com/"); + if (!require('argparse')) install.packages('argparse', repo="http://cran.rstudio.com/"); + if (!require('PubMedWordcloud')) install.packages('PubMedWordcloud', repo="http://cran.rstudio.com/"); + if (!require('ggplot2')) install.packages('ggplot2', repo="http://cran.rstudio.com/"); + if (!require('stringr')) install.packages('stringr', repo="http://cran.rstudio.com/"); + if (!require('tidyr')) install.packages('tidyr', repo="http://cran.rstudio.com/"); + if (!require('magrittr')) install.packages('magrittr', repo="http://cran.rstudio.com/"); + if (!require('plyr')) install.packages('plyr', repo="http://cran.rstudio.com/"); + if (!require('ggpubr')) install.packages('ggpubr', repo="http://cran.rstudio.com/"); + if (!require('rafalib')) install.packages('rafalib', repo="http://cran.rstudio.com/"); + if (!require('RColorBrewer')) install.packages('RColorBrewer', repo="http://cran.rstudio.com/"); + if (!require('dendextend')) install.packages('dendextend', repo="http://cran.rstudio.com/"); + if (!require('Rtsne')) install.packages('Rtsne', repo="http://cran.rstudio.com/"); + if (!require('umap')) install.packages('umap', repo="http://cran.rstudio.com/"); +} + +suppressPackageStartupMessages(library("shiny")) +suppressPackageStartupMessages(library("plotly")) +suppressPackageStartupMessages(library("DT")) +suppressPackageStartupMessages(library("shinycssloaders")) +suppressPackageStartupMessages(library("shinythemes")) +suppressPackageStartupMessages(library("tableHTML")) +suppressPackageStartupMessages(library("argparse")) +suppressPackageStartupMessages(library("PubMedWordcloud")) +suppressPackageStartupMessages(library("ggplot2")) +suppressPackageStartupMessages(library("stringr")) +suppressPackageStartupMessages(library("tidyr")) +suppressPackageStartupMessages(library("magrittr")) +suppressPackageStartupMessages(library("plyr")) +suppressPackageStartupMessages(library("ggpubr")) +suppressPackageStartupMessages(library("rafalib")) +suppressPackageStartupMessages(library("RColorBrewer")) +suppressPackageStartupMessages(library("dendextend")) +suppressPackageStartupMessages(library("Rtsne")) +suppressPackageStartupMessages(library("umap")) + +#command arguments +parser <- ArgumentParser() +parser$add_argument("-i", "--input", + help = "input file name. add path if file is not in working directory") +parser$add_argument("-m", "--matrix", default= NULL, + help = "matrix file name. add path if file is not in working directory") +parser$add_argument("--host", default=NULL, + help="Specify host") +parser$add_argument("-p", "--port", type="integer", default=NULL, + help="Specify port, otherwise randomly select") +parser$add_argument("--install_packages", action="store_true", default=FALSE, + help="If you want to auto install missing required packages.") +args <- parser$parse_args() + +# Set host +if(!is.null(args$host)){ + options(shiny.host = args$host) +} + +# Set port +if(!is.null(args$port)){ + options(shiny.port = args$port) +} + +#load data +data = read.delim(args$input, stringsAsFactors=FALSE) +index_grouping = grep("GROUPING_", names(data)) +names(data)[index_grouping] = sub(".*_", "",names(data)[index_grouping]) +colindex_id = grep("^ID_", names(data)) + +matrix = read.delim(args$matrix, check.names = FALSE, header = TRUE, sep='\t') +matrix = (as.matrix(matrix)>0) *1 #transform matrix to binary matrix + +##### UI ###### +ui <- shinyUI(fluidPage( + navbarPage(theme = shinytheme("flatly"), id = "inTabset",selected = "panel1", + title = "SimText", + tabPanel("Home", value = "panel1", + tabPanel("Results", value = "panel1", + fluidRow(width=12, offset=0, + column(width = 4, style = "padding-right: 0px", + wellPanel(h5(strong("ID of interest")), + style = "background-color:white; + border-bottom: 2px solid #EEEEEE; + border-top-color: white; + border-right-color: white; + border-left-color: white; + box-shadow: 0px 0px 0px white; + padding:3px; + width: 100%"), + selectInput('ID', 'Select ID:', paste0(data[[colindex_id]]," (",seq(1,length(data[[colindex_id]])),")"))), + column(width = 3, style = "padding-right: 0px", + wellPanel(h5(strong("Color settings")), + style = "background-color:white; + border-bottom: 2px solid #EEEEEE; + border-top-color: white; + border-right-color: white; + border-left-color: white; + box-shadow: 0px 0px 0px white; + padding:3px; + width: 100%"), + radioButtons('colour', 'Color by:', c("Grouping variable", "Individual word")), + selectInput("colour_select", "Select:", choices=c(names(data)[index_grouping]))) + ), + fluidRow(width = 12, offset = 0, + column(width = 4, #style = "height:650px;", + wellPanel(textOutput("ID"), + style = "background-color: #333333; + color: white; + border-top-color: #333333; + border-left-color: #333333; + border-right-color: #333333; + box-shadow: 3px 3px 3px #d8d8d8; + margin-bottom: 0px; + padding:5px"), + wellPanel( + fluidRow( + column(width = 4, + numericInput('fontsize', 'Font size:',value = 7, min=1, max=50)), + column(width = 4, + numericInput('nword', 'Word number:',value = 50, min=1, max=100)), + column(width = 12, + withSpinner(plotOutput("WordcloudPlot",height= "325px"))), + column(width = 12, + downloadLink("downloadWordcloud", "Download"))), + style = "background-color: #ffffff; + border-bottom-color: #333333; + border-left-color: #333333; + height: 470px; + border-right-color: #333333; + box-shadow: 3px 3px 3px #d8d8d8; + margin-top: 0px"), + wellPanel(textOutput("Table"), + style = "background-color: #333333; + color: white; + border-top-color: #333333; + border-left-color: #333333; + border-right-color: #333333; + box-shadow: 3px 3px 3px #d8d8d8; + margin-bottom: 0px; + padding:5px"), + wellPanel(withSpinner(DT::dataTableOutput("datatable", height= "150px")), + style = "background-color: #ffffff; + border-bottom-color: #333333; + border-left-color: #333333; + border-right-color: #333333; + height: 175px; + box-shadow: 3px 3px 3px #d8d8d8; + margin-top: 0px") + ), + column(width = 8, #style='padding:0px;', + wellPanel("T-SNE plot of wordmatrix", + style = "background-color: #333333; + color: white; + border-top-color: #333333; + border-left-color: #333333; + border-right-color: #333333; + box-shadow: 3px 3px 3px #d8d8d8; + margin-bottom: 0px; + padding:5px"), + wellPanel( + fluidRow( + column(width = 2, + radioButtons('method', 'Method:',choices=c("t-SNE","UMAP"))), + column(width = 2, + numericInput('perplexity', 'Perplexity:',value = 2, min=1, max=nrow(data)-1)), + column(width = 2, + radioButtons('label', 'Labels:',choices=c("Index","IDs"))), + column(width = 2, + numericInput('labelsize', 'Label size:',value = 12, min=1, max=30)), + column(width = 8, style='padding:0px;', + withSpinner(plotlyOutput("TsnePlot",height=550))), + column(width = 4, style='padding:0px;', + withSpinner(plotOutput("TsnePlot_legend",height=550))), + column(width=2, + downloadLink("downloadPlotdata",label = "Download data"))), + style = "background-color: white; + border-bottom-color: #333333; + border-left-color: #333333; + border-right-color: #333333; + box-shadow: 3px 3px 3px #d8d8d8; + margin-top: 0px" + #height=575px + ))), + fluidRow(column(width = 12, + wellPanel("Hierarchical clustering of wordmatrix", + style = "background-color: #333333; + color: white; + border-top-color: #333333; + border-left-color: #333333; + border-right-color: #333333; + box-shadow: 3px 3px 3px #d8d8d8; + margin-bottom: 0px; + padding:5px") + , + wellPanel( + fluidRow( + column(width = 2, + radioButtons('hcmethod', 'Method:',choices=c("ward.D2","average","complete","single"))), + column(width = 2, + numericInput('labelsize_hc', 'Label size:', value = 8, min=1, max=30)) + ), + fluidRow( + column(width = 9, + withSpinner(plotOutput("hclust"))), + column(width = 3, + withSpinner(plotOutput("hclust_legend"))) + ), + style = "background-color: #ffffff; + border-bottom-color: #333333; + border-left-color: #333333; + border-right-color: #333333; + box-shadow: 3px 3px 3px #d8d8d8; + margin-top: 0px") + , + verbatimTextOutput("test") + )) + )) + + # , + #tabPanel("About", value = "panel2", h3("")) + + ))) + + + + +###### SERVER ###### +server <- function(input, output, session) { + + ##### Global ##### + IDs = reactive(paste0(data[[colindex_id]]," (",seq(1,length(data[[colindex_id]])),")")) + index_ID = reactive({which(IDs() == input$ID)}) + + ##### Wordcloud plot and download ###### + + output$ID <- renderText({ + paste("Wordcloud of",data[[colindex_id]][index_ID()]) + }) + + output$WordcloudPlot <- renderPlot({ + ID_matrix = matrix[index_ID(),] + ID_matrix = data.frame(word= as.character(names(ID_matrix)), freq= ID_matrix) + colnames(ID_matrix) = c("word", "freq") + ID_matrix = ID_matrix[ID_matrix$freq == 1,] + + plotWordCloud(ID_matrix, + max.words = min(nrow(ID_matrix),input$nword), + scale= c(input$fontsize/10, input$fontsize/10), + colors= brewer.pal(8,"Greys")[4:8]) + }) + + output$downloadWordcloud <- downloadHandler( + filename = function() { + paste0(paste0("Wordcloudof",data[[colindex_id]][index_ID()]),".pdf", sep="") + }, + content = function(file) { + ID_matrix = matrix[index_ID(),] + ID_matrix = data.frame(word= names(ID_matrix), freq= ID_matrix) + colnames(ID_matrix) = c("word", "freq") + ID_matrix = ID_matrix[ID_matrix$freq == 1,] + + pdf(file) + plotWordCloud(ID_matrix, + max.words = min(max(nrow(ID_matrix)),input$nword), + scale= c(input$fontsize/10, input$fontsize/10), + colors= brewer.pal(8,"Greys")[4:8]) + dev.off() + } + ) + + ##### Table ##### + output$Table <- renderText({ + paste("Most occuring words among IDs") + }) + + output$datatable <- DT::renderDataTable({ + + colsum_data= data.frame(word=colnames(matrix), freq=colSums(matrix)) + colsum_data = colsum_data[order(colsum_data$freq, decreasing = T),] + colnames(colsum_data) = c("Word", paste0("IDs (total n=", nrow(matrix),")")) + + DT::datatable(colsum_data, + extensions = c("Buttons"), + rownames = F, + fillContainer = T, + escape=FALSE, + options = list(dom = "t", + scrollY = min(nrow(colsum_data),500), + scrollX= TRUE, + scroller = TRUE, + autoWidth = TRUE, + pageLength = nrow(colsum_data), + columnDefs = list(list(className = 'dt-center', targets = "_all"), + list(width = '50%', targets = "_all"))) + ) + }) + + ##### Colour/Grouping ##### + + outVar <- reactive({ + if(input$colour == "Grouping variable"){ + return(names(data)[index_grouping]) + } else { + return(colnames(matrix)) + } + }) + + observe({ + updateSelectInput(session, "colour_select", choices = outVar())}) + + colour_choice = reactive({ + if(input$colour == "Grouping variable"){ + return(as.factor(data[,input$colour_select])) + } else { + matrix = as.data.frame(matrix) + colour_byword = matrix[[input$colour_select]] + colour_byword = ifelse(colour_byword > 0,"Selected word associated with ID","Selected word not associated with ID") + return(as.factor(colour_byword)) + } + }) + + color_palette = reactive({palette=c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", + "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6", "#6A3D9A", + "#00AFBB", "#E7B800", "#FC4E07", "#999999", "#E69F00", + "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00") + return( palette[1:length(levels(colour_choice()))] ) + }) + + ##### Dimension reduction plot and download ##### + + data.dimred = reactive({ + if (input$method == "t-SNE"){ + tsne_result <- Rtsne(matrix, perplexity = input$perplexity, check_duplicates=F) + data["X_Coord"] = tsne_result$Y[,1] + data["Y_Coord"] = tsne_result$Y[,2] + return(data) + } else if (input$method == "UMAP"){ + umap_result = umap(matrix) + data["X_Coord"] = umap_result$layout[,1] + data["Y_Coord"] = umap_result$layout[,2] + return(data) + } + }) + + output$TsnePlot <- renderPlotly({ + + if (input$label == "Index") { + labeling = as.character(seq(1,nrow(data))) + } else if (input$label == "IDs") { + labeling= as.character(data[[colindex_id]]) + } + + p = plot_ly(colors = color_palette()) %>% + add_trace(type="scatter", + mode = 'markers', + x = data.dimred()$X_Coord[index_ID()], + y = data.dimred()$Y_Coord[index_ID()], + opacity=0.15, + marker = list( + color = "grey", + size = 80)) %>% + add_trace(x=data.dimred()$X_Coord, + y=data.dimred()$Y_Coord, + type="scatter", + mode="text", + text= labeling, + textfont = list(size= input$labelsize), + color = factor(colour_choice())) %>% + add_trace(x=data.dimred()$X_Coord, + y=data.dimred()$Y_Coord, + type="scatter", + mode="markers", + opacity=0, + text= paste0( "ID: ",data[[colindex_id]], "\n", + "Index: ",seq(1,nrow(data)), "\n", + "Grouping: ", paste(data[,index_grouping])), + hoverinfo = "text", + color = factor(colour_choice())) %>% + layout(showlegend = FALSE, + yaxis= list(title = "", + zeroline = FALSE, + linecolor = toRGB("black"), + linewidth = 1, + showticklabels = FALSE, + showgrid = FALSE), + xaxis = list(title = "", + zeroline = FALSE, + linecolor = toRGB("black"), + linewidth = 1, + showticklabels = FALSE, + showgrid = FALSE), + autosize = T) %>% + config(modeBarButtonsToRemove = c("zoomIn2d", "zoomOut2d", "hoverClosestGeo", "hoverClosestGl2d", "toImage", + "hoverClosestCartesian", "lasso2d", "select2d", "resetScale2d", + "hoverCompareCartesian", "hoverClosestPie", "toggleSpikelines"), displaylogo = FALSE) %>% + style(hoverinfo = "none", traces = c(1,2)) + + p + }) + + #legend of plotly plot by ggplot + + output$TsnePlot_legend <- renderPlot({ + p = ggplot(data, aes(x=1, y=1)) + + geom_text(aes(label=seq(1,nrow(data)), colour=factor(colour_choice())), + size=3.5, fontface = "bold") + + theme_classic()+ + scale_color_manual(values = color_palette())+ + theme(legend.title = element_blank())+ + theme(legend.position = "right")+ + theme(legend.text=element_text(size=9)) + leg <- get_legend(p) + as_ggplot(leg) + }) + + output$downloadPlotdata <- downloadHandler( + filename = function() { + paste0(input$method,"_coordinates.csv") + }, + content = function(file) { + write.csv(data.dimred(), file, row.names = F) + } + ) + + ##### Hierarchical clustering ####### + + output$hclust <- renderPlot({ + set.seed(42) + clustering=hclust(dist(matrix), method=input$hcmethod) + par(oma=c(3,3,3,3)) + palette(color_palette()) + par(mar = rep(0, 4)) + myplclust(clustering, + labels=paste(data[[colindex_id]]), + lab.col=as.fumeric(as.character(colour_choice()), levels = sort(unique(as.character(colour_choice())))), + cex=as.numeric(input$labelsize_hc/10), + main="", + yaxt="n", + ylab= "") + }) + + #legend + output$hclust_legend <- renderPlot({ + p = ggplot(data, aes(x=1, y=1)) + + geom_text(aes(label=seq(1,nrow(data)), colour=factor(colour_choice())), fontface = "bold") + + theme_classic()+ + scale_color_manual(values = color_palette())+ + theme(legend.title = element_blank())+ + theme(legend.position = "right")+ + theme(legend.text=element_text(size=9)) + leg <- get_legend(p) + as_ggplot(leg) + }) + + + ##### Test field for development ###### + #output$test <- renderPrint({ + #}) + + } + +###### APP ###### +shinyApp(ui, server)
