diff simtext_app.R @ 0:34ed44f3f85c draft

"planemo upload for repository https://github.com/dlal-group/simtext commit fd3f5b7b0506fbc460f2a281f694cb57f1c90a3c-dirty"
author dlalgroup
date Thu, 24 Sep 2020 02:17:05 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/simtext_app.R	Thu Sep 24 02:17:05 2020 +0000
@@ -0,0 +1,492 @@
+#!/usr/bin/env Rscript
+### SimText App ###
+#
+# The tool enables the exploration of data generated by text_to_wordmatrix or pmids_to_pubtator_matrix in a locally run ShinyApp. Features are word clouds for each initial search query, dimension reduction and hierarchical clustering of the binary matrix, and a table with words and their frequency among the search queries. 
+# 
+# Input:
+#   
+#   1)	Input 1: 
+#   Tab-delimited table with 
+# - column with search queries starting with "ID_", e.g. "ID_gene" if initial search queries were genes 
+# - column(s) with grouping factor(s) to compare pre-existing categories of the initial search queries with the grouping based on text. The column names should start with "GROUPING_". If the column name is "GROUPING_disorder", "disorder" will be shown as a grouping variable in the app.
+#  2)	Input 2: 
+#   Output of text_to_wordmatrix or pmids_to_pubtator_matrix, or binary matrix.
+# 
+# optional arguments:
+# -h, --help                    show help message
+# -i INPUT, --input INPUT       input file name. add path if file is not in working directory
+# -m MATRIX, --matrix MATRIX    matrix file name. add path if file is not in working directory
+# -p PORT, --port PORT          specify port, otherwise randomly selected
+#
+#Output: 
+#Shiny app with word clouds, dimensionality reduction plot, dendrogram of hierarchical clustering and table with words and their frequency among the entities.
+#
+#Packages
+
+if ( '--install_packages' %in% commandArgs()) {
+  print('Installing packages')
+  if (!require('shiny')) install.packages('shiny', repo="http://cran.rstudio.com/");
+  if (!require('plotly')) install.packages('plotly', repo="http://cran.rstudio.com/");
+  if (!require('DT')) install.packages('DT', repo="http://cran.rstudio.com/");
+  if (!require('shinycssloaders')) install.packages('shinycssloaders', repo="http://cran.rstudio.com/");
+  if (!require('shinythemes')) install.packages('shinythemes', repo="http://cran.rstudio.com/");
+  if (!require('tableHTML')) install.packages('tableHTML', repo="http://cran.rstudio.com/");
+  if (!require('argparse')) install.packages('argparse', repo="http://cran.rstudio.com/");
+  if (!require('PubMedWordcloud')) install.packages('PubMedWordcloud', repo="http://cran.rstudio.com/");
+  if (!require('ggplot2')) install.packages('ggplot2', repo="http://cran.rstudio.com/");
+  if (!require('stringr')) install.packages('stringr', repo="http://cran.rstudio.com/");
+  if (!require('tidyr')) install.packages('tidyr', repo="http://cran.rstudio.com/");
+  if (!require('magrittr')) install.packages('magrittr', repo="http://cran.rstudio.com/");
+  if (!require('plyr')) install.packages('plyr', repo="http://cran.rstudio.com/");
+  if (!require('ggpubr')) install.packages('ggpubr', repo="http://cran.rstudio.com/");
+  if (!require('rafalib')) install.packages('rafalib', repo="http://cran.rstudio.com/");
+  if (!require('RColorBrewer')) install.packages('RColorBrewer', repo="http://cran.rstudio.com/");
+  if (!require('dendextend')) install.packages('dendextend', repo="http://cran.rstudio.com/");
+  if (!require('Rtsne')) install.packages('Rtsne', repo="http://cran.rstudio.com/");
+  if (!require('umap')) install.packages('umap', repo="http://cran.rstudio.com/");
+}
+
+suppressPackageStartupMessages(library("shiny"))
+suppressPackageStartupMessages(library("plotly"))
+suppressPackageStartupMessages(library("DT"))
+suppressPackageStartupMessages(library("shinycssloaders"))
+suppressPackageStartupMessages(library("shinythemes"))
+suppressPackageStartupMessages(library("tableHTML"))
+suppressPackageStartupMessages(library("argparse"))
+suppressPackageStartupMessages(library("PubMedWordcloud"))
+suppressPackageStartupMessages(library("ggplot2"))
+suppressPackageStartupMessages(library("stringr"))
+suppressPackageStartupMessages(library("tidyr"))
+suppressPackageStartupMessages(library("magrittr"))
+suppressPackageStartupMessages(library("plyr"))
+suppressPackageStartupMessages(library("ggpubr"))
+suppressPackageStartupMessages(library("rafalib"))
+suppressPackageStartupMessages(library("RColorBrewer"))
+suppressPackageStartupMessages(library("dendextend"))
+suppressPackageStartupMessages(library("Rtsne"))
+suppressPackageStartupMessages(library("umap"))
+
+#command arguments
+parser <- ArgumentParser()
+parser$add_argument("-i", "--input", 
+                    help = "input file name. add path if file is not in working directory")
+parser$add_argument("-m", "--matrix", default= NULL,
+                    help = "matrix file name. add path if file is not in working directory")
+parser$add_argument("--host", default=NULL,
+                    help="Specify host")
+parser$add_argument("-p", "--port", type="integer", default=NULL,
+                    help="Specify port, otherwise randomly select")
+parser$add_argument("--install_packages", action="store_true", default=FALSE,
+                    help="If you want to auto install missing required packages.")
+args <- parser$parse_args()
+
+# Set host
+if(!is.null(args$host)){
+  options(shiny.host = args$host)
+}
+
+# Set port
+if(!is.null(args$port)){
+  options(shiny.port = args$port)
+}
+  
+#load data
+data = read.delim(args$input, stringsAsFactors=FALSE)
+index_grouping = grep("GROUPING_", names(data))
+names(data)[index_grouping] = sub(".*_", "",names(data)[index_grouping])
+colindex_id = grep("^ID_", names(data))
+
+matrix = read.delim(args$matrix, check.names = FALSE, header = TRUE, sep='\t')
+matrix = (as.matrix(matrix)>0) *1 #transform matrix to binary matrix
+           
+##### UI ######
+ui <- shinyUI(fluidPage(
+  navbarPage(theme = shinytheme("flatly"), id = "inTabset",selected = "panel1",
+             title = "SimText",
+             tabPanel("Home", value = "panel1",
+             tabPanel("Results", value = "panel1",
+      fluidRow(width=12, offset=0,
+           column(width = 4,  style = "padding-right: 0px",
+                  wellPanel(h5(strong("ID of interest")),
+                            style = "background-color:white;
+                            border-bottom: 2px solid #EEEEEE;
+                            border-top-color: white;
+                            border-right-color: white;
+                            border-left-color: white;
+                            box-shadow: 0px 0px 0px white;
+                            padding:3px;
+                            width: 100%"),
+                  selectInput('ID', 'Select ID:', paste0(data[[colindex_id]]," (",seq(1,length(data[[colindex_id]])),")"))),
+           column(width = 3,  style = "padding-right: 0px",
+                  wellPanel(h5(strong("Color settings")),
+                            style = "background-color:white;
+                            border-bottom: 2px solid #EEEEEE;
+                            border-top-color: white;
+                            border-right-color: white;
+                            border-left-color: white;
+                            box-shadow: 0px 0px 0px white;
+                            padding:3px;
+                            width: 100%"),
+                  radioButtons('colour', 'Color by:', c("Grouping variable", "Individual word")), 
+                  selectInput("colour_select", "Select:",  choices=c(names(data)[index_grouping])))
+           ),
+    fluidRow(width = 12, offset = 0,
+           column(width = 4, #style = "height:650px;",
+                  wellPanel(textOutput("ID"),
+                            style = "background-color: #333333;
+                            color: white;
+                            border-top-color: #333333;
+                            border-left-color: #333333;
+                            border-right-color: #333333;
+                            box-shadow: 3px 3px 3px #d8d8d8;
+                            margin-bottom: 0px;
+                            padding:5px"), 
+                  wellPanel(
+                    fluidRow(
+                      column(width = 4,
+                             numericInput('fontsize', 'Font size:',value = 7, min=1, max=50)),
+                      column(width = 4,
+                             numericInput('nword', 'Word number:',value = 50, min=1, max=100)),
+                      column(width = 12,
+                            withSpinner(plotOutput("WordcloudPlot",height= "325px"))),
+                      column(width = 12,
+                            downloadLink("downloadWordcloud", "Download"))),
+                            style = "background-color: #ffffff;
+                            border-bottom-color: #333333;
+                            border-left-color: #333333;
+                            height: 470px;
+                            border-right-color: #333333;
+                            box-shadow: 3px 3px 3px #d8d8d8;
+                            margin-top: 0px"),
+                  wellPanel(textOutput("Table"),
+                            style = "background-color: #333333;
+                            color: white;
+                            border-top-color: #333333;
+                            border-left-color: #333333;
+                            border-right-color: #333333;
+                            box-shadow: 3px 3px 3px #d8d8d8;
+                            margin-bottom: 0px;
+                            padding:5px"),
+                  wellPanel(withSpinner(DT::dataTableOutput("datatable", height= "150px")),
+                            style = "background-color: #ffffff;
+                            border-bottom-color: #333333;
+                            border-left-color: #333333;
+                            border-right-color: #333333;
+                            height: 175px;
+                            box-shadow: 3px 3px 3px #d8d8d8;
+                            margin-top: 0px")
+                  ), 
+           column(width = 8, #style='padding:0px;',
+                  wellPanel("T-SNE plot of wordmatrix",
+                            style = "background-color: #333333;
+                            color: white;
+                            border-top-color: #333333;
+                            border-left-color: #333333;
+                            border-right-color: #333333;
+                            box-shadow: 3px 3px 3px #d8d8d8;
+                            margin-bottom: 0px;
+                            padding:5px"), 
+                  wellPanel( 
+                     fluidRow(
+                           column(width = 2,
+                                  radioButtons('method', 'Method:',choices=c("t-SNE","UMAP"))),
+                           column(width = 2,
+                                  numericInput('perplexity', 'Perplexity:',value = 2, min=1, max=nrow(data)-1)),
+                           column(width = 2,
+                                  radioButtons('label', 'Labels:',choices=c("Index","IDs"))),
+                           column(width = 2,
+                                  numericInput('labelsize', 'Label size:',value = 12, min=1, max=30)),
+                           column(width = 8, style='padding:0px;',
+                                  withSpinner(plotlyOutput("TsnePlot",height=550))),
+                           column(width = 4, style='padding:0px;',
+                                  withSpinner(plotOutput("TsnePlot_legend",height=550))),
+                           column(width=2, 
+                                  downloadLink("downloadPlotdata",label = "Download data"))),
+                            style = "background-color: white;
+                            border-bottom-color: #333333;
+                            border-left-color:  #333333;
+                            border-right-color: #333333;
+                            box-shadow: 3px 3px 3px #d8d8d8;
+                            margin-top: 0px"
+                            #height=575px
+                       ))),
+    fluidRow(column(width = 12,
+                  wellPanel("Hierarchical clustering of wordmatrix",
+                            style = "background-color: #333333;
+                            color: white;
+                            border-top-color: #333333;
+                            border-left-color: #333333;
+                            border-right-color: #333333;
+                            box-shadow: 3px 3px 3px #d8d8d8;
+                            margin-bottom: 0px;
+                            padding:5px")
+                  ,
+                  wellPanel(
+                    fluidRow(
+                          column(width = 2,
+                                 radioButtons('hcmethod', 'Method:',choices=c("ward.D2","average","complete","single"))),
+                          column(width = 2,
+                                 numericInput('labelsize_hc', 'Label size:', value = 8, min=1, max=30))
+                          ),
+                      fluidRow(
+                          column(width = 9,
+                                withSpinner(plotOutput("hclust"))),
+                          column(width = 3, 
+                                 withSpinner(plotOutput("hclust_legend")))
+                          ),
+                                style = "background-color: #ffffff;
+                                border-bottom-color: #333333;
+                                border-left-color: #333333;
+                                border-right-color: #333333;
+                                box-shadow: 3px 3px 3px #d8d8d8;
+                                margin-top: 0px")
+                  ,
+                  verbatimTextOutput("test")
+                ))
+    ))
+    
+  #  ,
+  #tabPanel("About", value = "panel2", h3(""))
+  
+  )))
+           
+         
+         
+
+###### SERVER ###### 
+server <- function(input, output, session) {
+
+  ##### Global ##### 
+  IDs = reactive(paste0(data[[colindex_id]]," (",seq(1,length(data[[colindex_id]])),")"))
+  index_ID = reactive({which(IDs() == input$ID)})
+
+  ##### Wordcloud plot and download  ######
+  
+  output$ID <- renderText({
+    paste("Wordcloud of",data[[colindex_id]][index_ID()])
+  })
+  
+  output$WordcloudPlot <- renderPlot({
+    ID_matrix = matrix[index_ID(),]
+    ID_matrix = data.frame(word= as.character(names(ID_matrix)), freq= ID_matrix)
+    colnames(ID_matrix) = c("word", "freq")
+    ID_matrix = ID_matrix[ID_matrix$freq == 1,]
+    
+    plotWordCloud(ID_matrix, 
+                  max.words = min(nrow(ID_matrix),input$nword), 
+                  scale= c(input$fontsize/10, input$fontsize/10), 
+                  colors= brewer.pal(8,"Greys")[4:8])
+  })
+  
+  output$downloadWordcloud <- downloadHandler(
+    filename = function() {
+      paste0(paste0("Wordcloudof",data[[colindex_id]][index_ID()]),".pdf", sep="")
+    },
+    content = function(file) {
+      ID_matrix = matrix[index_ID(),]
+      ID_matrix = data.frame(word= names(ID_matrix), freq= ID_matrix)
+      colnames(ID_matrix) = c("word", "freq")
+      ID_matrix = ID_matrix[ID_matrix$freq == 1,]
+
+      pdf(file)
+      plotWordCloud(ID_matrix, 
+                    max.words = min(max(nrow(ID_matrix)),input$nword), 
+                    scale= c(input$fontsize/10, input$fontsize/10), 
+                    colors= brewer.pal(8,"Greys")[4:8])
+      dev.off()
+    }
+  )
+  
+  ##### Table #####
+  output$Table  <-  renderText({
+    paste("Most occuring words among IDs")
+  })
+  
+  output$datatable <- DT::renderDataTable({
+    
+    colsum_data= data.frame(word=colnames(matrix), freq=colSums(matrix))
+    colsum_data = colsum_data[order(colsum_data$freq, decreasing = T),]
+    colnames(colsum_data) = c("Word", paste0("IDs (total n=", nrow(matrix),")"))
+
+    DT::datatable(colsum_data,
+                  extensions = c("Buttons"),
+                  rownames = F,
+                  fillContainer = T,
+                  escape=FALSE,
+                  options = list(dom = "t",
+                                 scrollY = min(nrow(colsum_data),500),
+                                 scrollX= TRUE,
+                                 scroller = TRUE,
+                                 autoWidth = TRUE,
+                                 pageLength = nrow(colsum_data),
+                                 columnDefs = list(list(className = 'dt-center', targets = "_all"), 
+                                 list(width = '50%', targets = "_all")))
+                  )
+  })
+  
+  ##### Colour/Grouping #####
+  
+  outVar <- reactive({
+    if(input$colour == "Grouping variable"){
+      return(names(data)[index_grouping])
+    } else {
+      return(colnames(matrix))
+    }
+  })
+
+  observe({ 
+    updateSelectInput(session, "colour_select", choices = outVar())})
+  
+  colour_choice =  reactive({
+    if(input$colour == "Grouping variable"){
+      return(as.factor(data[,input$colour_select]))
+  } else {
+      matrix = as.data.frame(matrix)
+      colour_byword = matrix[[input$colour_select]]
+      colour_byword = ifelse(colour_byword > 0,"Selected word associated with ID","Selected word not associated with ID")
+      return(as.factor(colour_byword))
+  }
+    })
+
+  color_palette = reactive({palette=c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", 
+                                      "#E31A1C", "#FDBF6F", "#FF7F00", "#CAB2D6", "#6A3D9A",
+                                      "#00AFBB", "#E7B800", "#FC4E07", "#999999", "#E69F00", 
+                                      "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00")
+                            return( palette[1:length(levels(colour_choice()))] ) 
+  })
+  
+  ##### Dimension reduction plot and download #####
+ 
+   data.dimred = reactive({
+    if (input$method == "t-SNE"){
+      tsne_result <- Rtsne(matrix, perplexity = input$perplexity, check_duplicates=F)
+      data["X_Coord"] = tsne_result$Y[,1]
+      data["Y_Coord"] = tsne_result$Y[,2]
+      return(data)
+    } else if (input$method == "UMAP"){
+      umap_result = umap(matrix)
+      data["X_Coord"] = umap_result$layout[,1]
+      data["Y_Coord"] = umap_result$layout[,2]
+      return(data)
+    }  
+  })
+  
+  output$TsnePlot <- renderPlotly({
+    
+    if (input$label == "Index") {
+        labeling = as.character(seq(1,nrow(data)))
+    } else if (input$label == "IDs") {
+        labeling= as.character(data[[colindex_id]])
+    }
+    
+    p = plot_ly(colors = color_palette()) %>%
+      add_trace(type="scatter",
+                mode = 'markers',
+                x = data.dimred()$X_Coord[index_ID()],
+                y = data.dimred()$Y_Coord[index_ID()],
+                opacity=0.15,
+                marker = list(
+                  color = "grey",
+                  size = 80)) %>%
+      add_trace(x=data.dimred()$X_Coord, 
+                y=data.dimred()$Y_Coord, 
+                type="scatter", 
+                mode="text",
+                text= labeling,
+                textfont = list(size= input$labelsize),
+                color = factor(colour_choice())) %>%
+      add_trace(x=data.dimred()$X_Coord, 
+                y=data.dimred()$Y_Coord, 
+                type="scatter", 
+                mode="markers",
+                opacity=0,
+                text= paste0( "ID: ",data[[colindex_id]], "\n",
+                              "Index: ",seq(1,nrow(data)), "\n",
+                              "Grouping: ", paste(data[,index_grouping])),
+                hoverinfo = "text",
+                color = factor(colour_choice())) %>% 
+      layout(showlegend = FALSE,
+             yaxis= list(title = "",
+                         zeroline = FALSE,
+                         linecolor = toRGB("black"),
+                         linewidth = 1,
+                         showticklabels = FALSE,
+                         showgrid = FALSE),
+             xaxis = list(title = "",
+                          zeroline = FALSE,
+                          linecolor = toRGB("black"),
+                          linewidth = 1,
+                          showticklabels = FALSE,
+                          showgrid = FALSE),
+             autosize = T) %>% 
+      config(modeBarButtonsToRemove = c("zoomIn2d", "zoomOut2d", "hoverClosestGeo", "hoverClosestGl2d", "toImage",
+                                        "hoverClosestCartesian", "lasso2d", "select2d", "resetScale2d",
+                                        "hoverCompareCartesian", "hoverClosestPie", "toggleSpikelines"), displaylogo = FALSE) %>% 
+      style(hoverinfo = "none", traces = c(1,2))
+    
+    p
+  })
+  
+  #legend of plotly plot by ggplot
+  
+  output$TsnePlot_legend <- renderPlot({
+    p = ggplot(data, aes(x=1, y=1)) +
+        geom_text(aes(label=seq(1,nrow(data)), colour=factor(colour_choice())), 
+                size=3.5, fontface = "bold") +
+        theme_classic()+
+        scale_color_manual(values = color_palette())+
+        theme(legend.title = element_blank())+
+        theme(legend.position = "right")+
+        theme(legend.text=element_text(size=9))
+    leg <- get_legend(p)
+    as_ggplot(leg)
+  })
+  
+  output$downloadPlotdata <- downloadHandler(
+    filename = function() {
+      paste0(input$method,"_coordinates.csv")
+    },
+    content = function(file) {
+      write.csv(data.dimred(), file, row.names = F)
+    }
+  )
+
+  ##### Hierarchical clustering #######  
+  
+  output$hclust <- renderPlot({
+    set.seed(42)
+    clustering=hclust(dist(matrix), method=input$hcmethod)
+    par(oma=c(3,3,3,3))
+    palette(color_palette())
+    par(mar = rep(0, 4))
+    myplclust(clustering, 
+              labels=paste(data[[colindex_id]]), 
+              lab.col=as.fumeric(as.character(colour_choice()), levels = sort(unique(as.character(colour_choice())))), 
+              cex=as.numeric(input$labelsize_hc/10),
+              main="",
+              yaxt="n",
+              ylab= "")
+    })
+  
+    #legend
+    output$hclust_legend <- renderPlot({
+      p = ggplot(data, aes(x=1, y=1)) +
+        geom_text(aes(label=seq(1,nrow(data)), colour=factor(colour_choice())), fontface = "bold") +
+        theme_classic()+
+        scale_color_manual(values = color_palette())+
+        theme(legend.title = element_blank())+
+        theme(legend.position = "right")+
+        theme(legend.text=element_text(size=9))
+      leg <- get_legend(p)
+      as_ggplot(leg)
+    })
+    
+    
+  ##### Test field for development ######
+  #output$test <- renderPrint({
+  #})
+  
+    }
+
+###### APP ######
+shinyApp(ui, server)