Mercurial > repos > iuc > isoformswitchanalyzer

diff IsoformSwitchAnalyzeR.R @ 0:f3fefb6d8254 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/isoformswitchanalyzer commit 2c61e4c6151000201dd9a8323722a380bc235380
author: iuc
date: Tue, 24 Jan 2023 18:37:14 +0000
children: 2c4e879a81cf
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/IsoformSwitchAnalyzeR.R	Tue Jan 24 18:37:14 2023 +0000
@@ -0,0 +1,924 @@
+# Load the IsoformSwitchAnalyzeR library
+library(IsoformSwitchAnalyzeR,
+        quietly = TRUE,
+        warn.conflicts = FALSE)
+library(argparse, quietly = TRUE, warn.conflicts = FALSE)
+library(dplyr, quietly = TRUE, warn.conflicts = FALSE)
+
+# setup R error handling to go to stderr
+options(
+  show.error.messages = FALSE,
+  error = function() {
+    cat(geterrmessage(), file = stderr())
+    q("no", 1, FALSE)
+  }
+)
+
+# we need that to not crash galaxy with an UTF8 error on German LC settings.
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+
+################################################################################
+### Input Processing
+################################################################################
+
+
+# Collect arguments from command line
+parser <- ArgumentParser(description = "IsoformSwitcheR R script")
+
+parser$add_argument("--modeSelector")
+parser$add_argument("--parentDir",  required = FALSE, help = "Parent directory")
+parser$add_argument("--readLength",
+                    required = FALSE,
+                    type = "integer",
+                    help = "Read length (required for stringtie)")
+parser$add_argument("--annotation", required = FALSE, help = "Annotation")
+parser$add_argument("--transcriptome", required = FALSE, help = "Transcriptome")
+parser$add_argument(
+  "--fixStringTieAnnotationProblem",
+  action = "store_true",
+  required = FALSE,
+  help = "Fix StringTie annotation problem"
+)
+parser$add_argument("--countFiles", required = FALSE, help = "Count files")
+parser$add_argument("--toolSource", required = FALSE, help = "Tool source")
+parser$add_argument("--rObject", required = FALSE, help = "R object")
+parser$add_argument("--IFcutoff",
+                    required = FALSE,
+                    type = "numeric",
+                    help = "IFcutoff")
+parser$add_argument(
+  "--geneExpressionCutoff",
+  required = FALSE,
+  type = "numeric",
+  help = "Gene expression cutoff"
+)
+parser$add_argument(
+  "--isoformExpressionCutoff",
+  required = FALSE,
+  type = "numeric",
+  help = "Isoform expression cutoff"
+)
+parser$add_argument("--alpha",
+                    required = FALSE,
+                    type = "numeric",
+                    help = "")
+parser$add_argument("--dIFcutoff",
+                    required = FALSE,
+                    type = "numeric",
+                    help = "dIF cutoff")
+parser$add_argument(
+  "--onlySigIsoforms",
+  required = FALSE,
+  action = "store_true",
+  help = "Only significative isoforms"
+)
+parser$add_argument(
+  "--filterForConsequences",
+  required = FALSE,
+  action = "store_true",
+  help = "Filter for consequences"
+)
+parser$add_argument(
+  "--removeSingleIsformGenes",
+  required = FALSE,
+  action = "store_true",
+  help = "Remove single isoform genes"
+)
+parser$add_argument(
+  "--keepIsoformInAllConditions",
+  required = FALSE,
+  action = "store_true",
+  help = "Keep isoform in all conditions"
+)
+parser$add_argument(
+  "--correctForConfoundingFactors",
+  required = FALSE,
+  action = "store_true",
+  help = "Correct for confunding factors"
+)
+parser$add_argument(
+  "--overwriteIFvalues",
+  required = FALSE,
+  action = "store_true",
+  help = "Overwrite IF values"
+)
+parser$add_argument(
+  "--reduceToSwitchingGenes",
+  required = FALSE,
+  action = "store_true",
+  help = "Reduce to switching genes"
+)
+parser$add_argument(
+  "--reduceFurtherToGenesWithConsequencePotential",
+  required = FALSE,
+  action = "store_true",
+  help = "Reduce further to genes with consequence potential"
+)
+parser$add_argument(
+  "--keepIsoformInAllConditions2",
+  required = FALSE,
+  action = "store_true",
+  help = "Keep isoform in ll conditions"
+)
+parser$add_argument("--minORFlength",
+                    required = FALSE,
+                    type = "integer",
+                    help = "")
+parser$add_argument("--orfMethod", required = FALSE, help = "ORF methods")
+parser$add_argument("--PTCDistance",
+                    required = FALSE,
+                    type = "integer",
+                    help = "")
+parser$add_argument(
+  "--removeShortAAseq",
+  required = FALSE,
+  action = "store_true",
+  help = "Remove short aminoacid sequences"
+)
+parser$add_argument(
+  "--removeLongAAseq",
+  required = FALSE,
+  action = "store_true",
+  help = "Remove long aminoacid sequences"
+)
+parser$add_argument(
+  "--removeORFwithStop",
+  required = FALSE,
+  action = "store_true",
+  help = "Remove ORF with stop codon"
+)
+parser$add_argument(
+  "--onlySwitchingGenes",
+  required = FALSE,
+  action = "store_true",
+  help = "Only switching genes"
+)
+parser$add_argument("--analysisMode", required = FALSE, help = "Analyze all isoforms with differential usage or single genes")
+parser$add_argument(
+  "--genesToPlot",
+  type = "integer",
+  default = 10,
+  required = FALSE,
+  help = "Number of genes to plot"
+)
+parser$add_argument("--gene", required = FALSE, help = "Gene ID to analyze")
+parser$add_argument(
+  "--sortByQvals",
+  action = "store_true",
+  required = FALSE,
+  help = "Sort genes by Q-val values"
+)
+parser$add_argument("--countGenes",
+                    action = "store_true",
+                    required = FALSE,
+                    help = "Count genes")
+parser$add_argument(
+  "--asFractionTotal",
+  action = "store_true",
+  required = FALSE,
+  help = "Plot gene expresson as fraction of total"
+)
+parser$add_argument("--plotGenes",
+                    action = "store_true",
+                    required = FALSE,
+                    help = "Plot genes instead of isoforms")
+parser$add_argument(
+  "--simplifyLocation",
+  action = "store_true",
+  required = FALSE,
+  help = "Simplify localtion"
+)
+parser$add_argument(
+  "--removeEmptyConsequences",
+  action = "store_true",
+  required = FALSE,
+  help = "Remove empty consequences"
+)
+parser$add_argument(
+  "--analysisOppositeConsequence",
+  action = "store_true",
+  required = FALSE,
+  help = "Analysi opposite consequences"
+)
+parser$add_argument("--pathToCPATresultFile",
+                    required = FALSE,
+                    help = "Path to CPAT result file")
+parser$add_argument("--pathToCPC2resultFile",
+                    required = FALSE,
+                    help = "Path to CPC2 result file")
+parser$add_argument("--pathToPFAMresultFile",
+                    required = FALSE,
+                    help = "Path to PFAM result file")
+parser$add_argument("--pathToNetSurfP2resultFile",
+                    required = FALSE,
+                    help = "Path to NetSurfP2 result file")
+parser$add_argument("--pathToSignalPresultFile",
+                    required = FALSE,
+                    help = "Path to signalP result file")
+parser$add_argument("--pathToIUPred2AresultFile",
+                    required = FALSE,
+                    help = "Path to IUPred2A result file")
+parser$add_argument("--codingCutoff",
+                    required = FALSE,
+                    type = "numeric",
+                    help = "Codding cutoff")
+parser$add_argument(
+  "--removeNoncodingORFs",
+  action = "store_true",
+  required = FALSE,
+  help = "Remove non-coding ORFs"
+)
+parser$add_argument(
+  "--minSignalPeptideProbability",
+  required = FALSE,
+  type = "numeric",
+  help = "Minimul signal peptide probability"
+)
+parser$add_argument(
+  "--smoothingWindowSize",
+  type = "integer",
+  required = FALSE,
+  help = "Smoothing windows size"
+)
+parser$add_argument(
+  "--probabilityCutoff",
+  required = FALSE,
+  type = "double",
+  help = "Probability cutoff"
+)
+parser$add_argument("--minIdrSize",
+                    required = FALSE,
+                    type = "integer",
+                    help = "Min Idr size")
+parser$add_argument(
+  "--annotateBindingSites",
+  action = "store_true",
+  required = FALSE,
+  help = "Annotate binding sites"
+)
+parser$add_argument(
+  "--minIdrBindingSize",
+  required = FALSE,
+  type = "integer",
+  help = "Minimun Idr binding size"
+)
+parser$add_argument(
+  "--minIdrBindingOverlapFrac",
+  required = FALSE,
+  type = "numeric",
+  help = ""
+)
+parser$add_argument("--ntCutoff",
+                    required = FALSE,
+                    type = "integer",
+                    help = "Nucleotide cutoff")
+parser$add_argument("--ntFracCutoff",
+                    required = FALSE,
+                    type = "numeric",
+                    help = "Nucleotide fraction cutoff")
+parser$add_argument(
+  "--ntJCsimCutoff",
+  required = FALSE,
+  type = "numeric",
+  help = "Nucleotide Jaccard simmilarity cutoff"
+)
+parser$add_argument("--AaCutoff",
+                    required = FALSE,
+                    type = "integer",
+                    help = "Aminoacid cutoff")
+parser$add_argument("--AaFracCutoff",
+                    required = FALSE,
+                    type = "numeric",
+                    help = "Aminoacid fraction cutoff")
+parser$add_argument(
+  "--AaJCsimCutoff",
+  required = FALSE,
+  type = "numeric",
+  help = "Aminoacid Jaccard similarity cutoff"
+)
+parser$add_argument(
+  "--removeNonConseqSwitches",
+  action = "store_true",
+  required = FALSE,
+  help = "Remove switches without consequences"
+)
+parser$add_argument(
+  "--rescaleTranscripts",
+  action = "store_true",
+  required = FALSE,
+  help = "Rescale transcripts"
+)
+parser$add_argument(
+  "--reverseMinus",
+  action = "store_true",
+  required = FALSE,
+  help = "Reverse minus"
+)
+parser$add_argument(
+  "--addErrorbars",
+  action = "store_true",
+  required = FALSE,
+  help = "Add error bars"
+)
+
+
+args <- parser$parse_args()
+
+# Data import
+###################
+
+if (args$modeSelector == "data_import") {
+
+  quantificationData <- importIsoformExpression(
+    parentDir = args$parentDir,
+    addIsofomIdAsColumn = TRUE,
+    readLength = args$readLength
+  )
+
+  ### Make design matrix
+  myDesign <- data.frame(
+    sampleID = colnames(quantificationData$abundance)[-1],
+    condition = gsub(
+      "[[:digit:]]+",
+      "",
+      colnames(quantificationData$abundance)[-1]
+    )
+  )
+
+  if (args$toolSource == "stringtie") {
+    SwitchList <- importRdata(
+      isoformCountMatrix   = quantificationData$counts,
+      isoformRepExpression = quantificationData$abundance,
+      designMatrix         = myDesign,
+      isoformExonAnnoation = args$annotation,
+      isoformNtFasta       = args$transcriptome,
+      showProgress = TRUE,
+      fixStringTieAnnotationProblem = args$fixStringTieAnnotationProblem
+    )
+  } else {
+    SwitchList <- importRdata(
+      isoformCountMatrix   = quantificationData$counts,
+      isoformRepExpression = quantificationData$abundance,
+      designMatrix         = myDesign,
+      isoformExonAnnoation = args$annotation,
+      isoformNtFasta       = args$transcriptome,
+      showProgress = TRUE
+    )
+  }
+
+
+  geneCountMatrix <- extractGeneExpression(
+    SwitchList,
+    extractCounts = TRUE,
+    addGeneNames = FALSE,
+    addIdsAsColumns = FALSE
+  )
+
+  if (args$countFiles == "collection") {
+
+    expressionDF <- data.frame(geneCountMatrix)
+
+    myDesign$condition[length(myDesign$condition)]
+
+    dataframe_factor1 <- expressionDF %>% select(matches(myDesign$condition[1]))
+    dataframe_factor2 <- expressionDF %>% select(matches(myDesign$condition[length(myDesign$condition)]))
+
+
+    lf1 <- as.list(as.data.frame(dataframe_factor1))
+    sampleNames1 <- colnames(as.data.frame(dataframe_factor1))
+
+    lf2 <- as.list(as.data.frame(dataframe_factor2))
+    sampleNames2 <- colnames(as.data.frame(dataframe_factor2))
+
+    geneNames <- row.names(as.data.frame(expressionDF))
+
+
+    for (index in seq_along(lf1)) {
+      tabular_expression <- data.frame(geneNames, lf1[index])
+      colnames(tabular_expression) <-
+        c("Geneid", sampleNames1[index])
+      filename <-
+        paste(sampleNames1[index], "dataset.tabular", sep = "_")
+      output_path <- paste("./count_files/factor1/", filename, sep = "")
+      write.table(
+        tabular_expression,
+        output_path,
+        sep = "\t",
+        row.names = FALSE,
+        quote = FALSE
+      )
+    }
+    for (index in seq_along(lf2)) {
+      tabular_expression <- data.frame(geneNames, lf2[index])
+      colnames(tabular_expression) <-
+        c("Geneid", sampleNames2[index])
+      filename <-
+        paste(sampleNames2[index], "dataset.tabular", sep = "_")
+      output_path <- paste("./count_files/factor2/", filename, sep = "")
+      write.table(
+        tabular_expression,
+        output_path,
+        sep = "\t",
+        row.names = FALSE,
+        quote = FALSE
+      )
+    }
+  } else if (args$countFiles == "matrix") {
+    expressionDF <- data.frame(geneCountMatrix)
+    geneNames <- row.names(expressionDF)
+
+    expressionDF <- cbind(geneNames, expressionDF)
+    write.table(
+      as.data.frame(expressionDF),
+      "./count_files/matrix.tabular",
+      sep = "\t",
+      row.names = FALSE,
+      quote = FALSE
+    )
+    write.table(
+      as.data.frame(myDesign),
+      "./count_files/samples.tabular",
+      sep = "\t",
+      row.names = FALSE,
+      quote = FALSE
+    )
+  }
+
+  save(SwitchList, file = "SwitchList.Rda")
+
+}
+
+if (args$modeSelector == "first_step") {
+
+  # First part of the analysis
+  #############################
+
+  load(file = args$rObject)
+
+  ### Filter
+  SwitchList <- preFilter(
+    SwitchList,
+    IFcutoff = args$IFcutoff,
+    geneExpressionCutoff = args$geneExpressionCutoff,
+    isoformExpressionCutoff = args$isoformExpressionCutoff,
+    removeSingleIsoformGenes = args$removeSingleIsformGenes,
+    onlySigIsoforms = args$onlySigIsoforms,
+    keepIsoformInAllConditions = args$keepIsoformInAllConditions,
+    alpha = args$alpha,
+    dIFcutoff = args$dIFcutoff,
+  )
+
+  ### Test for isoform switches
+  SwitchList <- isoformSwitchTestDEXSeq(
+    SwitchList,
+    alpha = args$alpha,
+    dIFcutoff = args$dIFcutoff,
+    correctForConfoundingFactors = args$correctForConfoundingFactors,
+    overwriteIFvalues = args$overwriteIFvalues,
+    reduceToSwitchingGenes = args$reduceToSwitchingGenes,
+    reduceFurtherToGenesWithConsequencePotential = args$reduceFurtherToGenesWithConsequencePotential,
+    onlySigIsoforms = args$onlySigIsoforms,
+    keepIsoformInAllConditions = args$keepIsoformInAllConditions2,
+    showProgress = TRUE,
+  )
+
+  SwitchList <- analyzeNovelIsoformORF(
+    SwitchList,
+    analysisAllIsoformsWithoutORF = TRUE,
+    minORFlength = args$minORFlength,
+    orfMethod = args$orfMethod,
+    PTCDistance = args$PTCDistance,
+    startCodons = "ATG",
+    stopCodons = c("TAA", "TAG", "TGA"),
+    showProgress = TRUE,
+  )
+
+  ### Extract Sequences
+  SwitchList <- extractSequence(
+    SwitchList,
+    onlySwitchingGenes = args$onlySwitchingGenes,
+    alpha = args$alpha,
+    dIFcutoff = args$dIFcutoff,
+    extractNTseq = TRUE,
+    extractAAseq = TRUE,
+    removeShortAAseq = args$removeShortAAseq,
+    removeLongAAseq = args$removeLongAAseq,
+    removeORFwithStop = args$removeORFwithStop,
+    addToSwitchAnalyzeRlist = TRUE,
+    writeToFile = TRUE,
+    pathToOutput = getwd(),
+    outputPrefix = "isoformSwitchAnalyzeR_isoform",
+    forceReExtraction = FALSE,
+    quiet = FALSE
+  )
+
+  ### Summary
+  switchSummary <- extractSwitchSummary(
+    SwitchList,
+    filterForConsequences = args$filterForConsequences,
+    alpha = args$alpha,
+    dIFcutoff = args$dIFcutoff,
+    onlySigIsoforms = args$onlySigIsoforms,
+  )
+
+  save(SwitchList, file = "SwitchList.Rda")
+  write.table(
+    switchSummary,
+    file = "switchSummary.tsv",
+    quote = FALSE,
+    sep = "\t",
+    col.names = TRUE,
+    row.names = FALSE
+  )
+
+}
+
+if (args$modeSelector == "second_step") {
+
+  # Second part of the analysis
+  #############################
+
+  load(file = args$rObject)
+
+  ### Add annotation
+  if (!is.null(args$pathToCPATresultFile)) {
+    SwitchList <- analyzeCPAT(
+      SwitchList,
+      pathToCPATresultFile = args$pathToCPATresultFile,
+      codingCutoff = args$codingCutoff,
+      removeNoncodinORFs        = args$removeNoncodingORFs
+    )
+  }
+
+  if (!is.null(args$pathToCPC2resultFile)) {
+    SwitchList <- analyzeCPC2(
+      SwitchList,
+      pathToCPC2resultFile = args$pathToCPC2resultFile,
+      removeNoncodinORFs = args$removeNoncodingORFs
+    )
+  }
+
+  if (!is.null(args$pathToPFAMresultFile)) {
+    pfamFiles <- list.files(path = args$pathToPFAMresultFile,
+                            full.names = TRUE)
+
+    SwitchList <- analyzePFAM(SwitchList,
+                              pathToPFAMresultFile =  pfamFiles,
+                              showProgress = FALSE)
+  }
+
+  if (!is.null(args$pathToNetSurfP2resultFile)) {
+    netsurfFiles <- list.files(path = args$pathToNetSurfP2resultFile,
+                               full.names = TRUE)
+
+    SwitchList <- analyzeNetSurfP2(
+      SwitchList,
+      pathToNetSurfP2resultFile =  netsurfFiles,
+      smoothingWindowSize = args$smoothingWindowSize,
+      probabilityCutoff = args$probabilityCutoff,
+      minIdrSize = args$minIdrSize,
+      showProgress = TRUE
+    )
+  }
+
+  if (!is.null(args$pathToIUPred2AresultFile)) {
+    SwitchList <- analyzeIUPred2A(
+      SwitchList,
+      pathToIUPred2AresultFile = args$pathToIUPred2AresultFile,
+      smoothingWindowSize = args$smoothingWindowSize,
+      probabilityCutoff = args$probabilityCutoff,
+      minIdrSize = args$minIdrSize,
+      annotateBindingSites = args$annotateBindingSites,
+      minIdrBindingSize = args$minIdrBindingSize,
+      minIdrBindingOverlapFrac = args$minIdrBindingOverlapFrac,
+      showProgress = TRUE,
+      quiet = FALSE
+    )
+  }
+
+  if (!is.null(args$pathToSignalPresultFile)) {
+    signalpFiles <- list.files(path = args$pathToSignalPresultFile,
+                               full.names = TRUE)
+
+    SwitchList <- analyzeSignalP(
+      SwitchList,
+      pathToSignalPresultFile = signalpFiles,
+      minSignalPeptideProbability = args$minSignalPeptideProbability
+    )
+  }
+
+  SwitchList <- analyzeAlternativeSplicing(
+    SwitchList,
+    onlySwitchingGenes = args$onlySwitchingGenes,
+    alpha = args$alpha,
+    dIFcutoff = args$dIFcutoff,
+    showProgress = TRUE
+  )
+
+  SwitchList <- analyzeIntronRetention(
+    SwitchList,
+    onlySwitchingGenes = args$onlySwitchingGenes,
+    alpha = args$alpha,
+    dIFcutoff = args$dIFcutoff,
+    showProgress = TRUE
+  )
+
+  consequences <- c(
+    "intron_retention",
+    "NMD_status",
+    "isoform_seq_similarity",
+    "ORF_genomic",
+    "tss",
+    "tts"
+  )
+
+  if (!is.null(args$pathToCPATresultFile) ||
+      !is.null(args$pathToCPC2resultFile)) {
+    updatedConsequences <- c(consequences, "coding_potential")
+    consequences <- updatedConsequences
+  }
+
+  if (!is.null(args$pathToPFAMresultFile)) {
+    updatedConsequences <- c(consequences, "domains_identified")
+    consequences <- updatedConsequences
+  }
+
+  if (!is.null(args$pathToSignalPresultFile)) {
+    updatedConsequences <- c(consequences, "signal_peptide_identified")
+    consequences <- updatedConsequences
+  }
+
+  if (!is.null(args$pathToNetSurfP2resultFile) ||
+      !is.null(args$pathToIUPred2AresultFile)) {
+    updatedConsequences <- c(consequences, "IDR_identified", "IDR_type")
+    consequences <- updatedConsequences
+  }
+
+  SwitchList <- analyzeSwitchConsequences(
+    SwitchList,
+    consequencesToAnalyze = consequences,
+    alpha = args$alpha,
+    dIFcutoff = args$dIFcutoff,
+    onlySigIsoforms = args$onlySigIsoforms,
+    ntCutoff = args$ntCutoff,
+    ntJCsimCutoff = args$ntJCsimCutoff,
+    AaCutoff = args$AaCutoff,
+    AaFracCutoff = args$AaFracCutoff,
+    AaJCsimCutoff = args$AaJCsimCutoff,
+    removeNonConseqSwitches = args$removeNonConseqSwitches,
+    showProgress = TRUE
+  )
+
+
+  ### Visual analysis
+  # Top genes
+
+  if (args$analysisMode == "single") {
+
+    outputFile <- file.path(getwd(), "single_gene.pdf")
+
+    pdf(
+      file = outputFile,
+      onefile = FALSE,
+      height = 6,
+      width = 9
+    )
+
+    switchPlot(
+      SwitchList,
+      gene = args$gene,
+      condition1 = myDesign$condition[1],
+      condition2 = myDesign$condition[length(myDesign$condition)],
+      IFcutoff = args$IFcutoff,
+      dIFcutoff = args$dIFcutoff,
+      rescaleTranscripts = args$rescaleTranscripts,
+      reverseMinus = args$reverseMinus,
+      addErrorbars = args$addErrorbars,
+      logYaxis = FALSE,
+      localTheme = theme_bw(base_size = 8)
+    )
+    dev.off()
+
+  } else {
+    mostSwitchingGene <-
+      extractTopSwitches(
+        SwitchList,
+        n = Inf,
+        filterForConsequences = args$filterForConsequences,
+        extractGenes = TRUE,
+        alpha = args$alpha,
+        dIFcutoff = args$dIFcutoff,
+        inEachComparison = FALSE,
+        sortByQvals = args$sortByQvals
+      )
+
+    write.table(
+      mostSwitchingGene,
+      file = "mostSwitchingGene.tsv",
+      quote = FALSE,
+      sep = "\t",
+      col.names = TRUE,
+      row.names = FALSE
+    )
+
+
+    switchPlotTopSwitches(
+      SwitchList,
+      alpha = args$alpha,
+      dIFcutoff = args$dIFcutoff,
+      onlySigIsoforms = args$onlySigIsoforms,
+      n = args$genesToPlot,
+      sortByQvals = args$sortByQvals,
+      pathToOutput = getwd(),
+      fileType = "pdf"
+    )
+
+    outputFile <-
+      file.path(getwd(), "extractConsequencesSummary.pdf")
+
+    pdf(
+      file = outputFile,
+      onefile = FALSE,
+      height = 6,
+      width = 9
+    )
+
+    consequenceSummary <- extractConsequenceSummary(
+      SwitchList,
+      consequencesToAnalyze = "all",
+      includeCombined = FALSE,
+      asFractionTotal = args$asFractionTotal,
+      alpha = args$alpha,
+      dIFcutoff = args$dIFcutoff,
+      plot = TRUE,
+      plotGenes = args$plotGenes,
+      simplifyLocation = args$simplifyLocation,
+      removeEmptyConsequences = args$removeEmptyConsequences,
+      returnResult = TRUE,
+      localTheme = theme_bw()
+    )
+    dev.off()
+
+    write.table(
+      consequenceSummary,
+      file = "consequencesSummary.tsv",
+      quote = FALSE,
+      sep = "\t",
+      col.names = TRUE,
+      row.names = FALSE
+    )
+
+
+    outputFile <- file.path(getwd(), "consequencesEnrichment.pdf")
+    pdf(
+      file = outputFile,
+      onefile = FALSE,
+      height = 6,
+      width = 9
+    )
+    consequenceEnrichment <- extractConsequenceEnrichment(
+      SwitchList,
+      consequencesToAnalyze = "all",
+      alpha = args$alpha,
+      dIFcutoff = args$dIFcutoff,
+      countGenes = args$countGenes,
+      analysisOppositeConsequence = args$analysisOppositeConsequence,
+      plot = TRUE,
+      localTheme = theme_bw(base_size = 12),
+      minEventsForPlotting = 10,
+      returnResult = TRUE
+    )
+    dev.off()
+
+    write.table(
+      consequenceEnrichment,
+      file = "consequencesEnrichment.tsv",
+      quote = FALSE,
+      sep = "\t",
+      col.names = TRUE,
+      row.names = FALSE
+    )
+
+
+    outputFile <- file.path(getwd(), "splicingEnrichment.pdf")
+    pdf(
+      file = outputFile,
+      onefile = FALSE,
+      height = 6,
+      width = 9
+    )
+    splicingEnrichment <- extractSplicingEnrichment(
+      SwitchList,
+      splicingToAnalyze = "all",
+      alpha = args$alpha,
+      dIFcutoff = args$dIFcutoff,
+      onlySigIsoforms = args$onlySigIsoforms,
+      countGenes = args$countGenes,
+      plot = TRUE,
+      minEventsForPlotting = 10,
+      returnResult = TRUE
+    )
+    dev.off()
+
+    write.table(
+      splicingEnrichment,
+      file = "splicingEnrichment.tsv",
+      quote = FALSE,
+      sep = "\t",
+      col.names = TRUE,
+      row.names = FALSE
+    )
+
+
+    outputFile <- file.path(getwd(), "splicingSummary.pdf")
+    pdf(
+      file = outputFile,
+      onefile = FALSE,
+      height = 6,
+      width = 9
+    )
+    splicingSummary <- extractSplicingSummary(
+      SwitchList,
+      splicingToAnalyze = "all",
+      asFractionTotal = args$asFractionTotal,
+      alpha = args$alpha,
+      dIFcutoff = args$dIFcutoff,
+      onlySigIsoforms = args$onlySigIsoforms,
+      plot = TRUE,
+      plotGenes = args$plotGenes,
+      localTheme = theme_bw(),
+      returnResult = TRUE
+    )
+    dev.off()
+
+    write.table(
+      splicingSummary,
+      file = "splicingSummary.tsv",
+      quote = FALSE,
+      sep = "\t",
+      col.names = TRUE,
+      row.names = FALSE
+    )
+
+
+    ### Volcano like plot:
+    outputFile <- file.path(getwd(), "volcanoPlot.pdf")
+    pdf(
+      file = outputFile,
+      onefile = FALSE,
+      height = 6,
+      width = 9
+    )
+    ggplot(data = SwitchList$isoformFeatures, aes(x = dIF, y = -log10(isoform_switch_q_value))) +
+      geom_point(aes(color = abs(dIF) > 0.1 &
+                       isoform_switch_q_value < 0.05), # default cutoff
+                 size = 1) +
+      geom_hline(yintercept = -log10(0.05), linetype = "dashed") + # default cutoff
+      geom_vline(xintercept = c(-0.1, 0.1), linetype = "dashed") + # default cutoff
+      facet_wrap(~ condition_2) +
+      scale_color_manual("Signficant\nIsoform Switch", values = c("black", "red")) +
+      labs(x = "dIF", y = "-Log10 ( Isoform Switch Q Value )") +
+      theme_bw()
+    dev.off()
+
+
+    ### Switch vs Gene changes:
+    outputFile <- file.path(getwd(), "switchGene.pdf")
+    pdf(
+      file = outputFile,
+      onefile = FALSE,
+      height = 6,
+      width = 9
+    )
+    ggplot(data = SwitchList$isoformFeatures,
+           aes(x = gene_log2_fold_change, y = dIF)) +
+      geom_point(aes(color = abs(dIF) > 0.1 &
+                       isoform_switch_q_value < 0.05),
+                 size = 1) +
+      facet_wrap(~ condition_2) +
+      geom_hline(yintercept = 0, linetype = "dashed") +
+      geom_vline(xintercept = 0, linetype = "dashed") +
+      scale_color_manual("Signficant\nIsoform Switch", values = c("black", "red")) +
+      labs(x = "Gene log2 fold change", y = "dIF") +
+      theme_bw()
+    dev.off()
+
+    outputFile <- file.path(getwd(), "splicingGenomewide.pdf")
+    pdf(
+      file = outputFile,
+      onefile = FALSE,
+      height = 6,
+      width = 9
+    )
+    splicingGenomeWide <- extractSplicingGenomeWide(
+      SwitchList,
+      featureToExtract = "all",
+      splicingToAnalyze = c("A3", "MES", "ATSS"),
+      plot = TRUE,
+      returnResult = TRUE
+    )
+    dev.off()
+  }
+  save(SwitchList, file = "SwitchList.Rda")
+
+}
author	iuc
date	Tue, 24 Jan 2023 18:37:14 +0000
parents
children	2c4e879a81cf