Mercurial > repos > iuc > isoformswitchanalyzer
changeset 7:d3377a16d881 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/isoformswitchanalyzer commit db7d80651213272678725ba877d95f5113378878
| author | iuc |
|---|---|
| date | Wed, 14 Jan 2026 09:30:42 +0000 |
| parents | 298d81e5e138 |
| children | |
| files | IsoformSwitchAnalyzeR.R isoformswitchanalyzer.xml macros.xml test-data/ASP14_1.tabular test-data/ASP14_2.tabular test-data/ASP14_3.tabular test-data/ASP14_doxycycline_1.tabular test-data/ASP14_doxycycline_2.tabular test-data/ASP14_doxycycline_3.tabular test-data/annotation_stringtie.gtf.gz test-data/gencode.hg19.chr10_1000.gtf.gz test-data/stringtie_counts.tabular test-data/test02_samples_annotation.tabular test-data/test10_samples_annotation.tabular test-data/transcriptome_stringtie.fasta.gz |
| diffstat | 15 files changed, 7167 insertions(+), 823 deletions(-) [+] |
line wrap: on
line diff
--- a/IsoformSwitchAnalyzeR.R Wed Dec 20 18:03:04 2023 +0000 +++ b/IsoformSwitchAnalyzeR.R Wed Jan 14 09:30:42 2026 +0000 @@ -1,7 +1,8 @@ # Load the IsoformSwitchAnalyzeR library library(IsoformSwitchAnalyzeR, - quietly = TRUE, - warn.conflicts = FALSE) + quietly = TRUE, + warn.conflicts = FALSE +) library(argparse, quietly = TRUE, warn.conflicts = FALSE) library(dplyr, quietly = TRUE, warn.conflicts = FALSE) library(ggplot2, quietly = TRUE, warn.conflicts = FALSE) @@ -9,11 +10,11 @@ # setup R error handling to go to stderr options( - show.error.messages = FALSE, - error = function() { - cat(geterrmessage(), file = stderr()) - q("no", 1, FALSE) - } + show.error.messages = FALSE, + error = function() { + cat(geterrmessage(), file = stderr()) + q("no", 1, FALSE) + } ) # we need that to not crash galaxy with an UTF8 error on German LC settings. @@ -28,319 +29,351 @@ parser <- ArgumentParser(description = "IsoformSwitcheR R script") parser$add_argument("--modeSelector") -parser$add_argument("--parentDir", required = FALSE, help = "Parent directory") +parser$add_argument("--parentDir", required = FALSE, help = "Parent directory") parser$add_argument("--condition", - action = "append", - required = FALSE, - help = "Conditions") + action = "append", + required = FALSE, + help = "Conditions" +) parser$add_argument("--sampleID", - action = "append", - required = FALSE, - help = "SampleID") + action = "append", + required = FALSE, + help = "SampleID" +) parser$add_argument("--replicate", - action = "append", - required = FALSE, - help = "Replicates") + action = "append", + required = FALSE, + help = "Replicates" +) +parser$add_argument("--cofactors", + action = "append", + required = FALSE, + help = "Cofactors (comma-separated values)" +) +parser$add_argument("--cofactorNames", + required = FALSE, + help = "Cofactor names (comma-separated)" +) parser$add_argument("--readLength", - required = FALSE, - type = "integer", - help = "Read length (required for stringtie)") + required = FALSE, + type = "integer", + help = "Read length (required for stringtie)" +) parser$add_argument("--pairedSamples", action = "store_true", required = FALSE, help = "Paired samples") parser$add_argument("--annotation", required = FALSE, help = "Annotation") parser$add_argument("--stringtieAnnotation", required = FALSE, help = "Stringtie annotation") parser$add_argument("--transcriptome", required = FALSE, help = "Transcriptome") parser$add_argument( - "--fixStringTieAnnotationProblem", - action = "store_true", - required = FALSE, - help = "Fix StringTie annotation problem" + "--fixStringTieAnnotationProblem", + action = "store_true", + required = FALSE, + help = "Fix StringTie annotation problem" ) parser$add_argument("--countFiles", required = FALSE, help = "Count files") parser$add_argument("--toolSource", required = FALSE, help = "Tool source") parser$add_argument("--rObject", required = FALSE, help = "R object") parser$add_argument("--IFcutoff", - required = FALSE, - type = "numeric", - help = "IFcutoff") -parser$add_argument( - "--geneExpressionCutoff", - required = FALSE, - type = "numeric", - help = "Gene expression cutoff" + required = FALSE, + type = "numeric", + help = "IFcutoff" ) parser$add_argument( - "--isoformExpressionCutoff", - required = FALSE, - type = "numeric", - help = "Isoform expression cutoff" + "--geneExpressionCutoff", + required = FALSE, + type = "numeric", + help = "Gene expression cutoff" +) +parser$add_argument( + "--isoformExpressionCutoff", + required = FALSE, + type = "numeric", + help = "Isoform expression cutoff" ) parser$add_argument("--alpha", - required = FALSE, - type = "numeric", - help = "") + required = FALSE, + type = "numeric", + help = "" +) parser$add_argument("--dIFcutoff", - required = FALSE, - type = "numeric", - help = "dIF cutoff") + required = FALSE, + type = "numeric", + help = "dIF cutoff" +) parser$add_argument( - "--onlySigIsoforms", - required = FALSE, - action = "store_true", - help = "Only significative isoforms" + "--onlySigIsoforms", + required = FALSE, + action = "store_true", + help = "Only significative isoforms" ) parser$add_argument( - "--filterForConsequences", - required = FALSE, - action = "store_true", - help = "Filter for consequences" + "--filterForConsequences", + required = FALSE, + action = "store_true", + help = "Filter for consequences" ) parser$add_argument( - "--removeSingleIsformGenes", - required = FALSE, - action = "store_true", - help = "Remove single isoform genes" + "--removeSingleIsformGenes", + required = FALSE, + action = "store_true", + help = "Remove single isoform genes" ) parser$add_argument( - "--keepIsoformInAllConditions", - required = FALSE, - action = "store_true", - help = "Keep isoform in all conditions" + "--keepIsoformInAllConditions", + required = FALSE, + action = "store_true", + help = "Keep isoform in all conditions" ) parser$add_argument( - "--correctForConfoundingFactors", - required = FALSE, - action = "store_true", - help = "Correct for confunding factors" + "--correctForConfoundingFactors", + required = FALSE, + action = "store_true", + help = "Correct for confunding factors" ) parser$add_argument( - "--overwriteIFvalues", - required = FALSE, - action = "store_true", - help = "Overwrite IF values" + "--overwriteIFvalues", + required = FALSE, + action = "store_true", + help = "Overwrite IF values" ) parser$add_argument( - "--removeNonConvensionalChr", - required = FALSE, - action = "store_true", - help = "Remove non-conventional chromosomes" + "--removeNonConvensionalChr", + required = FALSE, + action = "store_true", + help = "Remove non-conventional chromosomes" ) parser$add_argument( - "--reduceToSwitchingGenes", - required = FALSE, - action = "store_true", - help = "Reduce to switching genes" + "--reduceToSwitchingGenes", + required = FALSE, + action = "store_true", + help = "Reduce to switching genes" ) parser$add_argument( - "--reduceFurtherToGenesWithConsequencePotential", - required = FALSE, - action = "store_true", - help = "Reduce further to genes with consequence potential" + "--reduceFurtherToGenesWithConsequencePotential", + required = FALSE, + action = "store_true", + help = "Reduce further to genes with consequence potential" ) parser$add_argument( - "--keepIsoformInAllConditions2", - required = FALSE, - action = "store_true", - help = "Keep isoform in ll conditions" + "--keepIsoformInAllConditions2", + required = FALSE, + action = "store_true", + help = "Keep isoform in ll conditions" ) parser$add_argument("--minORFlength", - required = FALSE, - type = "integer", - help = "") + required = FALSE, + type = "integer", + help = "" +) parser$add_argument("--orfMethod", required = FALSE, help = "ORF methods") parser$add_argument("--PTCDistance", - required = FALSE, - type = "integer", - help = "") + required = FALSE, + type = "integer", + help = "" +) parser$add_argument( - "--removeShortAAseq", - required = FALSE, - action = "store_true", - help = "Remove short aminoacid sequences" + "--removeShortAAseq", + required = FALSE, + action = "store_true", + help = "Remove short aminoacid sequences" ) parser$add_argument( - "--removeLongAAseq", - required = FALSE, - action = "store_true", - help = "Remove long aminoacid sequences" + "--removeLongAAseq", + required = FALSE, + action = "store_true", + help = "Remove long aminoacid sequences" ) parser$add_argument( - "--removeORFwithStop", - required = FALSE, - action = "store_true", - help = "Remove ORF with stop codon" + "--removeORFwithStop", + required = FALSE, + action = "store_true", + help = "Remove ORF with stop codon" ) parser$add_argument( - "--onlySwitchingGenes", - required = FALSE, - action = "store_true", - help = "Only switching genes" + "--onlySwitchingGenes", + required = FALSE, + action = "store_true", + help = "Only switching genes" ) parser$add_argument("--analysisMode", required = FALSE, help = "Analyze all isoforms with differential usage or single genes") parser$add_argument( - "--genesToPlot", - type = "integer", - default = 10, - required = FALSE, - help = "Number of genes to plot" + "--genesToPlot", + type = "integer", + default = 10, + required = FALSE, + help = "Number of genes to plot" ) parser$add_argument("--gene", required = FALSE, help = "Gene ID to analyze") parser$add_argument( - "--sortByQvals", - action = "store_true", - required = FALSE, - help = "Sort genes by Q-val values" + "--sortByQvals", + action = "store_true", + required = FALSE, + help = "Sort genes by Q-val values" ) parser$add_argument("--countGenes", - action = "store_true", - required = FALSE, - help = "Count genes") + action = "store_true", + required = FALSE, + help = "Count genes" +) parser$add_argument( - "--asFractionTotal", - action = "store_true", - required = FALSE, - help = "Plot gene expresson as fraction of total" + "--asFractionTotal", + action = "store_true", + required = FALSE, + help = "Plot gene expresson as fraction of total" ) parser$add_argument("--plotGenes", - action = "store_true", - required = FALSE, - help = "Plot genes instead of isoforms") + action = "store_true", + required = FALSE, + help = "Plot genes instead of isoforms" +) parser$add_argument( - "--simplifyLocation", - action = "store_true", - required = FALSE, - help = "Simplify localtion" + "--simplifyLocation", + action = "store_true", + required = FALSE, + help = "Simplify localtion" ) parser$add_argument( - "--removeEmptyConsequences", - action = "store_true", - required = FALSE, - help = "Remove empty consequences" + "--removeEmptyConsequences", + action = "store_true", + required = FALSE, + help = "Remove empty consequences" ) parser$add_argument( - "--analysisOppositeConsequence", - action = "store_true", - required = FALSE, - help = "Analysi opposite consequences" + "--analysisOppositeConsequence", + action = "store_true", + required = FALSE, + help = "Analysi opposite consequences" ) parser$add_argument("--pathToCPATresultFile", - required = FALSE, - help = "Path to CPAT result file") + required = FALSE, + help = "Path to CPAT result file" +) parser$add_argument("--pathToCPC2resultFile", - required = FALSE, - help = "Path to CPC2 result file") + required = FALSE, + help = "Path to CPC2 result file" +) parser$add_argument("--pathToPFAMresultFile", - required = FALSE, - help = "Path to PFAM result file") + required = FALSE, + help = "Path to PFAM result file" +) parser$add_argument("--pathToNetSurfP2resultFile", - required = FALSE, - help = "Path to NetSurfP2 result file") + required = FALSE, + help = "Path to NetSurfP2 result file" +) parser$add_argument("--pathToSignalPresultFile", - required = FALSE, - help = "Path to signalP result file") + required = FALSE, + help = "Path to signalP result file" +) parser$add_argument("--pathToIUPred2AresultFile", - required = FALSE, - help = "Path to IUPred2A result file") + required = FALSE, + help = "Path to IUPred2A result file" +) parser$add_argument("--codingCutoff", - required = FALSE, - type = "numeric", - help = "Codding cutoff") -parser$add_argument( - "--removeNoncodingORFs", - action = "store_true", - required = FALSE, - help = "Remove non-coding ORFs" + required = FALSE, + type = "numeric", + help = "Codding cutoff" ) parser$add_argument( - "--minSignalPeptideProbability", - required = FALSE, - type = "numeric", - help = "Minimul signal peptide probability" + "--removeNoncodingORFs", + action = "store_true", + required = FALSE, + help = "Remove non-coding ORFs" ) parser$add_argument( - "--smoothingWindowSize", - type = "integer", - required = FALSE, - help = "Smoothing windows size" + "--minSignalPeptideProbability", + required = FALSE, + type = "numeric", + help = "Minimul signal peptide probability" ) parser$add_argument( - "--probabilityCutoff", - required = FALSE, - type = "double", - help = "Probability cutoff" + "--smoothingWindowSize", + type = "integer", + required = FALSE, + help = "Smoothing windows size" +) +parser$add_argument( + "--probabilityCutoff", + required = FALSE, + type = "double", + help = "Probability cutoff" ) parser$add_argument("--minIdrSize", - required = FALSE, - type = "integer", - help = "Min Idr size") + required = FALSE, + type = "integer", + help = "Min Idr size" +) parser$add_argument( - "--annotateBindingSites", - action = "store_true", - required = FALSE, - help = "Annotate binding sites" + "--annotateBindingSites", + action = "store_true", + required = FALSE, + help = "Annotate binding sites" ) parser$add_argument( - "--minIdrBindingSize", - required = FALSE, - type = "integer", - help = "Minimun Idr binding size" + "--minIdrBindingSize", + required = FALSE, + type = "integer", + help = "Minimun Idr binding size" ) parser$add_argument( - "--minIdrBindingOverlapFrac", - required = FALSE, - type = "numeric", - help = "" + "--minIdrBindingOverlapFrac", + required = FALSE, + type = "numeric", + help = "" ) parser$add_argument("--ntCutoff", - required = FALSE, - type = "integer", - help = "Nucleotide cutoff") + required = FALSE, + type = "integer", + help = "Nucleotide cutoff" +) parser$add_argument("--ntFracCutoff", - required = FALSE, - type = "numeric", - help = "Nucleotide fraction cutoff") + required = FALSE, + type = "numeric", + help = "Nucleotide fraction cutoff" +) parser$add_argument( - "--ntJCsimCutoff", - required = FALSE, - type = "numeric", - help = "Nucleotide Jaccard simmilarity cutoff" + "--ntJCsimCutoff", + required = FALSE, + type = "numeric", + help = "Nucleotide Jaccard simmilarity cutoff" ) parser$add_argument("--AaCutoff", - required = FALSE, - type = "integer", - help = "Aminoacid cutoff") + required = FALSE, + type = "integer", + help = "Aminoacid cutoff" +) parser$add_argument("--AaFracCutoff", - required = FALSE, - type = "numeric", - help = "Aminoacid fraction cutoff") + required = FALSE, + type = "numeric", + help = "Aminoacid fraction cutoff" +) parser$add_argument( - "--AaJCsimCutoff", - required = FALSE, - type = "numeric", - help = "Aminoacid Jaccard similarity cutoff" + "--AaJCsimCutoff", + required = FALSE, + type = "numeric", + help = "Aminoacid Jaccard similarity cutoff" ) parser$add_argument( - "--removeNonConseqSwitches", - action = "store_true", - required = FALSE, - help = "Remove switches without consequences" + "--removeNonConseqSwitches", + action = "store_true", + required = FALSE, + help = "Remove switches without consequences" ) parser$add_argument( - "--rescaleTranscripts", - action = "store_true", - required = FALSE, - help = "Rescale transcripts" + "--rescaleTranscripts", + action = "store_true", + required = FALSE, + help = "Rescale transcripts" ) parser$add_argument( - "--reverseMinus", - action = "store_true", - required = FALSE, - help = "Reverse minus" + "--reverseMinus", + action = "store_true", + required = FALSE, + help = "Reverse minus" ) parser$add_argument( - "--addErrorbars", - action = "store_true", - required = FALSE, - help = "Add error bars" + "--addErrorbars", + action = "store_true", + required = FALSE, + help = "Add error bars" ) @@ -350,661 +383,706 @@ ################### if (args$modeSelector == "data_import") { - - quantificationData <- importIsoformExpression( - parentDir = args$parentDir, - addIsofomIdAsColumn = TRUE, - readLength = args$readLength - ) + quantificationData <- importIsoformExpression( + parentDir = args$parentDir, + addIsofomIdAsColumn = TRUE, + readLength = args$readLength + ) - if (!args$pairedSamples) { - ### Make design matrix - myDesign <- data.frame( - sampleID = args$sampleID, - condition = args$condition) - } else { - myDesign <- data.frame( - sampleID = args$sampleID, - condition = args$condition, - replicate = args$replicate) - } + if (!args$pairedSamples) { + ### Make design matrix + if (!is.null(args$cofactors)) { + # Split comma-separated cofactor values for each sample + cofactor_list <- lapply(args$cofactors, function(x) { + strsplit(x, ",")[[1]] + }) - comparisons <- as.data.frame(cbind( - condition_1 = myDesign$condition[1], - condition_2 = myDesign$condition[length(myDesign$condition)] - )) + cofactor_names <- strsplit(as.character(args$cofactorNames), ",")[[1]] + + # Create design matrix with cofactors as additional columns + myDesign <- data.frame( + sampleID = args$sampleID, + condition = args$condition + ) - if (args$toolSource == "stringtie") { - if (!is.null(args$stringtieAnnotation)) { - SwitchList <- importRdata( - isoformCountMatrix = quantificationData$counts, - isoformRepExpression = quantificationData$abundance, - designMatrix = myDesign, - removeNonConvensionalChr = args$removeNonConvensionalChr, - isoformExonAnnoation = args$stringtieAnnotation, - isoformNtFasta = args$transcriptome, - addAnnotatedORFs = FALSE, - showProgress = TRUE, - comparisonsToMake = comparisons, - fixStringTieAnnotationProblem = args$fixStringTieAnnotationProblem - ) + # Add cofactor columns with actual names + for (i in seq_along(cofactor_names)) { + cofactor_col <- sapply(cofactor_list, function(x) x[i]) + myDesign[[cofactor_names[i]]] <- cofactor_col + } + } else { + myDesign <- data.frame( + sampleID = args$sampleID, + condition = args$condition + ) + } + } else { + if (!is.null(args$cofactors)) { + # Split comma-separated cofactor values for each sample + cofactor_list <- lapply(args$cofactors, function(x) { + strsplit(x, ",")[[1]] + }) + + cofactor_names <- strsplit(as.character(args$cofactorNames), ",")[[1]] - SwitchList <- addORFfromGTF( - SwitchList, - removeNonConvensionalChr = args$removeNonConvensionalChr, - pathToGTF = args$annotation - ) + # Create design matrix with cofactors as additional columns + myDesign <- data.frame( + sampleID = args$sampleID, + condition = args$condition, + replicate = args$replicate + ) - } else { - SwitchList <- importRdata( - isoformCountMatrix = quantificationData$counts, - isoformRepExpression = quantificationData$abundance, - designMatrix = myDesign, - removeNonConvensionalChr = args$removeNonConvensionalChr, - isoformNtFasta = args$transcriptome, - isoformExonAnnoation = args$annotation, - showProgress = TRUE, - comparisonsToMake = comparisons, - fixStringTieAnnotationProblem = args$fixStringTieAnnotationProblem - ) + # Add cofactor columns with actual names + for (i in seq_along(cofactor_names)) { + cofactor_col <- sapply(cofactor_list, function(x) x[i]) + myDesign[[cofactor_names[i]]] <- cofactor_col + } + } else { + myDesign <- data.frame( + sampleID = args$sampleID, + condition = args$condition, + replicate = args$replicate + ) + } } - } else { - SwitchList <- importRdata( - isoformCountMatrix = quantificationData$counts, - isoformRepExpression = quantificationData$abundance, - designMatrix = myDesign, - removeNonConvensionalChr = args$removeNonConvensionalChr, - isoformExonAnnoation = args$annotation, - isoformNtFasta = args$transcriptome, - showProgress = TRUE, - comparisonsToMake = comparisons - ) - } + comparisons <- as.data.frame(cbind( + condition_1 = myDesign$condition[1], + condition_2 = myDesign$condition[length(myDesign$condition)] + )) + + if (args$toolSource == "stringtie") { + if (!is.null(args$stringtieAnnotation)) { + SwitchList <- importRdata( + isoformCountMatrix = quantificationData$counts, + isoformRepExpression = quantificationData$abundance, + designMatrix = myDesign, + removeNonConvensionalChr = args$removeNonConvensionalChr, + isoformExonAnnoation = args$stringtieAnnotation, + isoformNtFasta = args$transcriptome, + addAnnotatedORFs = FALSE, + showProgress = TRUE, + comparisonsToMake = comparisons, + fixStringTieAnnotationProblem = args$fixStringTieAnnotationProblem + ) - geneCountMatrix <- extractGeneExpression( - SwitchList, - extractCounts = TRUE, - addGeneNames = FALSE, - addIdsAsColumns = FALSE - ) - - if (args$countFiles == "collection") { - - expressionDF <- data.frame(geneCountMatrix) + SwitchList <- addORFfromGTF( + SwitchList, + removeNonConvensionalChr = args$removeNonConvensionalChr, + pathToGTF = args$annotation + ) + } else { + SwitchList <- importRdata( + isoformCountMatrix = quantificationData$counts, + isoformRepExpression = quantificationData$abundance, + designMatrix = myDesign, + removeNonConvensionalChr = args$removeNonConvensionalChr, + isoformNtFasta = args$transcriptome, + isoformExonAnnoation = args$annotation, + showProgress = TRUE, + comparisonsToMake = comparisons, + fixStringTieAnnotationProblem = args$fixStringTieAnnotationProblem + ) + } + } else { + SwitchList <- importRdata( + isoformCountMatrix = quantificationData$counts, + isoformRepExpression = quantificationData$abundance, + designMatrix = myDesign, + removeNonConvensionalChr = args$removeNonConvensionalChr, + isoformExonAnnoation = args$annotation, + isoformNtFasta = args$transcriptome, + showProgress = TRUE, + comparisonsToMake = comparisons + ) + } - myDesign$condition[length(myDesign$condition)] - - dataframe_factor1 <- expressionDF %>% select(matches(myDesign$condition[1])) - dataframe_factor2 <- expressionDF %>% select(matches(myDesign$condition[length(myDesign$condition)])) - + geneCountMatrix <- extractGeneExpression( + SwitchList, + extractCounts = TRUE, + addGeneNames = FALSE, + addIdsAsColumns = FALSE + ) - lf1 <- as.list(as.data.frame(dataframe_factor1)) - sampleNames1 <- colnames(as.data.frame(dataframe_factor1)) + if (args$countFiles == "collection") { + expressionDF <- data.frame(geneCountMatrix) - lf2 <- as.list(as.data.frame(dataframe_factor2)) - sampleNames2 <- colnames(as.data.frame(dataframe_factor2)) + myDesign$condition[length(myDesign$condition)] - geneNames <- row.names(as.data.frame(expressionDF)) + dataframe_factor1 <- expressionDF %>% select(matches(myDesign$condition[1])) + dataframe_factor2 <- expressionDF %>% select(matches(myDesign$condition[length(myDesign$condition)])) - for (index in seq_along(lf1)) { - tabular_expression <- data.frame(geneNames, lf1[index]) - colnames(tabular_expression) <- - c("Geneid", sampleNames1[index]) - filename <- - paste(sampleNames1[index], "dataset.tabular", sep = "_") - output_path <- paste("./count_files/factor1/", filename, sep = "") - write.table( - tabular_expression, - output_path, - sep = "\t", - row.names = FALSE, - quote = FALSE - ) + lf1 <- as.list(as.data.frame(dataframe_factor1)) + sampleNames1 <- colnames(as.data.frame(dataframe_factor1)) + + lf2 <- as.list(as.data.frame(dataframe_factor2)) + sampleNames2 <- colnames(as.data.frame(dataframe_factor2)) + + geneNames <- row.names(as.data.frame(expressionDF)) + + + for (index in seq_along(lf1)) { + tabular_expression <- data.frame(geneNames, lf1[index]) + colnames(tabular_expression) <- + c("Geneid", sampleNames1[index]) + filename <- + paste(sampleNames1[index], "dataset.tabular", sep = "_") + output_path <- paste("./count_files/factor1/", filename, sep = "") + write.table( + tabular_expression, + output_path, + sep = "\t", + row.names = FALSE, + quote = FALSE + ) + } + for (index in seq_along(lf2)) { + tabular_expression <- data.frame(geneNames, lf2[index]) + colnames(tabular_expression) <- + c("Geneid", sampleNames2[index]) + filename <- + paste(sampleNames2[index], "dataset.tabular", sep = "_") + output_path <- paste("./count_files/factor2/", filename, sep = "") + write.table( + tabular_expression, + output_path, + sep = "\t", + row.names = FALSE, + quote = FALSE + ) + } + } else if (args$countFiles == "matrix") { + expressionDF <- data.frame(geneCountMatrix) + geneNames <- row.names(expressionDF) + + expressionDF <- cbind(geneNames, expressionDF) + write.table( + as.data.frame(expressionDF), + "./count_files/matrix.tabular", + sep = "\t", + row.names = FALSE, + quote = FALSE + ) + write.table( + as.data.frame(myDesign), + "./count_files/samples.tabular", + sep = "\t", + row.names = FALSE, + quote = FALSE + ) } - for (index in seq_along(lf2)) { - tabular_expression <- data.frame(geneNames, lf2[index]) - colnames(tabular_expression) <- - c("Geneid", sampleNames2[index]) - filename <- - paste(sampleNames2[index], "dataset.tabular", sep = "_") - output_path <- paste("./count_files/factor2/", filename, sep = "") - write.table( - tabular_expression, - output_path, - sep = "\t", - row.names = FALSE, - quote = FALSE - ) - } - } else if (args$countFiles == "matrix") { - expressionDF <- data.frame(geneCountMatrix) - geneNames <- row.names(expressionDF) - expressionDF <- cbind(geneNames, expressionDF) - write.table( - as.data.frame(expressionDF), - "./count_files/matrix.tabular", - sep = "\t", - row.names = FALSE, - quote = FALSE - ) - write.table( - as.data.frame(myDesign), - "./count_files/samples.tabular", - sep = "\t", - row.names = FALSE, - quote = FALSE - ) - } - - save(SwitchList, file = "SwitchList.Rda") - + save(SwitchList, file = "SwitchList.Rda") } if (args$modeSelector == "first_step") { + # First part of the analysis + ############################# - # First part of the analysis - ############################# - - load(file = args$rObject) + load(file = args$rObject) - ### Filter - SwitchList <- preFilter( - SwitchList, - IFcutoff = args$IFcutoff, - geneExpressionCutoff = args$geneExpressionCutoff, - isoformExpressionCutoff = args$isoformExpressionCutoff, - removeSingleIsoformGenes = args$removeSingleIsformGenes, - onlySigIsoforms = args$onlySigIsoforms, - keepIsoformInAllConditions = args$keepIsoformInAllConditions, - alpha = args$alpha, - dIFcutoff = args$dIFcutoff, - ) + ### Filter + SwitchList <- preFilter( + SwitchList, + IFcutoff = args$IFcutoff, + geneExpressionCutoff = args$geneExpressionCutoff, + isoformExpressionCutoff = args$isoformExpressionCutoff, + removeSingleIsoformGenes = args$removeSingleIsformGenes, + onlySigIsoforms = args$onlySigIsoforms, + keepIsoformInAllConditions = args$keepIsoformInAllConditions, + alpha = args$alpha, + dIFcutoff = args$dIFcutoff, + ) - ### Test for isoform switches - SwitchList <- isoformSwitchTestDEXSeq( - SwitchList, - alpha = args$alpha, - dIFcutoff = args$dIFcutoff, - correctForConfoundingFactors = args$correctForConfoundingFactors, - overwriteIFvalues = args$overwriteIFvalues, - reduceToSwitchingGenes = args$reduceToSwitchingGenes, - reduceFurtherToGenesWithConsequencePotential = args$reduceFurtherToGenesWithConsequencePotential, - onlySigIsoforms = args$onlySigIsoforms, - keepIsoformInAllConditions = args$keepIsoformInAllConditions2, - showProgress = TRUE, - ) + ### Test for isoform switches + SwitchList <- isoformSwitchTestDEXSeq( + SwitchList, + alpha = args$alpha, + dIFcutoff = args$dIFcutoff, + correctForConfoundingFactors = args$correctForConfoundingFactors, + overwriteIFvalues = args$overwriteIFvalues, + reduceToSwitchingGenes = args$reduceToSwitchingGenes, + reduceFurtherToGenesWithConsequencePotential = args$reduceFurtherToGenesWithConsequencePotential, + onlySigIsoforms = args$onlySigIsoforms, + keepIsoformInAllConditions = args$keepIsoformInAllConditions2, + showProgress = TRUE, + ) - # Analyze missing annotated isoforms by default - SwitchList <- analyzeNovelIsoformORF( - SwitchList, - analysisAllIsoformsWithoutORF = TRUE, - minORFlength = args$minORFlength, - orfMethod = args$orfMethod, - PTCDistance = args$PTCDistance, - startCodons = "ATG", - stopCodons = c("TAA", "TAG", "TGA"), - showProgress = TRUE, - ) + # Analyze missing annotated isoforms by default + SwitchList <- analyzeNovelIsoformORF( + SwitchList, + analysisAllIsoformsWithoutORF = TRUE, + minORFlength = args$minORFlength, + orfMethod = args$orfMethod, + PTCDistance = args$PTCDistance, + startCodons = "ATG", + stopCodons = c("TAA", "TAG", "TGA"), + showProgress = TRUE, + ) - ### Extract Sequences - SwitchList <- extractSequence( - SwitchList, - onlySwitchingGenes = args$onlySwitchingGenes, - alpha = args$alpha, - dIFcutoff = args$dIFcutoff, - extractNTseq = TRUE, - extractAAseq = TRUE, - removeShortAAseq = args$removeShortAAseq, - removeLongAAseq = args$removeLongAAseq, - removeORFwithStop = args$removeORFwithStop, - addToSwitchAnalyzeRlist = TRUE, - writeToFile = TRUE, - pathToOutput = getwd(), - outputPrefix = "isoformSwitchAnalyzeR_isoform", - forceReExtraction = FALSE, - quiet = FALSE - ) + ### Extract Sequences + SwitchList <- extractSequence( + SwitchList, + onlySwitchingGenes = args$onlySwitchingGenes, + alpha = args$alpha, + dIFcutoff = args$dIFcutoff, + extractNTseq = TRUE, + extractAAseq = TRUE, + removeShortAAseq = args$removeShortAAseq, + removeLongAAseq = args$removeLongAAseq, + removeORFwithStop = args$removeORFwithStop, + addToSwitchAnalyzeRlist = TRUE, + writeToFile = TRUE, + pathToOutput = getwd(), + outputPrefix = "isoformSwitchAnalyzeR_isoform", + forceReExtraction = FALSE, + quiet = FALSE + ) - ### Summary - switchSummary <- extractSwitchSummary( - SwitchList, - filterForConsequences = args$filterForConsequences, - alpha = args$alpha, - dIFcutoff = args$dIFcutoff, - onlySigIsoforms = args$onlySigIsoforms, - ) + ### Summary + switchSummary <- extractSwitchSummary( + SwitchList, + filterForConsequences = args$filterForConsequences, + alpha = args$alpha, + dIFcutoff = args$dIFcutoff, + onlySigIsoforms = args$onlySigIsoforms, + ) - save(SwitchList, file = "SwitchList.Rda") - write.table( - switchSummary, - file = "switchSummary.tsv", - quote = FALSE, - sep = "\t", - col.names = TRUE, - row.names = FALSE - ) - + save(SwitchList, file = "SwitchList.Rda") + write.table( + switchSummary, + file = "switchSummary.tsv", + quote = FALSE, + sep = "\t", + col.names = TRUE, + row.names = FALSE + ) } if (args$modeSelector == "second_step") { - - # Second part of the analysis - ############################# - - load(file = args$rObject) + # Second part of the analysis + ############################# - ### Add annotation - if (!is.null(args$pathToCPATresultFile)) { - SwitchList <- analyzeCPAT( - SwitchList, - pathToCPATresultFile = args$pathToCPATresultFile, - codingCutoff = args$codingCutoff, - removeNoncodinORFs = args$removeNoncodingORFs - ) - } + load(file = args$rObject) - if (!is.null(args$pathToCPC2resultFile)) { - SwitchList <- analyzeCPC2( - SwitchList, - pathToCPC2resultFile = args$pathToCPC2resultFile, - removeNoncodinORFs = args$removeNoncodingORFs - ) - } - - if (!is.null(args$pathToPFAMresultFile)) { - pfamFiles <- list.files(path = args$pathToPFAMresultFile, - full.names = TRUE) - - SwitchList <- analyzePFAM(SwitchList, - pathToPFAMresultFile = pfamFiles, - showProgress = FALSE) - } + ### Add annotation + if (!is.null(args$pathToCPATresultFile)) { + SwitchList <- analyzeCPAT( + SwitchList, + pathToCPATresultFile = args$pathToCPATresultFile, + codingCutoff = args$codingCutoff, + removeNoncodinORFs = args$removeNoncodingORFs + ) + } - if (!is.null(args$pathToNetSurfP2resultFile)) { - netsurfFiles <- list.files(path = args$pathToNetSurfP2resultFile, - full.names = TRUE) - - SwitchList <- analyzeNetSurfP2( - SwitchList, - pathToNetSurfP2resultFile = netsurfFiles, - smoothingWindowSize = args$smoothingWindowSize, - probabilityCutoff = args$probabilityCutoff, - minIdrSize = args$minIdrSize, - showProgress = TRUE - ) - } + if (!is.null(args$pathToCPC2resultFile)) { + SwitchList <- analyzeCPC2( + SwitchList, + pathToCPC2resultFile = args$pathToCPC2resultFile, + removeNoncodinORFs = args$removeNoncodingORFs + ) + } - if (!is.null(args$pathToIUPred2AresultFile)) { - SwitchList <- analyzeIUPred2A( - SwitchList, - pathToIUPred2AresultFile = args$pathToIUPred2AresultFile, - smoothingWindowSize = args$smoothingWindowSize, - probabilityCutoff = args$probabilityCutoff, - minIdrSize = args$minIdrSize, - annotateBindingSites = args$annotateBindingSites, - minIdrBindingSize = args$minIdrBindingSize, - minIdrBindingOverlapFrac = args$minIdrBindingOverlapFrac, - showProgress = TRUE, - quiet = FALSE - ) - } + if (!is.null(args$pathToPFAMresultFile)) { + pfamFiles <- list.files( + path = args$pathToPFAMresultFile, + full.names = TRUE + ) - if (!is.null(args$pathToSignalPresultFile)) { - signalpFiles <- list.files(path = args$pathToSignalPresultFile, - full.names = TRUE) + SwitchList <- analyzePFAM(SwitchList, + pathToPFAMresultFile = pfamFiles, + showProgress = FALSE + ) + } - SwitchList <- analyzeSignalP( - SwitchList, - pathToSignalPresultFile = signalpFiles, - minSignalPeptideProbability = args$minSignalPeptideProbability - ) - } + if (!is.null(args$pathToNetSurfP2resultFile)) { + netsurfFiles <- list.files( + path = args$pathToNetSurfP2resultFile, + full.names = TRUE + ) - SwitchList <- analyzeAlternativeSplicing( - SwitchList, - onlySwitchingGenes = args$onlySwitchingGenes, - alpha = args$alpha, - dIFcutoff = args$dIFcutoff, - showProgress = TRUE - ) - - SwitchList <- analyzeIntronRetention( - SwitchList, - onlySwitchingGenes = args$onlySwitchingGenes, - alpha = args$alpha, - dIFcutoff = args$dIFcutoff, - showProgress = TRUE - ) - - consequences <- c( - "intron_retention", - "NMD_status", - "isoform_seq_similarity", - "ORF_genomic", - "tss", - "tts" - ) - - if (!is.null(args$pathToCPATresultFile) || - !is.null(args$pathToCPC2resultFile)) { - updatedConsequences <- c(consequences, "coding_potential") - consequences <- updatedConsequences - } - - if (!is.null(args$pathToPFAMresultFile)) { - updatedConsequences <- c(consequences, "domains_identified") - consequences <- updatedConsequences - } + SwitchList <- analyzeNetSurfP2( + SwitchList, + pathToNetSurfP2resultFile = netsurfFiles, + smoothingWindowSize = args$smoothingWindowSize, + probabilityCutoff = args$probabilityCutoff, + minIdrSize = args$minIdrSize, + showProgress = TRUE + ) + } - if (!is.null(args$pathToSignalPresultFile)) { - updatedConsequences <- c(consequences, "signal_peptide_identified") - consequences <- updatedConsequences - } - - if (!is.null(args$pathToNetSurfP2resultFile) || - !is.null(args$pathToIUPred2AresultFile)) { - updatedConsequences <- c(consequences, "IDR_identified", "IDR_type") - consequences <- updatedConsequences - } + if (!is.null(args$pathToIUPred2AresultFile)) { + SwitchList <- analyzeIUPred2A( + SwitchList, + pathToIUPred2AresultFile = args$pathToIUPred2AresultFile, + smoothingWindowSize = args$smoothingWindowSize, + probabilityCutoff = args$probabilityCutoff, + minIdrSize = args$minIdrSize, + annotateBindingSites = args$annotateBindingSites, + minIdrBindingSize = args$minIdrBindingSize, + minIdrBindingOverlapFrac = args$minIdrBindingOverlapFrac, + showProgress = TRUE, + quiet = FALSE + ) + } - SwitchList <- analyzeSwitchConsequences( - SwitchList, - consequencesToAnalyze = consequences, - alpha = args$alpha, - dIFcutoff = args$dIFcutoff, - onlySigIsoforms = args$onlySigIsoforms, - ntCutoff = args$ntCutoff, - ntJCsimCutoff = args$ntJCsimCutoff, - AaCutoff = args$AaCutoff, - AaFracCutoff = args$AaFracCutoff, - AaJCsimCutoff = args$AaJCsimCutoff, - removeNonConseqSwitches = args$removeNonConseqSwitches, - showProgress = TRUE - ) + if (!is.null(args$pathToSignalPresultFile)) { + signalpFiles <- list.files( + path = args$pathToSignalPresultFile, + full.names = TRUE + ) - - ### Visual analysis - # Top genes - - if (args$analysisMode == "single") { + SwitchList <- analyzeSignalP( + SwitchList, + pathToSignalPresultFile = signalpFiles, + minSignalPeptideProbability = args$minSignalPeptideProbability + ) + } - outputFile <- file.path(getwd(), "single_gene.pdf") - - pdf( - file = outputFile, - onefile = FALSE, - height = 6, - width = 9 + SwitchList <- analyzeAlternativeSplicing( + SwitchList, + onlySwitchingGenes = args$onlySwitchingGenes, + alpha = args$alpha, + dIFcutoff = args$dIFcutoff, + showProgress = TRUE ) - switchPlot( - SwitchList, - gene = args$gene, - condition1 = myDesign$condition[1], - condition2 = myDesign$condition[length(myDesign$condition)], - IFcutoff = args$IFcutoff, - dIFcutoff = args$dIFcutoff, - rescaleTranscripts = args$rescaleTranscripts, - reverseMinus = args$reverseMinus, - addErrorbars = args$addErrorbars, - logYaxis = FALSE, - localTheme = theme_bw(base_size = 8) - ) - dev.off() - - } else { - mostSwitchingGene <- - extractTopSwitches( + SwitchList <- analyzeIntronRetention( SwitchList, - n = Inf, - filterForConsequences = args$filterForConsequences, - extractGenes = TRUE, + onlySwitchingGenes = args$onlySwitchingGenes, alpha = args$alpha, dIFcutoff = args$dIFcutoff, - inEachComparison = FALSE, - sortByQvals = args$sortByQvals - ) + showProgress = TRUE + ) + + consequences <- c( + "intron_retention", + "NMD_status", + "isoform_seq_similarity", + "ORF_genomic", + "tss", + "tts" + ) + + if (!is.null(args$pathToCPATresultFile) || + !is.null(args$pathToCPC2resultFile)) { + updatedConsequences <- c(consequences, "coding_potential") + consequences <- updatedConsequences + } + + if (!is.null(args$pathToPFAMresultFile)) { + updatedConsequences <- c(consequences, "domains_identified") + consequences <- updatedConsequences + } - write.table( - mostSwitchingGene, - file = "mostSwitchingGene.tsv", - quote = FALSE, - sep = "\t", - col.names = TRUE, - row.names = FALSE + if (!is.null(args$pathToSignalPresultFile)) { + updatedConsequences <- c(consequences, "signal_peptide_identified") + consequences <- updatedConsequences + } + + if (!is.null(args$pathToNetSurfP2resultFile) || + !is.null(args$pathToIUPred2AresultFile)) { + updatedConsequences <- c(consequences, "IDR_identified", "IDR_type") + consequences <- updatedConsequences + } + + SwitchList <- analyzeSwitchConsequences( + SwitchList, + consequencesToAnalyze = consequences, + alpha = args$alpha, + dIFcutoff = args$dIFcutoff, + onlySigIsoforms = args$onlySigIsoforms, + ntCutoff = args$ntCutoff, + ntJCsimCutoff = args$ntJCsimCutoff, + AaCutoff = args$AaCutoff, + AaFracCutoff = args$AaFracCutoff, + AaJCsimCutoff = args$AaJCsimCutoff, + removeNonConseqSwitches = args$removeNonConseqSwitches, + showProgress = TRUE ) - switchPlotTopSwitches( - SwitchList, - alpha = args$alpha, - dIFcutoff = args$dIFcutoff, - onlySigIsoforms = args$onlySigIsoforms, - n = args$genesToPlot, - sortByQvals = args$sortByQvals, - pathToOutput = getwd(), - fileType = "pdf" - ) + ### Visual analysis + # Top genes + + if (args$analysisMode == "single") { + outputFile <- file.path(getwd(), "single_gene.pdf") - outputFile <- - file.path(getwd(), "extractConsequencesSummary.pdf") - - pdf( - file = outputFile, - onefile = FALSE, - height = 6, - width = 12 - ) + pdf( + file = outputFile, + onefile = FALSE, + height = 6, + width = 9 + ) - consequenceSummary <- extractConsequenceSummary( - SwitchList, - consequencesToAnalyze = "all", - includeCombined = FALSE, - asFractionTotal = args$asFractionTotal, - alpha = args$alpha, - dIFcutoff = args$dIFcutoff, - plot = TRUE, - plotGenes = args$plotGenes, - simplifyLocation = args$simplifyLocation, - removeEmptyConsequences = args$removeEmptyConsequences, - returnResult = TRUE, - localTheme = theme_bw() - ) - dev.off() + switchPlot( + SwitchList, + gene = args$gene, + condition1 = myDesign$condition[1], + condition2 = myDesign$condition[length(myDesign$condition)], + IFcutoff = args$IFcutoff, + dIFcutoff = args$dIFcutoff, + rescaleTranscripts = args$rescaleTranscripts, + reverseMinus = args$reverseMinus, + addErrorbars = args$addErrorbars, + logYaxis = FALSE, + localTheme = theme_bw(base_size = 8) + ) + dev.off() + } else { + mostSwitchingGene <- + extractTopSwitches( + SwitchList, + n = Inf, + filterForConsequences = args$filterForConsequences, + extractGenes = TRUE, + alpha = args$alpha, + dIFcutoff = args$dIFcutoff, + inEachComparison = FALSE, + sortByQvals = args$sortByQvals + ) - write.table( - consequenceSummary, - file = "consequencesSummary.tsv", - quote = FALSE, - sep = "\t", - col.names = TRUE, - row.names = FALSE - ) + write.table( + mostSwitchingGene, + file = "mostSwitchingGene.tsv", + quote = FALSE, + sep = "\t", + col.names = TRUE, + row.names = FALSE + ) - outputFile <- file.path(getwd(), "consequencesEnrichment.pdf") - pdf( - file = outputFile, - onefile = FALSE, - height = 6, - width = 9 - ) - consequenceEnrichment <- extractConsequenceEnrichment( - SwitchList, - consequencesToAnalyze = "all", - alpha = args$alpha, - dIFcutoff = args$dIFcutoff, - countGenes = args$countGenes, - analysisOppositeConsequence = args$analysisOppositeConsequence, - plot = TRUE, - localTheme = theme_bw(base_size = 12), - minEventsForPlotting = 10, - returnResult = TRUE - ) - dev.off() + switchPlotTopSwitches( + SwitchList, + alpha = args$alpha, + dIFcutoff = args$dIFcutoff, + onlySigIsoforms = args$onlySigIsoforms, + n = args$genesToPlot, + sortByQvals = args$sortByQvals, + pathToOutput = getwd(), + fileType = "pdf" + ) + + outputFile <- + file.path(getwd(), "extractConsequencesSummary.pdf") + + pdf( + file = outputFile, + onefile = FALSE, + height = 6, + width = 12 + ) - write.table( - consequenceEnrichment, - file = "consequencesEnrichment.tsv", - quote = FALSE, - sep = "\t", - col.names = TRUE, - row.names = FALSE - ) + consequenceSummary <- extractConsequenceSummary( + SwitchList, + consequencesToAnalyze = "all", + includeCombined = FALSE, + asFractionTotal = args$asFractionTotal, + alpha = args$alpha, + dIFcutoff = args$dIFcutoff, + plot = TRUE, + plotGenes = args$plotGenes, + simplifyLocation = args$simplifyLocation, + removeEmptyConsequences = args$removeEmptyConsequences, + returnResult = TRUE, + localTheme = theme_bw() + ) + dev.off() + + write.table( + consequenceSummary, + file = "consequencesSummary.tsv", + quote = FALSE, + sep = "\t", + col.names = TRUE, + row.names = FALSE + ) - outputFile <- file.path(getwd(), "splicingEnrichment.pdf") - pdf( - file = outputFile, - onefile = FALSE, - height = 6, - width = 9 - ) - splicingEnrichment <- extractSplicingEnrichment( - SwitchList, - splicingToAnalyze = "all", - alpha = args$alpha, - dIFcutoff = args$dIFcutoff, - onlySigIsoforms = args$onlySigIsoforms, - countGenes = args$countGenes, - plot = TRUE, - minEventsForPlotting = 10, - returnResult = TRUE - ) - dev.off() + outputFile <- file.path(getwd(), "consequencesEnrichment.pdf") + pdf( + file = outputFile, + onefile = FALSE, + height = 6, + width = 9 + ) + consequenceEnrichment <- extractConsequenceEnrichment( + SwitchList, + consequencesToAnalyze = "all", + alpha = args$alpha, + dIFcutoff = args$dIFcutoff, + countGenes = args$countGenes, + analysisOppositeConsequence = args$analysisOppositeConsequence, + plot = TRUE, + localTheme = theme_bw(base_size = 12), + minEventsForPlotting = 10, + returnResult = TRUE + ) + dev.off() - write.table( - splicingEnrichment, - file = "splicingEnrichment.tsv", - quote = FALSE, - sep = "\t", - col.names = TRUE, - row.names = FALSE - ) + write.table( + consequenceEnrichment, + file = "consequencesEnrichment.tsv", + quote = FALSE, + sep = "\t", + col.names = TRUE, + row.names = FALSE + ) - outputFile <- file.path(getwd(), "splicingSummary.pdf") - pdf( - file = outputFile, - onefile = FALSE, - height = 6, - width = 12 - ) - splicingSummary <- extractSplicingSummary( - SwitchList, - splicingToAnalyze = "all", - asFractionTotal = args$asFractionTotal, - alpha = args$alpha, - dIFcutoff = args$dIFcutoff, - onlySigIsoforms = args$onlySigIsoforms, - plot = TRUE, - plotGenes = args$plotGenes, - localTheme = theme_bw(), - returnResult = TRUE - ) - dev.off() + outputFile <- file.path(getwd(), "splicingEnrichment.pdf") + pdf( + file = outputFile, + onefile = FALSE, + height = 6, + width = 9 + ) + splicingEnrichment <- extractSplicingEnrichment( + SwitchList, + splicingToAnalyze = "all", + alpha = args$alpha, + dIFcutoff = args$dIFcutoff, + onlySigIsoforms = args$onlySigIsoforms, + countGenes = args$countGenes, + plot = TRUE, + minEventsForPlotting = 10, + returnResult = TRUE + ) + dev.off() - write.table( - splicingSummary, - file = "splicingSummary.tsv", - quote = FALSE, - sep = "\t", - col.names = TRUE, - row.names = FALSE - ) + write.table( + splicingEnrichment, + file = "splicingEnrichment.tsv", + quote = FALSE, + sep = "\t", + col.names = TRUE, + row.names = FALSE + ) + - write.table( - SwitchList$switchConsequence, - file = "switchConsequence_fulldata.tsv", - quote = FALSE, - sep = "\t", - col.names = TRUE, - row.names = FALSE - ) + outputFile <- file.path(getwd(), "splicingSummary.pdf") + pdf( + file = outputFile, + onefile = FALSE, + height = 6, + width = 12 + ) + splicingSummary <- extractSplicingSummary( + SwitchList, + splicingToAnalyze = "all", + asFractionTotal = args$asFractionTotal, + alpha = args$alpha, + dIFcutoff = args$dIFcutoff, + onlySigIsoforms = args$onlySigIsoforms, + plot = TRUE, + plotGenes = args$plotGenes, + localTheme = theme_bw(), + returnResult = TRUE + ) + dev.off() - write.table( - SwitchList$AlternativeSplicingAnalysis, - file = "switchSplicing_fulldata.tsv", - quote = FALSE, - sep = "\t", - col.names = TRUE, - row.names = FALSE - ) + write.table( + splicingSummary, + file = "splicingSummary.tsv", + quote = FALSE, + sep = "\t", + col.names = TRUE, + row.names = FALSE + ) - write.table( - SwitchList$isoformFeatures, - file = "IsoformFeatures.tsv", - quote = FALSE, - sep = "\t", - col.names = TRUE, - row.names = FALSE - ) + write.table( + SwitchList$switchConsequence, + file = "switchConsequence_fulldata.tsv", + quote = FALSE, + sep = "\t", + col.names = TRUE, + row.names = FALSE + ) - ### Volcano like plot: - outputFile <- file.path(getwd(), "volcanoPlot.pdf") + write.table( + SwitchList$AlternativeSplicingAnalysis, + file = "switchSplicing_fulldata.tsv", + quote = FALSE, + sep = "\t", + col.names = TRUE, + row.names = FALSE + ) - pdf( - file = outputFile, - onefile = FALSE, - height = 6, - width = 9 - ) + write.table( + SwitchList$isoformFeatures, + file = "IsoformFeatures.tsv", + quote = FALSE, + sep = "\t", + col.names = TRUE, + row.names = FALSE + ) + + ### Volcano like plot: + outputFile <- file.path(getwd(), "volcanoPlot.pdf") - p <- ggplot(data = SwitchList$isoformFeatures, aes(x = dIF, y = -log10(isoform_switch_q_value))) + - geom_point( - aes(color = abs(dIF) > 0.1 & isoform_switch_q_value < 0.05), # default cutoff - size = 1 - ) + - geom_hline(yintercept = -log10(0.05), linetype = "dashed") + # default cutoff - geom_vline(xintercept = c(-0.1, 0.1), linetype = "dashed") + # default cutoff - facet_wrap(~ condition_2) + - #facet_grid(condition_1 ~ condition_2) + # alternative to facet_wrap if you have overlapping conditions - scale_color_manual("Signficant\nIsoform Switch", values = c("black", "red")) + - labs(x = "dIF", y = "-Log10 ( Isoform Switch Q Value )") + - theme_bw() - print(p) - dev.off() + pdf( + file = outputFile, + onefile = FALSE, + height = 6, + width = 9 + ) + + p <- ggplot(data = SwitchList$isoformFeatures, aes(x = dIF, y = -log10(isoform_switch_q_value))) + + geom_point( + aes(color = abs(dIF) > 0.1 & isoform_switch_q_value < 0.05), # default cutoff + size = 1 + ) + + geom_hline(yintercept = -log10(0.05), linetype = "dashed") + # default cutoff + geom_vline(xintercept = c(-0.1, 0.1), linetype = "dashed") + # default cutoff + facet_wrap(~condition_2) + + # facet_grid(condition_1 ~ condition_2) + # alternative to facet_wrap if you have overlapping conditions + scale_color_manual("Signficant\nIsoform Switch", values = c("black", "red")) + + labs(x = "dIF", y = "-Log10 ( Isoform Switch Q Value )") + + theme_bw() + print(p) + dev.off() - ### Switch vs Gene changes: - outputFile <- file.path(getwd(), "switchGene.pdf") - pdf( - file = outputFile, - height = 6, - width = 9 - ) - p <- ggplot(data = SwitchList$isoformFeatures, - aes(x = gene_log2_fold_change, y = dIF)) + - geom_point(aes(color = abs(dIF) > 0.1 & - isoform_switch_q_value < 0.05), - size = 1) + - facet_wrap(~ condition_2) + - geom_hline(yintercept = 0, linetype = "dashed") + - geom_vline(xintercept = 0, linetype = "dashed") + - scale_color_manual("Signficant\nIsoform Switch", values = c("black", "red")) + - labs(x = "Gene log2 fold change", y = "dIF") + - theme_bw() - print(p) - dev.off() + ### Switch vs Gene changes: + outputFile <- file.path(getwd(), "switchGene.pdf") + pdf( + file = outputFile, + height = 6, + width = 9 + ) + p <- ggplot( + data = SwitchList$isoformFeatures, + aes(x = gene_log2_fold_change, y = dIF) + ) + + geom_point( + aes(color = abs(dIF) > 0.1 & + isoform_switch_q_value < 0.05), + size = 1 + ) + + facet_wrap(~condition_2) + + geom_hline(yintercept = 0, linetype = "dashed") + + geom_vline(xintercept = 0, linetype = "dashed") + + scale_color_manual("Signficant\nIsoform Switch", values = c("black", "red")) + + labs(x = "Gene log2 fold change", y = "dIF") + + theme_bw() + print(p) + dev.off() - outputFile <- file.path(getwd(), "splicingGenomewide.pdf") - pdf( - file = outputFile, - onefile = FALSE, - height = 6, - width = 14 - ) - splicingGenomeWide <- extractSplicingGenomeWide( - SwitchList, - featureToExtract = "isoformUsage", - splicingToAnalyze = "all", - plot = TRUE, - returnResult = TRUE - ) - dev.off() - } - save(SwitchList, file = "SwitchList.Rda") - + outputFile <- file.path(getwd(), "splicingGenomewide.pdf") + pdf( + file = outputFile, + onefile = FALSE, + height = 6, + width = 14 + ) + splicingGenomeWide <- extractSplicingGenomeWide( + SwitchList, + featureToExtract = "isoformUsage", + splicingToAnalyze = "all", + plot = TRUE, + returnResult = TRUE + ) + dev.off() + } + save(SwitchList, file = "SwitchList.Rda") }
--- a/isoformswitchanalyzer.xml Wed Dec 20 18:03:04 2023 +0000 +++ b/isoformswitchanalyzer.xml Wed Jan 14 09:30:42 2026 +0000 @@ -60,28 +60,71 @@ #set $filename = 'abundance.tsv' #end if + ## read cofactor repeats and generate cofactor_map dictionary + ## cofactor_map structure: {file_path: {cofactor_name: cofactor_level_name, ...}, ...} + #set $cofactor_map = {} + #set $cofactor_names = [] + #for $cofactor_item in $functionMode.tool_source.cofactor: + #set $cofactor_name = str($cofactor_item.cofactor_name) + $cofactor_names.append($cofactor_name) + #for $level_item in $cofactor_item.cofactor_level: + #set $level_name = str($level_item.cofactor_level_name) + #for $level_file in $level_item.cofactor_level_files: + #set $file_path = str($level_file) + #if $file_path not in $cofactor_map: + #set $cofactor_map[$file_path] = {} + #end if + #set $cofactor_map[$file_path][$cofactor_name] = $level_name + #end for + #end for + #end for + + ## create file_to_sample_map dictionary + #set $file_to_sample_map = {} + #for $index in range(len($functionMode.tool_source.first_factor.trans_counts)): + #set $sampleID = str($functionMode.tool_source.first_factor.factorLevel) + str($index) + #set $trans_file = $functionMode.tool_source.first_factor.trans_counts[$index] + #set $file_to_sample_map[$sampleID] = str($trans_file) $conditions.append($functionMode.tool_source.first_factor.factorLevel) - $sampleIDs.append(str($functionMode.tool_source.first_factor.factorLevel) + str($index)) + $sampleIDs.append($sampleID) $replicates.append($index) mkdir './input_files/${functionMode.tool_source.first_factor.factorLevel}${index}/' && - ln -s $functionMode.tool_source.first_factor.trans_counts[$index] './input_files/${functionMode.tool_source.first_factor.factorLevel}${index}/${filename}' && + ln -s $trans_file './input_files/${functionMode.tool_source.first_factor.factorLevel}${index}/${filename}' && #end for #for $index in range(len($functionMode.tool_source.second_factor.trans_counts)): + #set $sampleID = str($functionMode.tool_source.second_factor.factorLevel) + str($index) + #set $trans_file = $functionMode.tool_source.second_factor.trans_counts[$index] + #set $file_to_sample_map[$sampleID] = str($trans_file) $conditions.append($functionMode.tool_source.second_factor.factorLevel) - $sampleIDs.append(str($functionMode.tool_source.second_factor.factorLevel) + str($index)) + $sampleIDs.append($sampleID) $replicates.append($index) mkdir './input_files/${functionMode.tool_source.second_factor.factorLevel}${index}/' && - ln -s $functionMode.tool_source.second_factor.trans_counts[$index] './input_files/${functionMode.tool_source.second_factor.factorLevel}${index}/${filename}' && + ln -s $trans_file './input_files/${functionMode.tool_source.second_factor.factorLevel}${index}/${filename}' && #end for + Rscript '${__tool_directory__}/IsoformSwitchAnalyzeR.R' #for $i, $condition in enumerate($conditions) --condition $condition --sampleID $sampleIDs[$i] --replicate $replicates[$i] + #set $sample_file = $file_to_sample_map[$sampleIDs[$i]] + #if $sample_file in $cofactor_map + #set $cofactor_values = [] + #for $cofactor_name in $cofactor_names: + #if $cofactor_name in $cofactor_map[$sample_file] + #set $cofactor_value = $cofactor_map[$sample_file][$cofactor_name] + #silent $cofactor_values.append(str($cofactor_value)) + #end if + #end for + --cofactors $(','.join($cofactor_values)) + #end if #end for + #if len($cofactor_names) > 0 + --cofactorNames $(','.join($cofactor_names)) + #end if $functionMode.pairedSamples --modeSelector $functionMode.selector --parentDir './input_files' @@ -648,7 +691,15 @@ </assert_contents> </output> <output name="matrix_counts" file="test02_counts.tabular" ftype="tabular" lines_diff="6"/> - <output name="sample_annotation" file="test02_samples_annotation.tabular" ftype="tabular"/> + <output name="sample_annotation"> + <assert_contents> + <has_text_matching expression="sampleID\s+condition"/> + <has_text_matching expression="health0\s+health"/> + <has_text_matching expression="health1\s+health"/> + <has_text_matching expression="cancer0\s+cancer"/> + <has_text_matching expression="cancer1\s+cancer"/> + </assert_contents> + </output> </test> <!-- Test 03: Data import mode generate collection count files--> <test expect_num_outputs="3"> @@ -1115,7 +1166,82 @@ <has_size value="652170" delta="300"/> </assert_contents> </output> - <output name="sample_annotation" file="test10_samples_annotation.tabular" ftype="tabular"/> + <output name="sample_annotation"> + <assert_contents> + <has_text_matching expression="sampleID\s+condition\s+replicate"/> + <has_text_matching expression="health0\s+health\s+0"/> + <has_text_matching expression="health1\s+health\s+1"/> + <has_text_matching expression="cancer0\s+cancer\s+0"/> + <has_text_matching expression="cancer1\s+cancer\s+1"/> + </assert_contents> + </output> + </test> + <!-- Test 11: Data import mode add cofactor--> + <test expect_num_outputs="3"> + <conditional name="functionMode"> + <param name="selector" value="data_import"/> + <param name="genomeAnnotation" value="gencode.hg19.chr10_1000.gtf.gz"/> + <param name="transcriptome" value="transcriptome_stringtie.fasta.gz"/> + <param name="countFiles" value="matrix"/> + <conditional name="tool_source"> + <param name="selector" value="stringtie"/> + <conditional name="novoisoforms"> + <param name="selector" value="novel"/> + <param name="stringtieAnnotation" value="annotation_stringtie.gtf.gz"/> + </conditional> + <section name="first_factor"> + <param name="factorLevel" value="EWS-FLI1"/> + <param name="trans_counts" value="ASP14_1.tabular,ASP14_2.tabular,ASP14_3.tabular"/> + </section> + <section name="second_factor"> + <param name="factorLevel" value="no-EWS-FLI1"/> + <param name="trans_counts" value="ASP14_doxycycline_1.tabular,ASP14_doxycycline_2.tabular,ASP14_doxycycline_3.tabular"/> + </section> + <repeat name="cofactor"> + <param name="cofactor_name" value="Batch"/> + <repeat name="cofactor_level"> + <param name="cofactor_level_name" value="batch1"/> + <param name="cofactor_level_files" value="ASP14_1.tabular,ASP14_doxycycline_1.tabular"/> + </repeat> + <repeat name="cofactor_level"> + <param name="cofactor_level_name" value="batch2"/> + <param name="cofactor_level_files" value="ASP14_2.tabular,ASP14_doxycycline_2.tabular"/> + </repeat> + <repeat name="cofactor_level"> + <param name="cofactor_level_name" value="batch3"/> + <param name="cofactor_level_files" value="ASP14_3.tabular,ASP14_doxycycline_3.tabular"/> + </repeat> + </repeat> + <repeat name="cofactor"> + <param name="cofactor_name" value="Age"/> + <repeat name="cofactor_level"> + <param name="cofactor_level_name" value="20"/> + <param name="cofactor_level_files" value="ASP14_1.tabular,ASP14_doxycycline_1.tabular,ASP14_2.tabular"/> + </repeat> + <repeat name="cofactor_level"> + <param name="cofactor_level_name" value="60"/> + <param name="cofactor_level_files" value="ASP14_doxycycline_2.tabular,ASP14_3.tabular,ASP14_doxycycline_3.tabular"/> + </repeat> + </repeat> + </conditional> + </conditional> + <output name="switchList" ftype="rdata"> + <assert_contents> + <has_size value="374542" delta="300"/> + </assert_contents> + </output> + <output name="matrix_counts" file="stringtie_counts.tabular" ftype="tabular" lines_diff="6"/> + <output name="sample_annotation"> + <assert_contents> + <has_text_matching expression="sampleID\s+condition\s+Batch\s+Age"/> + <has_text_matching expression="EWSXFLI10\s+EWSXFLI1\s+batch1\s+20"/> + <has_text_matching expression="EWSXFLI11\s+EWSXFLI1\s+batch2\s+20"/> + <has_text_matching expression="EWSXFLI12\s+EWSXFLI1\s+batch3\s+60"/> + <has_text_matching expression="noXEWSXFLI10\s+noXEWSXFLI1\s+batch1\s+20"/> + <has_text_matching expression="noXEWSXFLI11\s+noXEWSXFLI1\s+batch2\s+60"/> + <has_text_matching expression="noXEWSXFLI12\s+noXEWSXFLI1\s+batch3\s+60"/> + </assert_contents> + </output> </test> </tests> <help><