comparison dexseq.R @ 7:62adf13b86ea draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/dexseq commit 06f2c57b523aab7c997d82e1345fd23f178de598"
author iuc
date Fri, 19 Mar 2021 09:45:03 +0000
parents 278b189248cd
children df929f257179
comparison
equal deleted inserted replaced
6:9fd8b69e6e68 7:62adf13b86ea
1 ## Setup R error handling to go to stderr 1 ## Setup R error handling to go to stderr
2 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) 2 options(show.error.messages = F, error = function() {
3 cat(geterrmessage(), file = stderr()); q("no", 1, F)
4 })
3 # we need that to not crash galaxy with an UTF8 error on German LC settings. 5 # we need that to not crash galaxy with an UTF8 error on German LC settings.
4 Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") 6 Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
5 7
6 suppressPackageStartupMessages({ 8 suppressPackageStartupMessages({
7 library("DEXSeq") 9 library("DEXSeq")
8 library('getopt') 10 library("getopt")
9 library('rjson') 11 library("rjson")
10 }) 12 })
11 13
12 14
13 options(stringAsfactors = FALSE, useFancyQuotes = FALSE) 15 options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
14 args <- commandArgs(trailingOnly = TRUE) 16 args <- commandArgs(trailingOnly = TRUE)
15 17
16 #get options, using the spec as defined by the enclosed list. 18 #get options, using the spec as defined by the enclosed list.
17 #we read the options from the default: commandArgs(TRUE). 19 #we read the options from the default: commandArgs(TRUE).
18 spec = matrix(c( 20 spec <- matrix(c(
19 'verbose', 'v', 2, "integer", 21 "verbose", "v", 2, "integer",
20 'help', 'h', 0, "logical", 22 "help", "h", 0, "logical",
21 'gtf', 'a', 1, "character", 23 "gtf", "a", 1, "character",
22 'outfile', 'o', 1, "character", 24 "outfile", "o", 1, "character",
23 'reportdir', 'r', 1, "character", 25 "reportdir", "r", 1, "character",
24 'rds', 'd', 1, "character", 26 "rds", "d", 1, "character",
25 'factors', 'f', 1, "character", 27 "factors", "f", 1, "character",
26 'threads', 'p', 1, "integer", 28 "threads", "p", 1, "integer",
27 'fdr', 'c', 1, "double" 29 "fdr", "c", 1, "double"
28 ), byrow=TRUE, ncol=4); 30 ), byrow = TRUE, ncol = 4);
29 opt = getopt(spec); 31 opt <- getopt(spec);
30 32
31 # if help was asked for print a friendly message 33 # if help was asked for print a friendly message
32 # and exit with a non-zero error code 34 # and exit with a non-zero error code
33 if ( !is.null(opt$help) ) { 35 if (!is.null(opt$help)) {
34 cat(getopt(spec, usage=TRUE)); 36 cat(getopt(spec, usage = TRUE));
35 q(status=1); 37 q(status = 1);
36 } 38 }
37 39
38 trim <- function (x) gsub("^\\s+|\\s+$", "", x) 40 trim <- function(x) gsub("^\\s+|\\s+$", "", x)
39 opt$samples <- trim(opt$samples) 41 opt$samples <- trim(opt$samples)
40 opt$factors <- trim(opt$factors) 42 opt$factors <- trim(opt$factors)
41 43
42 parser <- newJSONParser() 44 parser <- newJSONParser()
43 parser$addData( opt$factors ) 45 parser$addData(opt$factors)
44 factorsList <- parser$getObject() 46 factors_list <- parser$getObject()
45 47
46 sampleTable<-data.frame() 48 sample_table <- data.frame()
47 countFiles<-c() 49 count_files <- c()
48 factorNames<-c() 50 factor_names <- c()
49 primaryFactor<-"" 51 primary_factor <- ""
50 for(factor in factorsList){ 52 for (factor in factors_list) {
51 factorName<-factor[[1]] 53 factor_name <- factor[[1]]
52 factorNames<-append(factorNames, paste(factorName,"exon",sep=":")) 54 factor_names <- append(factor_names, paste(factor_name, "exon", sep = ":"))
53 factorValuesMapList<-factor[[2]] 55 factor_values_map_list <- factor[[2]]
54 c = length(factorValuesMapList) 56 c <- length(factor_values_map_list)
55 for (factorValuesMap in factorValuesMapList){ 57 for (factorValuesMap in factor_values_map_list) {
56 for(files in factorValuesMap){ 58 for (files in factorValuesMap) {
57 for(file in files){ 59 for (file in files) {
58 if(primaryFactor == "") { 60 if (primary_factor == "") {
59 countFiles<-append(countFiles,file) 61 count_files <- append(count_files, file)
60 } 62 }
61 sampleTable[basename(file),factorName]<-paste(c,names(factorValuesMap),sep="_") 63 sample_table[basename(file), factor_name] <- paste(c, names(factorValuesMap), sep = "_")
62 } 64 }
63 } 65 }
64 c = c-1 66 c <- c - 1
65 } 67 }
66 if(primaryFactor == ""){ 68 if (primary_factor == "") {
67 primaryFactor <- factorName 69 primary_factor <- factor_name
68 } 70 }
69 } 71 }
70 72
71 factorNames<-append(factorNames,"exon") 73 factor_names <- append(factor_names, "exon")
72 factorNames<-append(factorNames,"sample") 74 factor_names <- append(factor_names, "sample")
73 factorNames<-rev(factorNames) 75 factor_names <- rev(factor_names)
74 formulaFullModel <- as.formula(paste("", paste(factorNames, collapse=" + "), sep=" ~ ")) 76 formula_full_model <- as.formula(paste("", paste(factor_names, collapse = " + "), sep = " ~ "))
75 factorNames <- head(factorNames,-1) 77 factor_names <- head(factor_names, -1)
76 formulaReducedModel <- as.formula(paste("", paste(factorNames, collapse=" + "), sep=" ~ ")) 78 formula_reduced_model <- as.formula(paste("", paste(factor_names, collapse = " + "), sep = " ~ "))
77 79
78 sampleTable 80 sample_table
79 formulaFullModel 81 formula_full_model
80 formulaReducedModel 82 formula_reduced_model
81 primaryFactor 83 primary_factor
82 countFiles 84 count_files
83 opt$reportdir 85 opt$reportdir
84 opt$threads 86 opt$threads
85 getwd() 87 getwd()
86 88
87 dxd = DEXSeqDataSetFromHTSeq(countFiles, sampleData=sampleTable, design= formulaFullModel, flattenedfile=opt$gtf) 89 dxd <- DEXSeqDataSetFromHTSeq(count_files, sampleData = sample_table, design = formula_full_model, flattenedfile = opt$gtf)
88 90
89 colData(dxd) 91 colData(dxd)
90 dxd <- estimateSizeFactors(dxd) 92 dxd <- estimateSizeFactors(dxd)
91 print("Estimated size factors") 93 print("Estimated size factors")
92 sizeFactors(dxd) 94 sizeFactors(dxd)
93 BPPARAM=MulticoreParam(workers=opt$threads) 95 bpparam <- MulticoreParam(workers = opt$threads)
94 dxd <- estimateDispersions(dxd, formula=formulaFullModel, BPPARAM=BPPARAM) 96 dxd <- estimateDispersions(dxd, formula = formula_full_model, BPPARAM = bpparam)
95 print("Estimated dispersions") 97 print("Estimated dispersions")
96 dxd <- testForDEU(dxd, reducedModel=formulaReducedModel, fullModel=formulaFullModel, BPPARAM=BPPARAM) 98 dxd <- testForDEU(dxd, reducedModel = formula_reduced_model, fullModel = formula_full_model, BPPARAM = bpparam)
97 print("tested for DEU") 99 print("tested for DEU")
98 dxd <- estimateExonFoldChanges(dxd, fitExpToVar=primaryFactor, BPPARAM=BPPARAM) 100 dxd <- estimateExonFoldChanges(dxd, fitExpToVar = primary_factor, BPPARAM = bpparam)
99 print("Estimated fold changes") 101 print("Estimated fold changes")
100 res <- DEXSeqResults(dxd) 102 res <- DEXSeqResults(dxd)
101 print("Results") 103 print("Results")
102 table(res$padj <= opt$fdr) 104 table(res$padj <= opt$fdr)
103 resSorted <- res[order(res$padj),] 105 res_sorted <- res[order(res$padj), ]
104 head(resSorted) 106 head(res_sorted)
105 107
106 export_table <- as.data.frame(resSorted) 108 export_table <- as.data.frame(res_sorted)
107 last_column <- ncol(export_table) 109 last_column <- ncol(export_table)
108 for(i in 1:nrow(export_table)) { 110 for (i in seq_len(nrow(export_table))) {
109 export_table[i,last_column] <- paste(export_table[i,last_column][[1]],collapse=", ") 111 export_table[i, last_column] <- paste(export_table[i, last_column][[1]], collapse = ", ")
110 } 112 }
111 write.table(export_table, file = opt$outfile, sep="\t", quote = FALSE, col.names = FALSE) 113 write.table(export_table, file = opt$outfile, sep = "\t", quote = FALSE, col.names = FALSE)
112 print("Written Results") 114 print("Written Results")
113 115
114 if ( !is.null(opt$rds) ) { 116 if (!is.null(opt$rds)) {
115 saveRDS(res, file="DEXSeqResults.rds") 117 saveRDS(res, file = "DEXSeqResults.rds")
116 } 118 }
117 119
118 if ( !is.null(opt$reportdir) ) { 120 if (!is.null(opt$reportdir)) {
119 DEXSeqHTML(res, fitExpToVar=primaryFactor, path=opt$reportdir, FDR=opt$fdr, color=c("#B7FEA0", "#FF8F43", "#637EE9", "#FF0000", "#F1E7A1", "#C3EEE7","#CEAEFF", "#EDC3C5", "#AAA8AA")) 121 DEXSeqHTML(res, fitExpToVar = primary_factor, path = opt$reportdir, FDR = opt$fdr, color = c("#B7FEA0", "#FF8F43", "#637EE9", "#FF0000", "#F1E7A1", "#C3EEE7", "#CEAEFF", "#EDC3C5", "#AAA8AA"))
120 } 122 }
121 sessionInfo() 123 sessionInfo()