comparison diffbind.R @ 17:2605cbdaa7d8 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit 3da34ac6e5b18fd5deacaf31b757aca6bae82251
author iuc
date Fri, 15 Dec 2023 19:39:14 +0000
parents 163688bb8f73
children f907216064f6
comparison
legend: equal, deleted, inserted, replaced
left column: 16:163688bb8f73 (parent) | right column: 17:2605cbdaa7d8 (this changeset)
1 ## Setup R error handling to go to stderr 1 ## Setup R error handling to go to stderr
2 options(show.error.messages = F, error = function() { 2 options(show.error.messages = FALSE, error = function() {
3 cat(geterrmessage(), file = stderr()); q("no", 1, F) 3 cat(geterrmessage(), file = stderr())
4 q("no", 1, FALSE)
4 }) 5 })
5 # we need that to not crash galaxy with an UTF8 error on German LC settings. 6 # we need that to not crash galaxy with an UTF8 error on German LC settings.
6 Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") 7 Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
7 8
8 suppressPackageStartupMessages({ 9 suppressPackageStartupMessages({
9 library("getopt") 10 library("getopt")
10 library("DiffBind") 11 library("DiffBind")
11 library("rjson") 12 library("rjson")
12 }) 13 })
13 14
14 options(stringAsfactors = FALSE, useFancyQuotes = FALSE) 15 options(stringAsfactors = FALSE, useFancyQuotes = FALSE)
15 args <- commandArgs(trailingOnly = TRUE) 16 args <- commandArgs(trailingOnly = TRUE)
16 17
17 #get options, using the spec as defined by the enclosed list. 18 #get options, using the spec as defined by the enclosed list.
18 #we read the options from the default: commandArgs(TRUE). 19 #we read the options from the default: commandArgs(TRUE).
19 spec <- matrix(c( 20 spec <- matrix(c(
20 "infile", "i", 1, "character", 21 "infile", "i", 1, "character",
21 "outfile", "o", 1, "character", 22 "outfile", "o", 1, "character",
22 "scorecol", "n", 1, "integer", 23 "scorecol", "n", 1, "integer",
23 "lowerbetter", "l", 1, "logical", 24 "lowerbetter", "l", 1, "logical",
24 "summits", "s", 1, "integer", 25 "summits", "s", 1, "integer",
25 "th", "t", 1, "double", 26 "th", "t", 1, "double",
26 "format", "f", 1, "character", 27 "format", "f", 1, "character",
27 "plots", "p", 2, "character", 28 "plots", "p", 2, "character",
28 "bmatrix", "b", 0, "logical", 29 "bmatrix", "b", 0, "logical",
29 "rdaOpt", "r", 0, "logical", 30 "rdaOpt", "r", 0, "logical",
30 "infoOpt", "a", 0, "logical", 31 "infoOpt", "a", 0, "logical",
31 "verbose", "v", 2, "integer", 32 "verbose", "v", 2, "integer",
32 "help", "h", 0, "logical" 33 "help", "h", 0, "logical"
33 ), byrow = TRUE, ncol = 4); 34 ), byrow = TRUE, ncol = 4)
34 35
35 opt <- getopt(spec); 36 opt <- getopt(spec)
36 37
37 # if help was asked for print a friendly message 38 # if help was asked for print a friendly message
38 # and exit with a non-zero error code 39 # and exit with a non-zero error code
39 if (!is.null(opt$help)) { 40 if (!is.null(opt$help)) {
40 cat(getopt(spec, usage = TRUE)); 41 cat(getopt(spec, usage = TRUE))
41 q(status = 1); 42 q(status = 1)
42 } 43 }
43 44
44 parser <- newJSONParser() 45 parser <- newJSONParser()
45 parser$addData(opt$infile) 46 parser$addData(opt$infile)
46 factor_list <- parser$getObject() 47 factor_list <- parser$getObject()
52 # get the group and sample id from the peaks filenames 53 # get the group and sample id from the peaks filenames
53 groups <- sapply(strsplit(peaks, "-"), `[`, 1) 54 groups <- sapply(strsplit(peaks, "-"), `[`, 1)
54 samples <- sapply(strsplit(peaks, "-"), `[`, 2) 55 samples <- sapply(strsplit(peaks, "-"), `[`, 2)
55 56
56 if (length(ctrls) != 0) { 57 if (length(ctrls) != 0) {
57 sample_table <- data.frame(SampleID = samples, 58 sample_table <- data.frame(
58 Condition = groups, 59 SampleID = samples,
59 bamReads = bams, 60 Condition = groups,
60 bamControl = ctrls, 61 bamReads = bams,
61 Peaks = peaks, 62 bamControl = ctrls,
62 Tissue = samples) # using "Tissue" column to display ids as labels in PCA plot 63 Peaks = peaks,
64 Tissue = samples
65 ) # using "Tissue" column to display ids as labels in PCA plot
63 } else { 66 } else {
64 67 sample_table <- data.frame(
65 sample_table <- data.frame(SampleID = samples, 68 SampleID = samples,
66 Replicate = samples, 69 Replicate = samples,
67 Condition = groups, 70 Condition = groups,
68 bamReads = bams, 71 bamReads = bams,
69 Peaks = peaks, 72 Peaks = peaks,
70 Tissue = samples) 73 Tissue = samples
74 )
71 } 75 }
72 76
73 sample <- dba(sampleSheet = sample_table, peakFormat = "bed", scoreCol = opt$scorecol, bLowerScoreBetter = opt$lowerbetter) 77 sample <- dba(sampleSheet = sample_table, peakFormat = "bed", scoreCol = opt$scorecol, bLowerScoreBetter = opt$lowerbetter)
74 78
75 if (!is.null(opt$summits)) { 79 if (!is.null(opt$summits)) {
76 sample_count <- dba.count(sample, summits = opt$summits) 80 sample_count <- dba.count(sample, summits = opt$summits)
77 } else { 81 } else {
78 sample_count <- dba.count(sample) 82 sample_count <- dba.count(sample)
79 } 83 }
80 84
81 sample_contrast <- dba.contrast(sample_count, categories = DBA_CONDITION, minMembers = 2) 85 sample_contrast <- dba.contrast(sample_count, categories = DBA_CONDITION, minMembers = 2)
82 sample_analyze <- dba.analyze(sample_contrast) 86 sample_analyze <- dba.analyze(sample_contrast)
83 diff_bind <- dba.report(sample_analyze, th = opt$th) 87 diff_bind <- dba.report(sample_analyze, th = opt$th)
84 88
85 # Generate plots 89 # Generate plots
86 if (!is.null(opt$plots)) { 90 if (!is.null(opt$plots)) {
87 pdf(opt$plots) 91 pdf(opt$plots)
88 orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th) 92 orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th)
89 dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3) 93 dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3)
90 dba.plotMA(sample_analyze, th = opt$th) 94 dba.plotMA(sample_analyze, th = opt$th)
91 dba.plotVolcano(sample_analyze, th = opt$th) 95 dba.plotVolcano(sample_analyze, th = opt$th)
92 dba.plotBox(sample_analyze, th = opt$th) 96 dba.plotBox(sample_analyze, th = opt$th)
93 dev.off() 97 dev.off()
94 } 98 }
95 99
96 # Output differential binding sites 100 # Output differential binding sites
97 res_sorted <- diff_bind[order(diff_bind$FDR), ] 101 res_sorted <- diff_bind[order(diff_bind$FDR), ]
98 # Convert from GRanges (1-based) to 0-based format (adapted from https://www.biostars.org/p/89341/) 102 # Convert from GRanges (1-based) to 0-based format (adapted from https://www.biostars.org/p/89341/)
99 if (opt$format == "bed") { 103 if (opt$format == "bed") {
100 res_sorted <- data.frame(Chrom = seqnames(res_sorted), 104 res_sorted <- data.frame(
101 Start = start(res_sorted) - 1, 105 Chrom = seqnames(res_sorted),
102 End = end(res_sorted), 106 Start = start(res_sorted) - 1,
103 Name = rep("DiffBind", length(res_sorted)), 107 End = end(res_sorted),
104 Score = rep("0", length(res_sorted)), 108 Name = rep("DiffBind", length(res_sorted)),
105 Strand = gsub("\\*", ".", strand(res_sorted))) 109 Score = rep("0", length(res_sorted)),
110 Strand = gsub("\\*", ".", strand(res_sorted))
111 )
106 } else if (opt$format == "interval") { 112 } else if (opt$format == "interval") {
107 # Output as interval 113 # Output as interval
108 df <- as.data.frame(res_sorted) 114 df <- as.data.frame(res_sorted)
109 extrainfo <- NULL 115 extrainfo <- NULL
110 for (i in seq_len(nrow(df))) { 116 for (i in seq_len(nrow(df))) {
111 extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse = "|") 117 extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse = "|")
112 } 118 }
113 res_sorted <- data.frame(Chrom = seqnames(res_sorted), 119 res_sorted <- data.frame(
114 Start = start(res_sorted) - 1, 120 Chrom = seqnames(res_sorted),
115 End = end(res_sorted), 121 Start = start(res_sorted) - 1,
116 Name = rep("DiffBind", length(res_sorted)), 122 End = end(res_sorted),
117 Score = rep("0", length(res_sorted)), 123 Name = rep("DiffBind", length(res_sorted)),
118 Strand = gsub("\\*", ".", strand(res_sorted)), 124 Score = rep("0", length(res_sorted)),
119 Comment = extrainfo) 125 Strand = gsub("\\*", ".", strand(res_sorted)),
126 Comment = extrainfo
127 )
120 } else { 128 } else {
121 # Output as 0-based tabular 129 # Output as 0-based tabular
122 res_sorted <- data.frame(Chrom = seqnames(res_sorted), 130 res_sorted <- data.frame(
123 Start = start(res_sorted) - 1, 131 Chrom = seqnames(res_sorted),
124 End = end(res_sorted), 132 Start = start(res_sorted) - 1,
125 Name = rep("DiffBind", length(res_sorted)), 133 End = end(res_sorted),
126 Score = rep("0", length(res_sorted)), 134 Name = rep("DiffBind", length(res_sorted)),
127 Strand = gsub("\\*", ".", strand(res_sorted)), 135 Score = rep("0", length(res_sorted)),
128 mcols(res_sorted)) 136 Strand = gsub("\\*", ".", strand(res_sorted)),
137 mcols(res_sorted)
138 )
129 } 139 }
130 write.table(res_sorted, file = opt$outfile, sep = "\t", quote = FALSE, row.names = FALSE) 140 write.table(res_sorted, file = opt$outfile, sep = "\t", quote = FALSE, row.names = FALSE)
131 141
132 # Output binding affinity scores 142 # Output binding affinity scores
133 if (!is.null(opt$bmatrix)) { 143 if (!is.null(opt$bmatrix)) {
134 bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME) 144 bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME)
135 # Output as 0-based tabular 145 # Output as 0-based tabular
136 bmat <- data.frame(Chrom = bmat[, 1], 146 bmat <- data.frame(
137 Start = bmat[, 2] - 1, 147 Chrom = bmat[, 1],
138 End = bmat[, 3], 148 Start = bmat[, 2] - 1,
139 bmat[, 4:ncol(bmat)]) 149 End = bmat[, 3],
140 write.table(bmat, file = "bmatrix.tab", sep = "\t", quote = FALSE, row.names = FALSE) 150 bmat[, 4:ncol(bmat)]
151 )
152 write.table(bmat, file = "bmatrix.tab", sep = "\t", quote = FALSE, row.names = FALSE)
141 } 153 }
142 154
143 # Output RData file 155 # Output RData file
144 if (!is.null(opt$rdaOpt)) { 156 if (!is.null(opt$rdaOpt)) {
145 save.image(file = "DiffBind_analysis.RData") 157 save.image(file = "DiffBind_analysis.RData")
146 } 158 }
147 159
148 # Output analysis info 160 # Output analysis info
149 if (!is.null(opt$infoOpt)) { 161 if (!is.null(opt$infoOpt)) {
150 info <- "DiffBind_analysis_info.txt" 162 info <- "DiffBind_analysis_info.txt"
151 cat("dba.count Info\n\n", file = info, append = TRUE) 163 cat("dba.count Info\n\n", file = info, append = TRUE)
152 capture.output(sample, file = info, append = TRUE) 164 capture.output(sample, file = info, append = TRUE)
153 cat("\ndba.analyze Info\n\n", file = info, append = TRUE) 165 cat("\ndba.analyze Info\n\n", file = info, append = TRUE)
154 capture.output(sample_analyze, file = info, append = TRUE) 166 capture.output(sample_analyze, file = info, append = TRUE)
155 cat("\nSessionInfo\n\n", file = info, append = TRUE) 167 cat("\nSessionInfo\n\n", file = info, append = TRUE)
156 capture.output(sessionInfo(), file = info, append = TRUE) 168 capture.output(sessionInfo(), file = info, append = TRUE)
157 } 169 }