Mercurial > repos > bgruening > diffbind
comparison diffbind.R @ 17:2605cbdaa7d8 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit 3da34ac6e5b18fd5deacaf31b757aca6bae82251
author | iuc |
---|---|
date | Fri, 15 Dec 2023 19:39:14 +0000 |
parents | 163688bb8f73 |
children | f907216064f6 |
comparison
equal
deleted
inserted
replaced
16:163688bb8f73 | 17:2605cbdaa7d8 |
---|---|
1 ## Setup R error handling to go to stderr | 1 ## Setup R error handling to go to stderr |
2 options(show.error.messages = F, error = function() { | 2 options(show.error.messages = FALSE, error = function() { |
3 cat(geterrmessage(), file = stderr()); q("no", 1, F) | 3 cat(geterrmessage(), file = stderr()) |
| | 4 q("no", 1, FALSE) |
4 }) | 5 }) |
5 # we need that to not crash galaxy with an UTF8 error on German LC settings. | 6 # we need that to not crash galaxy with an UTF8 error on German LC settings. |
6 Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 7 Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
7 | 8 |
8 suppressPackageStartupMessages({ | 9 suppressPackageStartupMessages({ |
9 library("getopt") | 10 library("getopt") |
10 library("DiffBind") | 11 library("DiffBind") |
11 library("rjson") | 12 library("rjson") |
12 }) | 13 }) |
13 | 14 |
14 options(stringAsfactors = FALSE, useFancyQuotes = FALSE) | 15 options(stringAsfactors = FALSE, useFancyQuotes = FALSE) |
15 args <- commandArgs(trailingOnly = TRUE) | 16 args <- commandArgs(trailingOnly = TRUE) |
16 | 17 |
17 #get options, using the spec as defined by the enclosed list. | 18 #get options, using the spec as defined by the enclosed list. |
18 #we read the options from the default: commandArgs(TRUE). | 19 #we read the options from the default: commandArgs(TRUE). |
19 spec <- matrix(c( | 20 spec <- matrix(c( |
20 "infile", "i", 1, "character", | 21 "infile", "i", 1, "character", |
21 "outfile", "o", 1, "character", | 22 "outfile", "o", 1, "character", |
22 "scorecol", "n", 1, "integer", | 23 "scorecol", "n", 1, "integer", |
23 "lowerbetter", "l", 1, "logical", | 24 "lowerbetter", "l", 1, "logical", |
24 "summits", "s", 1, "integer", | 25 "summits", "s", 1, "integer", |
25 "th", "t", 1, "double", | 26 "th", "t", 1, "double", |
26 "format", "f", 1, "character", | 27 "format", "f", 1, "character", |
27 "plots", "p", 2, "character", | 28 "plots", "p", 2, "character", |
28 "bmatrix", "b", 0, "logical", | 29 "bmatrix", "b", 0, "logical", |
29 "rdaOpt", "r", 0, "logical", | 30 "rdaOpt", "r", 0, "logical", |
30 "infoOpt", "a", 0, "logical", | 31 "infoOpt", "a", 0, "logical", |
31 "verbose", "v", 2, "integer", | 32 "verbose", "v", 2, "integer", |
32 "help", "h", 0, "logical" | 33 "help", "h", 0, "logical" |
33 ), byrow = TRUE, ncol = 4); | 34 ), byrow = TRUE, ncol = 4) |
34 | 35 |
35 opt <- getopt(spec); | 36 opt <- getopt(spec) |
36 | 37 |
37 # if help was asked for print a friendly message | 38 # if help was asked for print a friendly message |
38 # and exit with a non-zero error code | 39 # and exit with a non-zero error code |
39 if (!is.null(opt$help)) { | 40 if (!is.null(opt$help)) { |
40 cat(getopt(spec, usage = TRUE)); | 41 cat(getopt(spec, usage = TRUE)) |
41 q(status = 1); | 42 q(status = 1) |
42 } | 43 } |
43 | 44 |
44 parser <- newJSONParser() | 45 parser <- newJSONParser() |
45 parser$addData(opt$infile) | 46 parser$addData(opt$infile) |
46 factor_list <- parser$getObject() | 47 factor_list <- parser$getObject() |
52 # get the group and sample id from the peaks filenames | 53 # get the group and sample id from the peaks filenames |
53 groups <- sapply(strsplit(peaks, "-"), `[`, 1) | 54 groups <- sapply(strsplit(peaks, "-"), `[`, 1) |
54 samples <- sapply(strsplit(peaks, "-"), `[`, 2) | 55 samples <- sapply(strsplit(peaks, "-"), `[`, 2) |
55 | 56 |
56 if (length(ctrls) != 0) { | 57 if (length(ctrls) != 0) { |
57 sample_table <- data.frame(SampleID = samples, | 58 sample_table <- data.frame( |
58 Condition = groups, | 59 SampleID = samples, |
59 bamReads = bams, | 60 Condition = groups, |
60 bamControl = ctrls, | 61 bamReads = bams, |
61 Peaks = peaks, | 62 bamControl = ctrls, |
62 Tissue = samples) # using "Tissue" column to display ids as labels in PCA plot | 63 Peaks = peaks, |
| | 64 Tissue = samples |
| | 65 ) # using "Tissue" column to display ids as labels in PCA plot |
63 } else { | 66 } else { |
64 | 67 sample_table <- data.frame( |
65 sample_table <- data.frame(SampleID = samples, | 68 SampleID = samples, |
66 Replicate = samples, | 69 Replicate = samples, |
67 Condition = groups, | 70 Condition = groups, |
68 bamReads = bams, | 71 bamReads = bams, |
69 Peaks = peaks, | 72 Peaks = peaks, |
70 Tissue = samples) | 73 Tissue = samples |
| | 74 ) |
71 } | 75 } |
72 | 76 |
73 sample <- dba(sampleSheet = sample_table, peakFormat = "bed", scoreCol = opt$scorecol, bLowerScoreBetter = opt$lowerbetter) | 77 sample <- dba(sampleSheet = sample_table, peakFormat = "bed", scoreCol = opt$scorecol, bLowerScoreBetter = opt$lowerbetter) |
74 | 78 |
75 if (!is.null(opt$summits)) { | 79 if (!is.null(opt$summits)) { |
76 sample_count <- dba.count(sample, summits = opt$summits) | 80 sample_count <- dba.count(sample, summits = opt$summits) |
77 } else { | 81 } else { |
78 sample_count <- dba.count(sample) | 82 sample_count <- dba.count(sample) |
79 } | 83 } |
80 | 84 |
81 sample_contrast <- dba.contrast(sample_count, categories = DBA_CONDITION, minMembers = 2) | 85 sample_contrast <- dba.contrast(sample_count, categories = DBA_CONDITION, minMembers = 2) |
82 sample_analyze <- dba.analyze(sample_contrast) | 86 sample_analyze <- dba.analyze(sample_contrast) |
83 diff_bind <- dba.report(sample_analyze, th = opt$th) | 87 diff_bind <- dba.report(sample_analyze, th = opt$th) |
84 | 88 |
85 # Generate plots | 89 # Generate plots |
86 if (!is.null(opt$plots)) { | 90 if (!is.null(opt$plots)) { |
87 pdf(opt$plots) | 91 pdf(opt$plots) |
88 orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th) | 92 orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th) |
89 dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3) | 93 dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3) |
90 dba.plotMA(sample_analyze, th = opt$th) | 94 dba.plotMA(sample_analyze, th = opt$th) |
91 dba.plotVolcano(sample_analyze, th = opt$th) | 95 dba.plotVolcano(sample_analyze, th = opt$th) |
92 dba.plotBox(sample_analyze, th = opt$th) | 96 dba.plotBox(sample_analyze, th = opt$th) |
93 dev.off() | 97 dev.off() |
94 } | 98 } |
95 | 99 |
96 # Output differential binding sites | 100 # Output differential binding sites |
97 res_sorted <- diff_bind[order(diff_bind$FDR), ] | 101 res_sorted <- diff_bind[order(diff_bind$FDR), ] |
98 # Convert from GRanges (1-based) to 0-based format (adapted from https://www.biostars.org/p/89341/) | 102 # Convert from GRanges (1-based) to 0-based format (adapted from https://www.biostars.org/p/89341/) |
99 if (opt$format == "bed") { | 103 if (opt$format == "bed") { |
100 res_sorted <- data.frame(Chrom = seqnames(res_sorted), | 104 res_sorted <- data.frame( |
101 Start = start(res_sorted) - 1, | 105 Chrom = seqnames(res_sorted), |
102 End = end(res_sorted), | 106 Start = start(res_sorted) - 1, |
103 Name = rep("DiffBind", length(res_sorted)), | 107 End = end(res_sorted), |
104 Score = rep("0", length(res_sorted)), | 108 Name = rep("DiffBind", length(res_sorted)), |
105 Strand = gsub("\\*", ".", strand(res_sorted))) | 109 Score = rep("0", length(res_sorted)), |
| | 110 Strand = gsub("\\*", ".", strand(res_sorted)) |
| | 111 ) |
106 } else if (opt$format == "interval") { | 112 } else if (opt$format == "interval") { |
107 # Output as interval | 113 # Output as interval |
108 df <- as.data.frame(res_sorted) | 114 df <- as.data.frame(res_sorted) |
109 extrainfo <- NULL | 115 extrainfo <- NULL |
110 for (i in seq_len(nrow(df))) { | 116 for (i in seq_len(nrow(df))) { |
111 extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse = "|") | 117 extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse = "|") |
112 } | 118 } |
113 res_sorted <- data.frame(Chrom = seqnames(res_sorted), | 119 res_sorted <- data.frame( |
114 Start = start(res_sorted) - 1, | 120 Chrom = seqnames(res_sorted), |
115 End = end(res_sorted), | 121 Start = start(res_sorted) - 1, |
116 Name = rep("DiffBind", length(res_sorted)), | 122 End = end(res_sorted), |
117 Score = rep("0", length(res_sorted)), | 123 Name = rep("DiffBind", length(res_sorted)), |
118 Strand = gsub("\\*", ".", strand(res_sorted)), | 124 Score = rep("0", length(res_sorted)), |
119 Comment = extrainfo) | 125 Strand = gsub("\\*", ".", strand(res_sorted)), |
| | 126 Comment = extrainfo |
| | 127 ) |
120 } else { | 128 } else { |
121 # Output as 0-based tabular | 129 # Output as 0-based tabular |
122 res_sorted <- data.frame(Chrom = seqnames(res_sorted), | 130 res_sorted <- data.frame( |
123 Start = start(res_sorted) - 1, | 131 Chrom = seqnames(res_sorted), |
124 End = end(res_sorted), | 132 Start = start(res_sorted) - 1, |
125 Name = rep("DiffBind", length(res_sorted)), | 133 End = end(res_sorted), |
126 Score = rep("0", length(res_sorted)), | 134 Name = rep("DiffBind", length(res_sorted)), |
127 Strand = gsub("\\*", ".", strand(res_sorted)), | 135 Score = rep("0", length(res_sorted)), |
128 mcols(res_sorted)) | 136 Strand = gsub("\\*", ".", strand(res_sorted)), |
| | 137 mcols(res_sorted) |
| | 138 ) |
129 } | 139 } |
130 write.table(res_sorted, file = opt$outfile, sep = "\t", quote = FALSE, row.names = FALSE) | 140 write.table(res_sorted, file = opt$outfile, sep = "\t", quote = FALSE, row.names = FALSE) |
131 | 141 |
132 # Output binding affinity scores | 142 # Output binding affinity scores |
133 if (!is.null(opt$bmatrix)) { | 143 if (!is.null(opt$bmatrix)) { |
134 bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME) | 144 bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME) |
135 # Output as 0-based tabular | 145 # Output as 0-based tabular |
136 bmat <- data.frame(Chrom = bmat[, 1], | 146 bmat <- data.frame( |
137 Start = bmat[, 2] - 1, | 147 Chrom = bmat[, 1], |
138 End = bmat[, 3], | 148 Start = bmat[, 2] - 1, |
139 bmat[, 4:ncol(bmat)]) | 149 End = bmat[, 3], |
140 write.table(bmat, file = "bmatrix.tab", sep = "\t", quote = FALSE, row.names = FALSE) | 150 bmat[, 4:ncol(bmat)] |
| | 151 ) |
| | 152 write.table(bmat, file = "bmatrix.tab", sep = "\t", quote = FALSE, row.names = FALSE) |
141 } | 153 } |
142 | 154 |
143 # Output RData file | 155 # Output RData file |
144 if (!is.null(opt$rdaOpt)) { | 156 if (!is.null(opt$rdaOpt)) { |
145 save.image(file = "DiffBind_analysis.RData") | 157 save.image(file = "DiffBind_analysis.RData") |
146 } | 158 } |
147 | 159 |
148 # Output analysis info | 160 # Output analysis info |
149 if (!is.null(opt$infoOpt)) { | 161 if (!is.null(opt$infoOpt)) { |
150 info <- "DiffBind_analysis_info.txt" | 162 info <- "DiffBind_analysis_info.txt" |
151 cat("dba.count Info\n\n", file = info, append = TRUE) | 163 cat("dba.count Info\n\n", file = info, append = TRUE) |
152 capture.output(sample, file = info, append = TRUE) | 164 capture.output(sample, file = info, append = TRUE) |
153 cat("\ndba.analyze Info\n\n", file = info, append = TRUE) | 165 cat("\ndba.analyze Info\n\n", file = info, append = TRUE) |
154 capture.output(sample_analyze, file = info, append = TRUE) | 166 capture.output(sample_analyze, file = info, append = TRUE) |
155 cat("\nSessionInfo\n\n", file = info, append = TRUE) | 167 cat("\nSessionInfo\n\n", file = info, append = TRUE) |
156 capture.output(sessionInfo(), file = info, append = TRUE) | 168 capture.output(sessionInfo(), file = info, append = TRUE) |
157 } | 169 } |