Mercurial > repos > bgruening > diffbind
comparison diffbind.R @ 18:f907216064f6 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit fd148a124034b44d0d61db3eec32ff991d8c152c
| author | iuc |
|---|---|
| date | Mon, 08 Jul 2024 18:31:51 +0000 |
| parents | 2605cbdaa7d8 |
| children |
comparison
equal
deleted
inserted
replaced
| 17:2605cbdaa7d8 | 18:f907216064f6 |
|---|---|
| 1 ## Setup R error handling to go to stderr | 1 ## Setup R error handling to go to stderr |
| 2 options(show.error.messages = FALSE, error = function() { | 2 options(show.error.messages = F, error = function() { |
| 3 cat(geterrmessage(), file = stderr()) | 3 cat(geterrmessage(), file = stderr()) |
| 4 q("no", 1, FALSE) | 4 q("no", 1, F) |
| 5 }) | 5 }) |
| 6 # we need that to not crash galaxy with an UTF8 error on German LC settings. | 6 # we need that to not crash galaxy with an UTF8 error on German LC settings. |
| 7 Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 7 Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
| 8 | 8 |
| 9 suppressPackageStartupMessages({ | 9 suppressPackageStartupMessages({ |
| 10 library("getopt") | 10 library("getopt") |
| 11 library("DiffBind") | 11 library("DiffBind") |
| 12 library("rjson") | 12 library("rjson") |
| 13 }) | 13 }) |
| 14 | 14 |
| 15 options(stringAsfactors = FALSE, useFancyQuotes = FALSE) | 15 options(stringAsfactors = FALSE, useFancyQuotes = FALSE) |
| 16 args <- commandArgs(trailingOnly = TRUE) | 16 args <- commandArgs(trailingOnly = TRUE) |
| 17 | 17 |
| 18 #get options, using the spec as defined by the enclosed list. | 18 # get options, using the spec as defined by the enclosed list. |
| 19 #we read the options from the default: commandArgs(TRUE). | 19 # we read the options from the default: commandArgs(TRUE). |
| 20 spec <- matrix(c( | 20 spec <- matrix(c( |
| 21 "infile", "i", 1, "character", | 21 "infile", "i", 1, "character", |
| 22 "outfile", "o", 1, "character", | 22 "outfile", "o", 1, "character", |
| 23 "scorecol", "n", 1, "integer", | 23 "method", "m", 1, "character", |
| 24 "lowerbetter", "l", 1, "logical", | 24 "scorecol", "n", 1, "integer", |
| 25 "summits", "s", 1, "integer", | 25 "lowerbetter", "l", 1, "logical", |
| 26 "th", "t", 1, "double", | 26 "summits", "s", 1, "integer", |
| 27 "format", "f", 1, "character", | 27 "th", "t", 1, "double", |
| 28 "plots", "p", 2, "character", | 28 "minoverlap", "O", 1, "integer", |
| 29 "bmatrix", "b", 0, "logical", | 29 "use_blacklist", "B", 0, "logical", |
| 30 "rdaOpt", "r", 0, "logical", | 30 "format", "f", 1, "character", |
| 31 "infoOpt", "a", 0, "logical", | 31 "plots", "p", 2, "character", |
| 32 "verbose", "v", 2, "integer", | 32 "bmatrix", "b", 0, "logical", |
| 33 "help", "h", 0, "logical" | 33 "rdaOpt", "r", 0, "logical", |
| | 34 "infoOpt", "a", 0, "logical", |
| | 35 "verbose", "v", 2, "integer", |
| | 36 "help", "h", 0, "logical" |
| 34 ), byrow = TRUE, ncol = 4) | 37 ), byrow = TRUE, ncol = 4) |
| 35 | 38 |
| 36 opt <- getopt(spec) | 39 opt <- getopt(spec) |
| 37 | |
| 38 # if help was asked for print a friendly message | 40 # if help was asked for print a friendly message |
| 39 # and exit with a non-zero error code | 41 # and exit with a non-zero error code |
| 40 if (!is.null(opt$help)) { | 42 if (!is.null(opt$help)) { |
| 41 cat(getopt(spec, usage = TRUE)) | 43 cat(getopt(spec, usage = TRUE)) |
| 42 q(status = 1) | 44 q(status = 1) |
| 43 } | 45 } |
| 44 | 46 |
| 45 parser <- newJSONParser() | 47 parser <- newJSONParser() |
| 46 parser$addData(opt$infile) | 48 parser$addData(opt$infile) |
| 47 factor_list <- parser$getObject() | 49 factor_list <- parser$getObject() |
| 53 # get the group and sample id from the peaks filenames | 55 # get the group and sample id from the peaks filenames |
| 54 groups <- sapply(strsplit(peaks, "-"), `[`, 1) | 56 groups <- sapply(strsplit(peaks, "-"), `[`, 1) |
| 55 samples <- sapply(strsplit(peaks, "-"), `[`, 2) | 57 samples <- sapply(strsplit(peaks, "-"), `[`, 2) |
| 56 | 58 |
| 57 if (length(ctrls) != 0) { | 59 if (length(ctrls) != 0) { |
| 58 sample_table <- data.frame( | 60 sample_table <- data.frame( |
| 59 SampleID = samples, | 61 SampleID = samples, |
| 60 Condition = groups, | 62 Condition = groups, |
| 61 bamReads = bams, | 63 bamReads = bams, |
| 62 bamControl = ctrls, | 64 bamControl = ctrls, |
| 63 Peaks = peaks, | 65 Peaks = peaks, |
| 64 Tissue = samples | 66 Tissue = samples |
| 65 ) # using "Tissue" column to display ids as labels in PCA plot | 67 ) # using "Tissue" column to display ids as labels in PCA plot |
| 66 } else { | 68 } else { |
| 67 sample_table <- data.frame( | 69 sample_table <- data.frame( |
| 68 SampleID = samples, | 70 SampleID = samples, |
| 69 Replicate = samples, | 71 Replicate = samples, |
| 70 Condition = groups, | 72 Condition = groups, |
| 71 bamReads = bams, | 73 bamReads = bams, |
| 72 Peaks = peaks, | 74 Peaks = peaks, |
| 73 Tissue = samples | 75 Tissue = samples |
| 74 ) | 76 ) |
| 75 } | 77 } |
| 76 | 78 |
| 77 sample <- dba(sampleSheet = sample_table, peakFormat = "bed", scoreCol = opt$scorecol, bLowerScoreBetter = opt$lowerbetter) | 79 sample <- dba(sampleSheet = sample_table, peakFormat = "bed", scoreCol = opt$scorecol, bLowerScoreBetter = opt$lowerbetter, minOverlap = opt$minoverlap) |
| | 80 |
| | 81 if (!is.null(opt$use_blacklist)) { |
| | 82 sample <- dba.blacklist(sample, blacklist = TRUE) |
| | 83 } |
| 78 | 84 |
| 79 if (!is.null(opt$summits)) { | 85 if (!is.null(opt$summits)) { |
| 80 sample_count <- dba.count(sample, summits = opt$summits) | 86 sample_count <- dba.count(sample, summits = opt$summits) |
| 81 } else { | 87 } else { |
| 82 sample_count <- dba.count(sample) | 88 sample_count <- dba.count(sample) |
| 83 } | 89 } |
| 84 | 90 |
| 85 sample_contrast <- dba.contrast(sample_count, categories = DBA_CONDITION, minMembers = 2) | 91 sample_contrast <- dba.contrast(sample_count, categories = DBA_CONDITION, minMembers = 2) |
| 86 sample_analyze <- dba.analyze(sample_contrast) | 92 |
| 87 diff_bind <- dba.report(sample_analyze, th = opt$th) | 93 if (opt$method == "DBA_DESEQ2") { |
| | 94 method <- DBA_DESEQ2 |
| | 95 } else if (opt$method == "DBA_EDGER") { |
| | 96 method <- DBA_EDGER |
| | 97 } |
| | 98 |
| | 99 sample_analyze <- dba.analyze(sample_contrast, method = method, bBlacklist = FALSE, bGreylist = FALSE) |
| | 100 |
| | 101 diff_bind <- dba.report(sample_analyze, th = opt$th, method = method) |
| 88 | 102 |
| 89 # Generate plots | 103 # Generate plots |
| 90 if (!is.null(opt$plots)) { | 104 if (!is.null(opt$plots)) { |
| 91 pdf(opt$plots) | 105 pdf(opt$plots) |
| 92 orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th) | 106 orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th, method = method) |
| 93 dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3) | 107 dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3, method = method) |
| 94 dba.plotMA(sample_analyze, th = opt$th) | 108 dba.plotMA(sample_analyze, th = opt$th, method = method) |
| 95 dba.plotVolcano(sample_analyze, th = opt$th) | 109 dba.plotVolcano(sample_analyze, th = opt$th, method = method) |
| 96 dba.plotBox(sample_analyze, th = opt$th) | 110 dba.plotBox(sample_analyze, th = opt$th, method = method) |
| 97 dev.off() | 111 dev.off() |
| 98 } | 112 } |
| 99 | 113 |
| 100 # Output differential binding sites | 114 # Output differential binding sites |
| 101 res_sorted <- diff_bind[order(diff_bind$FDR), ] | 115 res_sorted <- diff_bind[order(diff_bind$FDR), ] |
| 102 # Convert from GRanges (1-based) to 0-based format (adapted from https://www.biostars.org/p/89341/) | 116 # Convert from GRanges (1-based) to 0-based format (adapted from https://www.biostars.org/p/89341/) |
| 103 if (opt$format == "bed") { | 117 if (opt$format == "bed") { |
| 104 res_sorted <- data.frame( | 118 res_sorted <- data.frame( |
| 105 Chrom = seqnames(res_sorted), | 119 Chrom = seqnames(res_sorted), |
| 106 Start = start(res_sorted) - 1, | 120 Start = start(res_sorted) - 1, |
| 107 End = end(res_sorted), | 121 End = end(res_sorted), |
| 108 Name = rep("DiffBind", length(res_sorted)), | 122 Name = rep("DiffBind", length(res_sorted)), |
| 109 Score = rep("0", length(res_sorted)), | 123 Score = rep("0", length(res_sorted)), |
| 110 Strand = gsub("\\*", ".", strand(res_sorted)) | 124 Strand = gsub("\\*", ".", strand(res_sorted)) |
| 111 ) | 125 ) |
| 112 } else if (opt$format == "interval") { | 126 } else if (opt$format == "interval") { |
| 113 # Output as interval | 127 # Output as interval |
| 114 df <- as.data.frame(res_sorted) | 128 df <- as.data.frame(res_sorted) |
| 115 extrainfo <- NULL | 129 extrainfo <- NULL |
| 116 for (i in seq_len(nrow(df))) { | 130 for (i in seq_len(nrow(df))) { |
| 117 extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse = "|") | 131 extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse = "|") |
| 118 } | 132 } |
| 119 res_sorted <- data.frame( | 133 res_sorted <- data.frame( |
| 120 Chrom = seqnames(res_sorted), | 134 Chrom = seqnames(res_sorted), |
| 121 Start = start(res_sorted) - 1, | 135 Start = start(res_sorted) - 1, |
| 122 End = end(res_sorted), | 136 End = end(res_sorted), |
| 123 Name = rep("DiffBind", length(res_sorted)), | 137 Name = rep("DiffBind", length(res_sorted)), |
| 124 Score = rep("0", length(res_sorted)), | 138 Score = rep("0", length(res_sorted)), |
| 125 Strand = gsub("\\*", ".", strand(res_sorted)), | 139 Strand = gsub("\\*", ".", strand(res_sorted)), |
| 126 Comment = extrainfo | 140 Comment = extrainfo |
| 127 ) | 141 ) |
| 128 } else { | 142 } else { |
| 129 # Output as 0-based tabular | 143 # Output as 0-based tabular |
| 130 res_sorted <- data.frame( | 144 res_sorted <- data.frame( |
| 131 Chrom = seqnames(res_sorted), | 145 Chrom = seqnames(res_sorted), |
| 132 Start = start(res_sorted) - 1, | 146 Start = start(res_sorted) - 1, |
| 133 End = end(res_sorted), | 147 End = end(res_sorted), |
| 134 Name = rep("DiffBind", length(res_sorted)), | 148 Name = rep("DiffBind", length(res_sorted)), |
| 135 Score = rep("0", length(res_sorted)), | 149 Score = rep("0", length(res_sorted)), |
| 136 Strand = gsub("\\*", ".", strand(res_sorted)), | 150 Strand = gsub("\\*", ".", strand(res_sorted)), |
| 137 mcols(res_sorted) | 151 mcols(res_sorted) |
| 138 ) | 152 ) |
| 139 } | 153 } |
| 140 write.table(res_sorted, file = opt$outfile, sep = "\t", quote = FALSE, row.names = FALSE) | 154 write.table(res_sorted, file = opt$outfile, sep = "\t", quote = FALSE, row.names = FALSE) |
| 141 | 155 |
| 142 # Output binding affinity scores | 156 # Output binding affinity scores |
| 143 if (!is.null(opt$bmatrix)) { | 157 if (!is.null(opt$bmatrix)) { |
| 144 bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME) | 158 bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME, minOverlap = opt$minoverlap) |
| 145 # Output as 0-based tabular | 159 # Output as 0-based tabular |
| 146 bmat <- data.frame( | 160 bmat <- data.frame( |
| 147 Chrom = bmat[, 1], | 161 Chrom = bmat[, 1], |
| 148 Start = bmat[, 2] - 1, | 162 Start = bmat[, 2] - 1, |
| 149 End = bmat[, 3], | 163 End = bmat[, 3], |
| 150 bmat[, 4:ncol(bmat)] | 164 bmat[, 4:ncol(bmat)] |
| 151 ) | 165 ) |
| 152 write.table(bmat, file = "bmatrix.tab", sep = "\t", quote = FALSE, row.names = FALSE) | 166 write.table(bmat, file = "bmatrix.tab", sep = "\t", quote = FALSE, row.names = FALSE) |
| 153 } | 167 } |
| 154 | 168 |
| 155 # Output RData file | 169 # Output RData file |
| 156 if (!is.null(opt$rdaOpt)) { | 170 if (!is.null(opt$rdaOpt)) { |
| 157 save.image(file = "DiffBind_analysis.RData") | 171 save.image(file = "DiffBind_analysis.RData") |
| 158 } | 172 } |
| 159 | 173 |
| 160 # Output analysis info | 174 # Output analysis info |
| 161 if (!is.null(opt$infoOpt)) { | 175 if (!is.null(opt$infoOpt)) { |
| 162 info <- "DiffBind_analysis_info.txt" | 176 info <- "DiffBind_analysis_info.txt" |
| 163 cat("dba.count Info\n\n", file = info, append = TRUE) | 177 cat("dba.count Info\n\n", file = info, append = TRUE) |
| 164 capture.output(sample, file = info, append = TRUE) | 178 capture.output(sample, file = info, append = TRUE) |
| 165 cat("\ndba.analyze Info\n\n", file = info, append = TRUE) | 179 cat("\ndba.analyze Info\n\n", file = info, append = TRUE) |
| 166 capture.output(sample_analyze, file = info, append = TRUE) | 180 capture.output(sample_analyze, file = info, append = TRUE) |
| 167 cat("\nSessionInfo\n\n", file = info, append = TRUE) | 181 cat("\nSessionInfo\n\n", file = info, append = TRUE) |
| 168 capture.output(sessionInfo(), file = info, append = TRUE) | 182 capture.output(sessionInfo(), file = info, append = TRUE) |
| 169 } | 183 } |
