Mercurial > repos > bgruening > diffbind
comparison diffbind.R @ 16:163688bb8f73 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit 77828512472bf9815bdda725a58a2321f7803661"
| author | iuc |
|---|---|
| date | Wed, 18 Nov 2020 12:54:07 +0000 |
| parents | 194e3f2c1d86 |
| children | 2605cbdaa7d8 |
comparison
equal
deleted
inserted
replaced
| 15:194e3f2c1d86 | 16:163688bb8f73 |
|---|---|
| 1 ## Setup R error handling to go to stderr | 1 ## Setup R error handling to go to stderr |
| 2 options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) | 2 options(show.error.messages = F, error = function() { |
| | 3 cat(geterrmessage(), file = stderr()); q("no", 1, F) |
| | 4 }) |
| 3 # we need that to not crash galaxy with an UTF8 error on German LC settings. | 5 # we need that to not crash galaxy with an UTF8 error on German LC settings. |
| 4 Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | 6 Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
| 5 | 7 |
| 6 suppressPackageStartupMessages({ | 8 suppressPackageStartupMessages({ |
| 7 library('getopt') | 9 library("getopt") |
| 8 library('DiffBind') | 10 library("DiffBind") |
| 9 library('rjson') | 11 library("rjson") |
| 10 }) | 12 }) |
| 11 | 13 |
| 12 options(stringAsfactors = FALSE, useFancyQuotes = FALSE) | 14 options(stringAsfactors = FALSE, useFancyQuotes = FALSE) |
| 13 args <- commandArgs(trailingOnly = TRUE) | 15 args <- commandArgs(trailingOnly = TRUE) |
| 14 | 16 |
| 15 #get options, using the spec as defined by the enclosed list. | 17 #get options, using the spec as defined by the enclosed list. |
| 16 #we read the options from the default: commandArgs(TRUE). | 18 #we read the options from the default: commandArgs(TRUE). |
| 17 spec = matrix(c( | 19 spec <- matrix(c( |
| 18 'infile' , 'i', 1, "character", | 20 "infile", "i", 1, "character", |
| 19 'outfile' , 'o', 1, "character", | 21 "outfile", "o", 1, "character", |
| 20 'scorecol', 'n', 1, "integer", | 22 "scorecol", "n", 1, "integer", |
| 21 'lowerbetter', 'l', 1, "logical", | 23 "lowerbetter", "l", 1, "logical", |
| 22 'summits', 's', 1, "integer", | 24 "summits", "s", 1, "integer", |
| 23 'th', 't', 1, "double", | 25 "th", "t", 1, "double", |
| 24 'format', 'f', 1, "character", | 26 "format", "f", 1, "character", |
| 25 'plots' , 'p', 2, "character", | 27 "plots", "p", 2, "character", |
| 26 'bmatrix', 'b', 0, "logical", | 28 "bmatrix", "b", 0, "logical", |
| 27 "rdaOpt", "r", 0, "logical", | 29 "rdaOpt", "r", 0, "logical", |
| 28 'infoOpt' , 'a', 0, "logical", | 30 "infoOpt", "a", 0, "logical", |
| 29 'verbose', 'v', 2, "integer", | 31 "verbose", "v", 2, "integer", |
| 30 'help' , 'h', 0, "logical" | 32 "help", "h", 0, "logical" |
| 31 ), byrow=TRUE, ncol=4); | 33 ), byrow = TRUE, ncol = 4); |
| 32 | 34 |
| 33 opt = getopt(spec); | 35 opt <- getopt(spec); |
| 34 | 36 |
| 35 # if help was asked for print a friendly message | 37 # if help was asked for print a friendly message |
| 36 # and exit with a non-zero error code | 38 # and exit with a non-zero error code |
| 37 if ( !is.null(opt$help) ) { | 39 if (!is.null(opt$help)) { |
| 38 cat(getopt(spec, usage=TRUE)); | 40 cat(getopt(spec, usage = TRUE)); |
| 39 q(status=1); | 41 q(status = 1); |
| 40 } | 42 } |
| 41 | 43 |
| 42 parser <- newJSONParser() | 44 parser <- newJSONParser() |
| 43 parser$addData(opt$infile) | 45 parser$addData(opt$infile) |
| 44 factorList <- parser$getObject() | 46 factor_list <- parser$getObject() |
| 45 filenamesIn <- unname(unlist(factorList[[1]][[2]])) | 47 filenames_in <- unname(unlist(factor_list[[1]][[2]])) |
| 46 peaks <- filenamesIn[grepl("peaks.bed", filenamesIn)] | 48 peaks <- filenames_in[grepl("peaks.bed", filenames_in)] |
| 47 bams <- filenamesIn[grepl("bamreads.bam", filenamesIn)] | 49 bams <- filenames_in[grepl("bamreads.bam", filenames_in)] |
| 48 ctrls <- filenamesIn[grepl("bamcontrol.bam", filenamesIn)] | 50 ctrls <- filenames_in[grepl("bamcontrol.bam", filenames_in)] |
| 49 | 51 |
| 50 # get the group and sample id from the peaks filenames | 52 # get the group and sample id from the peaks filenames |
| 51 groups <- sapply(strsplit(peaks,"-"), `[`, 1) | 53 groups <- sapply(strsplit(peaks, "-"), `[`, 1) |
| 52 samples <- sapply(strsplit(peaks,"-"), `[`, 2) | 54 samples <- sapply(strsplit(peaks, "-"), `[`, 2) |
| 53 | 55 |
| 54 if ( length(ctrls) != 0 ) { | 56 if (length(ctrls) != 0) { |
| 55 sampleTable <- data.frame(SampleID=samples, | 57 sample_table <- data.frame(SampleID = samples, |
| 56 Condition=groups, | 58 Condition = groups, |
| 57 bamReads=bams, | 59 bamReads = bams, |
| 58 bamControl=ctrls, | 60 bamControl = ctrls, |
| 59 Peaks=peaks, | 61 Peaks = peaks, |
| 60 Tissue=samples) # using "Tissue" column to display ids as labels in PCA plot | 62 Tissue = samples) # using "Tissue" column to display ids as labels in PCA plot |
| 61 } else { | 63 } else { |
| 62 | 64 |
| 63 sampleTable <- data.frame(SampleID=samples, | 65 sample_table <- data.frame(SampleID = samples, |
| 64 Replicate=samples, | 66 Replicate = samples, |
| 65 Condition=groups, | 67 Condition = groups, |
| 66 bamReads=bams, | 68 bamReads = bams, |
| 67 Peaks=peaks, | 69 Peaks = peaks, |
| 68 Tissue=samples) | 70 Tissue = samples) |
| 69 } | 71 } |
| 70 | 72 |
| 71 sample = dba(sampleSheet=sampleTable, peakFormat='bed', scoreCol=opt$scorecol, bLowerScoreBetter=opt$lowerbetter) | 73 sample <- dba(sampleSheet = sample_table, peakFormat = "bed", scoreCol = opt$scorecol, bLowerScoreBetter = opt$lowerbetter) |
| 72 | 74 |
| 73 if ( !is.null(opt$summits) ) { | 75 if (!is.null(opt$summits)) { |
| 74 sample_count = dba.count(sample, summits=opt$summits) | 76 sample_count <- dba.count(sample, summits = opt$summits) |
| 75 } else { | 77 } else { |
| 76 sample_count = dba.count(sample) | 78 sample_count <- dba.count(sample) |
| 77 } | 79 } |
| 78 | 80 |
| 79 sample_contrast = dba.contrast(sample_count, categories=DBA_CONDITION, minMembers=2) | 81 sample_contrast <- dba.contrast(sample_count, categories = DBA_CONDITION, minMembers = 2) |
| 80 sample_analyze = dba.analyze(sample_contrast) | 82 sample_analyze <- dba.analyze(sample_contrast) |
| 81 diff_bind = dba.report(sample_analyze, th=opt$th) | 83 diff_bind <- dba.report(sample_analyze, th = opt$th) |
| 82 | 84 |
| 83 # Generate plots | 85 # Generate plots |
| 84 if ( !is.null(opt$plots) ) { | 86 if (!is.null(opt$plots)) { |
| 85 pdf(opt$plots) | 87 pdf(opt$plots) |
| 86 orvals = dba.plotHeatmap(sample_analyze, contrast=1, correlations=FALSE, cexCol=0.8, th=opt$th) | 88 orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th) |
| 87 dba.plotPCA(sample_analyze, contrast=1, th=opt$th, label=DBA_TISSUE, labelSize=0.3) | 89 dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3) |
| 88 dba.plotMA(sample_analyze, th=opt$th) | 90 dba.plotMA(sample_analyze, th = opt$th) |
| 89 dba.plotVolcano(sample_analyze, th=opt$th) | 91 dba.plotVolcano(sample_analyze, th = opt$th) |
| 90 dba.plotBox(sample_analyze, th=opt$th) | 92 dba.plotBox(sample_analyze, th = opt$th) |
| 91 dev.off() | 93 dev.off() |
| 92 } | 94 } |
| 93 | 95 |
| 94 # Output differential binding sites | 96 # Output differential binding sites |
| 95 resSorted <- diff_bind[order(diff_bind$FDR),] | 97 res_sorted <- diff_bind[order(diff_bind$FDR), ] |
| 96 # Convert from GRanges (1-based) to 0-based format (adapted from https://www.biostars.org/p/89341/) | 98 # Convert from GRanges (1-based) to 0-based format (adapted from https://www.biostars.org/p/89341/) |
| 97 if (opt$format == "bed") { | 99 if (opt$format == "bed") { |
| 98 resSorted <- data.frame(Chrom=seqnames(resSorted), | 100 res_sorted <- data.frame(Chrom = seqnames(res_sorted), |
| 99 Start=start(resSorted) - 1, | 101 Start = start(res_sorted) - 1, |
| 100 End=end(resSorted), | 102 End = end(res_sorted), |
| 101 Name=rep("DiffBind", length(resSorted)), | 103 Name = rep("DiffBind", length(res_sorted)), |
| 102 Score=rep("0", length(resSorted)), | 104 Score = rep("0", length(res_sorted)), |
| 103 Strand=gsub("\\*", ".", strand(resSorted))) | 105 Strand = gsub("\\*", ".", strand(res_sorted))) |
| 104 } else if (opt$format == "interval") { | 106 } else if (opt$format == "interval") { |
| 105 # Output as interval | 107 # Output as interval |
| 106 df <- as.data.frame(resSorted) | 108 df <- as.data.frame(res_sorted) |
| 107 extrainfo <- NULL | 109 extrainfo <- NULL |
| 108 for (i in 1:nrow(df)) { | 110 for (i in seq_len(nrow(df))) { |
| 109 extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse="|") | 111 extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse = "|") |
| 110 } | 112 } |
| 111 resSorted <- data.frame(Chrom=seqnames(resSorted), | 113 res_sorted <- data.frame(Chrom = seqnames(res_sorted), |
| 112 Start=start(resSorted) - 1, | 114 Start = start(res_sorted) - 1, |
| 113 End=end(resSorted), | 115 End = end(res_sorted), |
| 114 Name=rep("DiffBind", length(resSorted)), | 116 Name = rep("DiffBind", length(res_sorted)), |
| 115 Score=rep("0", length(resSorted)), | 117 Score = rep("0", length(res_sorted)), |
| 116 Strand=gsub("\\*", ".", strand(resSorted)), | 118 Strand = gsub("\\*", ".", strand(res_sorted)), |
| 117 Comment=extrainfo) | 119 Comment = extrainfo) |
| 118 } else { | 120 } else { |
| 119 # Output as 0-based tabular | 121 # Output as 0-based tabular |
| 120 resSorted <- data.frame(Chrom=seqnames(resSorted), | 122 res_sorted <- data.frame(Chrom = seqnames(res_sorted), |
| 121 Start=start(resSorted) - 1, | 123 Start = start(res_sorted) - 1, |
| 122 End=end(resSorted), | 124 End = end(res_sorted), |
| 123 Name=rep("DiffBind", length(resSorted)), | 125 Name = rep("DiffBind", length(res_sorted)), |
| 124 Score=rep("0", length(resSorted)), | 126 Score = rep("0", length(res_sorted)), |
| 125 Strand=gsub("\\*", ".", strand(resSorted)), | 127 Strand = gsub("\\*", ".", strand(res_sorted)), |
| 126 mcols(resSorted)) | 128 mcols(res_sorted)) |
| 127 } | 129 } |
| 128 write.table(resSorted, file = opt$outfile, sep="\t", quote = FALSE, row.names = FALSE) | 130 write.table(res_sorted, file = opt$outfile, sep = "\t", quote = FALSE, row.names = FALSE) |
| 129 | 131 |
| 130 # Output binding affinity scores | 132 # Output binding affinity scores |
| 131 if (!is.null(opt$bmatrix)) { | 133 if (!is.null(opt$bmatrix)) { |
| 132 bmat <- dba.peakset(sample_count, bRetrieve=TRUE, DataType=DBA_DATA_FRAME) | 134 bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME) |
| 133 # Output as 0-based tabular | 135 # Output as 0-based tabular |
| 134 bmat <- data.frame(Chrom=bmat[, 1], | 136 bmat <- data.frame(Chrom = bmat[, 1], |
| 135 Start=bmat[, 2] - 1, | 137 Start = bmat[, 2] - 1, |
| 136 End=bmat[, 3], | 138 End = bmat[, 3], |
| 137 bmat[, 4:ncol(bmat)]) | 139 bmat[, 4:ncol(bmat)]) |
| 138 write.table(bmat, file="bmatrix.tab", sep="\t", quote=FALSE, row.names=FALSE) | 140 write.table(bmat, file = "bmatrix.tab", sep = "\t", quote = FALSE, row.names = FALSE) |
| 139 } | 141 } |
| 140 | 142 |
| 141 # Output RData file | 143 # Output RData file |
| 142 if (!is.null(opt$rdaOpt)) { | 144 if (!is.null(opt$rdaOpt)) { |
| 143 save.image(file = "DiffBind_analysis.RData") | 145 save.image(file = "DiffBind_analysis.RData") |
| 144 } | 146 } |
| 145 | 147 |
| 146 # Output analysis info | 148 # Output analysis info |
| 147 if (!is.null(opt$infoOpt)) { | 149 if (!is.null(opt$infoOpt)) { |
| 148 info <- "DiffBind_analysis_info.txt" | 150 info <- "DiffBind_analysis_info.txt" |
| 149 cat("dba.count Info\n\n", file=info, append = TRUE) | 151 cat("dba.count Info\n\n", file = info, append = TRUE) |
| 150 capture.output(sample, file=info, append=TRUE) | 152 capture.output(sample, file = info, append = TRUE) |
| 151 cat("\ndba.analyze Info\n\n", file=info, append = TRUE) | 153 cat("\ndba.analyze Info\n\n", file = info, append = TRUE) |
| 152 capture.output(sample_analyze, file=info, append=TRUE) | 154 capture.output(sample_analyze, file = info, append = TRUE) |
| 153 cat("\nSessionInfo\n\n", file=info, append = TRUE) | 155 cat("\nSessionInfo\n\n", file = info, append = TRUE) |
| 154 capture.output(sessionInfo(), file=info, append=TRUE) | 156 capture.output(sessionInfo(), file = info, append = TRUE) |
| 155 } | 157 } |
