Mercurial > repos > bgruening > diffbind
diff diffbind.R @ 16:163688bb8f73 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit 77828512472bf9815bdda725a58a2321f7803661"
author | iuc |
---|---|
date | Wed, 18 Nov 2020 12:54:07 +0000 |
parents | 194e3f2c1d86 |
children | 2605cbdaa7d8 |
line wrap: on
line diff
--- a/diffbind.R Tue Jul 09 18:46:09 2019 -0400 +++ b/diffbind.R Wed Nov 18 12:54:07 2020 +0000 @@ -1,12 +1,14 @@ ## Setup R error handling to go to stderr -options( show.error.messages=F, error = function () { cat( geterrmessage(), file=stderr() ); q( "no", 1, F ) } ) +options(show.error.messages = F, error = function() { + cat(geterrmessage(), file = stderr()); q("no", 1, F) +}) # we need that to not crash galaxy with an UTF8 error on German LC settings. Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") suppressPackageStartupMessages({ - library('getopt') - library('DiffBind') - library('rjson') + library("getopt") + library("DiffBind") + library("rjson") }) options(stringAsfactors = FALSE, useFancyQuotes = FALSE) @@ -14,128 +16,128 @@ #get options, using the spec as defined by the enclosed list. #we read the options from the default: commandArgs(TRUE). -spec = matrix(c( - 'infile' , 'i', 1, "character", - 'outfile' , 'o', 1, "character", - 'scorecol', 'n', 1, "integer", - 'lowerbetter', 'l', 1, "logical", - 'summits', 's', 1, "integer", - 'th', 't', 1, "double", - 'format', 'f', 1, "character", - 'plots' , 'p', 2, "character", - 'bmatrix', 'b', 0, "logical", +spec <- matrix(c( + "infile", "i", 1, "character", + "outfile", "o", 1, "character", + "scorecol", "n", 1, "integer", + "lowerbetter", "l", 1, "logical", + "summits", "s", 1, "integer", + "th", "t", 1, "double", + "format", "f", 1, "character", + "plots", "p", 2, "character", + "bmatrix", "b", 0, "logical", "rdaOpt", "r", 0, "logical", - 'infoOpt' , 'a', 0, "logical", - 'verbose', 'v', 2, "integer", - 'help' , 'h', 0, "logical" -), byrow=TRUE, ncol=4); + "infoOpt", "a", 0, "logical", + "verbose", "v", 2, "integer", + "help", "h", 0, "logical" +), byrow = TRUE, ncol = 4); -opt = getopt(spec); +opt <- getopt(spec); # if help was asked for print a friendly message # and exit with a non-zero error code -if ( !is.null(opt$help) ) { - cat(getopt(spec, usage=TRUE)); - q(status=1); +if (!is.null(opt$help)) { + cat(getopt(spec, usage = TRUE)); + q(status = 1); } parser <- newJSONParser() parser$addData(opt$infile) -factorList <- parser$getObject() -filenamesIn <- unname(unlist(factorList[[1]][[2]])) -peaks <- filenamesIn[grepl("peaks.bed", filenamesIn)] -bams <- filenamesIn[grepl("bamreads.bam", filenamesIn)] -ctrls <- filenamesIn[grepl("bamcontrol.bam", filenamesIn)] +factor_list <- parser$getObject() +filenames_in <- unname(unlist(factor_list[[1]][[2]])) +peaks <- filenames_in[grepl("peaks.bed", filenames_in)] +bams <- filenames_in[grepl("bamreads.bam", filenames_in)] +ctrls <- filenames_in[grepl("bamcontrol.bam", filenames_in)] # get the group and sample id from the peaks filenames -groups <- sapply(strsplit(peaks,"-"), `[`, 1) -samples <- sapply(strsplit(peaks,"-"), `[`, 2) +groups <- sapply(strsplit(peaks, "-"), `[`, 1) +samples <- sapply(strsplit(peaks, "-"), `[`, 2) -if ( length(ctrls) != 0 ) { - sampleTable <- data.frame(SampleID=samples, - Condition=groups, - bamReads=bams, - bamControl=ctrls, - Peaks=peaks, - Tissue=samples) # using "Tissue" column to display ids as labels in PCA plot +if (length(ctrls) != 0) { + sample_table <- data.frame(SampleID = samples, + Condition = groups, + bamReads = bams, + bamControl = ctrls, + Peaks = peaks, + Tissue = samples) # using "Tissue" column to display ids as labels in PCA plot } else { - sampleTable <- data.frame(SampleID=samples, - Replicate=samples, - Condition=groups, - bamReads=bams, - Peaks=peaks, - Tissue=samples) + sample_table <- data.frame(SampleID = samples, + Replicate = samples, + Condition = groups, + bamReads = bams, + Peaks = peaks, + Tissue = samples) } -sample = dba(sampleSheet=sampleTable, peakFormat='bed', scoreCol=opt$scorecol, bLowerScoreBetter=opt$lowerbetter) +sample <- dba(sampleSheet = sample_table, peakFormat = "bed", scoreCol = opt$scorecol, bLowerScoreBetter = opt$lowerbetter) -if ( !is.null(opt$summits) ) { - sample_count = dba.count(sample, summits=opt$summits) +if (!is.null(opt$summits)) { + sample_count <- dba.count(sample, summits = opt$summits) } else { - sample_count = dba.count(sample) + sample_count <- dba.count(sample) } -sample_contrast = dba.contrast(sample_count, categories=DBA_CONDITION, minMembers=2) -sample_analyze = dba.analyze(sample_contrast) -diff_bind = dba.report(sample_analyze, th=opt$th) +sample_contrast <- dba.contrast(sample_count, categories = DBA_CONDITION, minMembers = 2) +sample_analyze <- dba.analyze(sample_contrast) +diff_bind <- dba.report(sample_analyze, th = opt$th) # Generate plots -if ( !is.null(opt$plots) ) { +if (!is.null(opt$plots)) { pdf(opt$plots) - orvals = dba.plotHeatmap(sample_analyze, contrast=1, correlations=FALSE, cexCol=0.8, th=opt$th) - dba.plotPCA(sample_analyze, contrast=1, th=opt$th, label=DBA_TISSUE, labelSize=0.3) - dba.plotMA(sample_analyze, th=opt$th) - dba.plotVolcano(sample_analyze, th=opt$th) - dba.plotBox(sample_analyze, th=opt$th) + orvals <- dba.plotHeatmap(sample_analyze, contrast = 1, correlations = FALSE, cexCol = 0.8, th = opt$th) + dba.plotPCA(sample_analyze, contrast = 1, th = opt$th, label = DBA_TISSUE, labelSize = 0.3) + dba.plotMA(sample_analyze, th = opt$th) + dba.plotVolcano(sample_analyze, th = opt$th) + dba.plotBox(sample_analyze, th = opt$th) dev.off() } # Output differential binding sites -resSorted <- diff_bind[order(diff_bind$FDR),] +res_sorted <- diff_bind[order(diff_bind$FDR), ] # Convert from GRanges (1-based) to 0-based format (adapted from https://www.biostars.org/p/89341/) if (opt$format == "bed") { - resSorted <- data.frame(Chrom=seqnames(resSorted), - Start=start(resSorted) - 1, - End=end(resSorted), - Name=rep("DiffBind", length(resSorted)), - Score=rep("0", length(resSorted)), - Strand=gsub("\\*", ".", strand(resSorted))) + res_sorted <- data.frame(Chrom = seqnames(res_sorted), + Start = start(res_sorted) - 1, + End = end(res_sorted), + Name = rep("DiffBind", length(res_sorted)), + Score = rep("0", length(res_sorted)), + Strand = gsub("\\*", ".", strand(res_sorted))) } else if (opt$format == "interval") { # Output as interval - df <- as.data.frame(resSorted) + df <- as.data.frame(res_sorted) extrainfo <- NULL - for (i in 1:nrow(df)) { - extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse="|") + for (i in seq_len(nrow(df))) { + extrainfo[i] <- paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse = "|") } - resSorted <- data.frame(Chrom=seqnames(resSorted), - Start=start(resSorted) - 1, - End=end(resSorted), - Name=rep("DiffBind", length(resSorted)), - Score=rep("0", length(resSorted)), - Strand=gsub("\\*", ".", strand(resSorted)), - Comment=extrainfo) + res_sorted <- data.frame(Chrom = seqnames(res_sorted), + Start = start(res_sorted) - 1, + End = end(res_sorted), + Name = rep("DiffBind", length(res_sorted)), + Score = rep("0", length(res_sorted)), + Strand = gsub("\\*", ".", strand(res_sorted)), + Comment = extrainfo) } else { # Output as 0-based tabular - resSorted <- data.frame(Chrom=seqnames(resSorted), - Start=start(resSorted) - 1, - End=end(resSorted), - Name=rep("DiffBind", length(resSorted)), - Score=rep("0", length(resSorted)), - Strand=gsub("\\*", ".", strand(resSorted)), - mcols(resSorted)) + res_sorted <- data.frame(Chrom = seqnames(res_sorted), + Start = start(res_sorted) - 1, + End = end(res_sorted), + Name = rep("DiffBind", length(res_sorted)), + Score = rep("0", length(res_sorted)), + Strand = gsub("\\*", ".", strand(res_sorted)), + mcols(res_sorted)) } -write.table(resSorted, file = opt$outfile, sep="\t", quote = FALSE, row.names = FALSE) +write.table(res_sorted, file = opt$outfile, sep = "\t", quote = FALSE, row.names = FALSE) # Output binding affinity scores if (!is.null(opt$bmatrix)) { - bmat <- dba.peakset(sample_count, bRetrieve=TRUE, DataType=DBA_DATA_FRAME) + bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME) # Output as 0-based tabular - bmat <- data.frame(Chrom=bmat[, 1], - Start=bmat[, 2] - 1, - End=bmat[, 3], + bmat <- data.frame(Chrom = bmat[, 1], + Start = bmat[, 2] - 1, + End = bmat[, 3], bmat[, 4:ncol(bmat)]) - write.table(bmat, file="bmatrix.tab", sep="\t", quote=FALSE, row.names=FALSE) + write.table(bmat, file = "bmatrix.tab", sep = "\t", quote = FALSE, row.names = FALSE) } # Output RData file @@ -146,10 +148,10 @@ # Output analysis info if (!is.null(opt$infoOpt)) { info <- "DiffBind_analysis_info.txt" - cat("dba.count Info\n\n", file=info, append = TRUE) - capture.output(sample, file=info, append=TRUE) - cat("\ndba.analyze Info\n\n", file=info, append = TRUE) - capture.output(sample_analyze, file=info, append=TRUE) - cat("\nSessionInfo\n\n", file=info, append = TRUE) - capture.output(sessionInfo(), file=info, append=TRUE) -} \ No newline at end of file + cat("dba.count Info\n\n", file = info, append = TRUE) + capture.output(sample, file = info, append = TRUE) + cat("\ndba.analyze Info\n\n", file = info, append = TRUE) + capture.output(sample_analyze, file = info, append = TRUE) + cat("\nSessionInfo\n\n", file = info, append = TRUE) + capture.output(sessionInfo(), file = info, append = TRUE) +}