view diffbind.R @ 18:f907216064f6 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit fd148a124034b44d0d61db3eec32ff991d8c152c
author iuc
date Mon, 08 Jul 2024 18:31:51 +0000
parents 2605cbdaa7d8
children
line wrap: on
line source

## Setup R error handling to go to stderr.
## R's own error printing is switched off; the handler below writes the last
## error message to stderr and then quits with exit status 1, without saving
## the workspace and without running .Last().
options(show.error.messages = FALSE, error = function() {
    cat(geterrmessage(), file = stderr())
    # FALSE is spelled out: the shorthand `F` is an ordinary variable that can be rebound
    q("no", 1, FALSE)
})
# we need that to not crash galaxy with an UTF8 error on German LC settings.
Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

suppressPackageStartupMessages({
    library("getopt")
    library("DiffBind")
    library("rjson")
})

# Global options: keep strings as character vectors when building data frames
# (the option name is `stringsAsFactors`; a misspelled name is stored but never
# consulted by R) and use plain ASCII quotes in quoted output.
options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)
# Raw trailing command-line arguments; getopt() below parses the command line on its own.
args <- commandArgs(trailingOnly = TRUE)

# Command-line interface, one row per option:
#   long name, short flag, argument mode, value type.
# Per the getopt documentation the argument mode is 0 = no argument,
# 1 = required argument, 2 = optional argument.
# The options themselves are read from the default: commandArgs(TRUE).
spec <- rbind(
    c("infile", "i", 1, "character"),
    c("outfile", "o", 1, "character"),
    c("method", "m", 1, "character"),
    c("scorecol", "n", 1, "integer"),
    c("lowerbetter", "l", 1, "logical"),
    c("summits", "s", 1, "integer"),
    c("th", "t", 1, "double"),
    c("minoverlap", "O", 1, "integer"),
    c("use_blacklist", "B", 0, "logical"),
    c("format", "f", 1, "character"),
    c("plots", "p", 2, "character"),
    c("bmatrix", "b", 0, "logical"),
    c("rdaOpt", "r", 0, "logical"),
    c("infoOpt", "a", 0, "logical"),
    c("verbose", "v", 2, "integer"),
    c("help", "h", 0, "logical")
)

opt <- getopt(spec)
# A help request prints the usage text and then
# deliberately exits with a non-zero status.
if (!is.null(opt$help)) {
    cat(getopt(spec, usage = TRUE))
    q(status = 1)
}

# Decode the JSON text passed via --infile and flatten the file names it carries.
# NOTE(review): the factor_list[[1]][[2]] layout is presumably fixed by the
# calling Galaxy wrapper — confirm against the caller.
parser <- newJSONParser()
parser$addData(opt$infile)
factor_list <- parser$getObject()
filenames_in <- unname(unlist(factor_list[[1]][[2]]))
# fixed = TRUE: the patterns are literal file-name fragments, so "." must not
# act as a regex wildcard.
peaks <- filenames_in[grepl("peaks.bed", filenames_in, fixed = TRUE)]
bams <- filenames_in[grepl("bamreads.bam", filenames_in, fixed = TRUE)]
ctrls <- filenames_in[grepl("bamcontrol.bam", filenames_in, fixed = TRUE)]

# get the group and sample id from the peaks filenames: the first and second
# "-"-separated fields. vapply() guarantees a character vector, also when no
# peak file was found (sapply() would return an empty list in that case).
groups <- vapply(strsplit(peaks, "-"), `[`, character(1), 1)
samples <- vapply(strsplit(peaks, "-"), `[`, character(1), 2)

# Assemble the sample sheet handed to dba().
# The "Tissue" column repeats the sample ids so they can be displayed as
# labels in the PCA plot.
if (length(ctrls) == 0) {
    # no control BAM files: the sheet carries a Replicate column and no bamControl
    sample_table <- data.frame(
        SampleID = samples,
        Replicate = samples,
        Condition = groups,
        bamReads = bams,
        Peaks = peaks,
        Tissue = samples
    )
} else {
    # control BAM files present: they go into the bamControl column
    sample_table <- data.frame(
        SampleID = samples,
        Condition = groups,
        bamReads = bams,
        bamControl = ctrls,
        Peaks = peaks,
        Tissue = samples
    )
}

# Resolve the analysis method first, so that a missing or unsupported --method
# stops the script with a clear message before any DiffBind work is started
# (otherwise `method` would simply be undefined further down).
if (is.null(opt$method)) {
    stop("No analysis method given (expected DBA_DESEQ2 or DBA_EDGER)", call. = FALSE)
}
method <- switch(opt$method,
    DBA_DESEQ2 = DBA_DESEQ2,
    DBA_EDGER = DBA_EDGER,
    stop("Unsupported analysis method: ", opt$method, call. = FALSE)
)

# Build the DBA object from the sample sheet; the peak files are read as BED.
sample <- dba(
    sampleSheet = sample_table,
    peakFormat = "bed",
    scoreCol = opt$scorecol,
    bLowerScoreBetter = opt$lowerbetter,
    minOverlap = opt$minoverlap
)

# Blacklist filtering is applied only when --use_blacklist was given.
if (!is.null(opt$use_blacklist)) {
    sample <- dba.blacklist(sample, blacklist = TRUE)
}

# Count reads; `summits` is forwarded only when --summits was given, otherwise
# dba.count() runs with its own default.
if (!is.null(opt$summits)) {
    sample_count <- dba.count(sample, summits = opt$summits)
} else {
    sample_count <- dba.count(sample)
}

# Contrast on the Condition column, requiring at least two members per group.
sample_contrast <- dba.contrast(sample_count, categories = DBA_CONDITION, minMembers = 2)

# Blacklist/greylist handling is switched off inside dba.analyze(); the only
# blacklist step in this script is the optional dba.blacklist() call above.
sample_analyze <- dba.analyze(sample_contrast, method = method, bBlacklist = FALSE, bGreylist = FALSE)

# Report of differentially bound sites at threshold --th.
diff_bind <- dba.report(sample_analyze, th = opt$th, method = method)

# Generate plots: when --plots is given, every figure below is drawn into
# the single PDF opened at that path.
if (!is.null(opt$plots)) {
    pdf(opt$plots)
    # heatmap for the first contrast; its return value is kept in `orvals`
    orvals <- dba.plotHeatmap(
        sample_analyze,
        contrast = 1,
        correlations = FALSE,
        cexCol = 0.8,
        th = opt$th,
        method = method
    )
    # PCA for the first contrast, points labelled via the Tissue column
    dba.plotPCA(
        sample_analyze,
        contrast = 1,
        th = opt$th,
        label = DBA_TISSUE,
        labelSize = 0.3,
        method = method
    )
    # MA, volcano and box plots share the same threshold and method
    dba.plotMA(
        sample_analyze,
        th = opt$th,
        method = method
    )
    dba.plotVolcano(
        sample_analyze,
        th = opt$th,
        method = method
    )
    dba.plotBox(
        sample_analyze,
        th = opt$th,
        method = method
    )
    dev.off()
}

# Output differential binding sites, ordered by ascending FDR
res_sorted <- diff_bind[order(diff_bind$FDR), ]
# Convert from GRanges (1-based) to 0-based format (adapted from https://www.biostars.org/p/89341/)
# The six leading columns are identical for every output format, so they are
# built once. `- 1L` keeps Start an integer: subtracting the double 1 would
# turn the column into doubles, which write.table() may print in scientific
# notation (e.g. 100000 as 1e+05).
bed_cols <- data.frame(
    Chrom = seqnames(res_sorted),
    Start = start(res_sorted) - 1L,
    End = end(res_sorted),
    Name = rep("DiffBind", length(res_sorted)),
    Score = rep("0", length(res_sorted)),
    Strand = gsub("\\*", ".", strand(res_sorted))
)
if (opt$format == "bed") {
    # Output as BED: just the six shared columns
    res_sorted <- bed_cols
} else if (opt$format == "interval") {
    # Output as interval: the width plus columns 6 onward of the data-frame
    # form (presumably the report's statistics columns) are joined with "|"
    # into one Comment column.
    df <- as.data.frame(res_sorted)
    # vapply() allocates the result once and yields character(0) for an empty
    # report, so the Comment column is present even when there are no rows.
    extrainfo <- vapply(
        seq_len(nrow(df)),
        function(i) paste0(c(df$width[i], df[i, 6:ncol(df)]), collapse = "|"),
        character(1)
    )
    res_sorted <- data.frame(bed_cols, Comment = extrainfo)
} else {
    # Output as 0-based tabular: shared columns followed by the metadata columns
    res_sorted <- data.frame(bed_cols, mcols(res_sorted))
}
write.table(res_sorted, file = opt$outfile, sep = "\t", quote = FALSE, row.names = FALSE)

# Output binding affinity scores (only when --bmatrix was given)
if (!is.null(opt$bmatrix)) {
    bmat <- dba.peakset(sample_count, bRetrieve = TRUE, DataType = DBA_DATA_FRAME, minOverlap = opt$minoverlap)
    # Output as 0-based tabular.
    # The coordinates are forced to integer so that write.table() cannot print
    # them in scientific notation (a double 100000 may be written as 1e+05).
    # NOTE(review): assumes columns 2 and 3 hold whole-number coordinates — confirm.
    # drop = FALSE keeps the remaining columns a data frame (and so keeps their
    # names) even if only a single column is left.
    bmat <- data.frame(
        Chrom = bmat[, 1],
        Start = as.integer(bmat[, 2]) - 1L,
        End = as.integer(bmat[, 3]),
        bmat[, 4:ncol(bmat), drop = FALSE]
    )
    write.table(bmat, file = "bmatrix.tab", sep = "\t", quote = FALSE, row.names = FALSE)
}

# Output RData file: snapshot of the whole workspace (only with --rdaOpt)
if (!is.null(opt$rdaOpt)) {
    save.image("DiffBind_analysis.RData")
}

# Output analysis info: headings and printed objects are appended, in order,
# to one text file (only with --infoOpt)
if (!is.null(opt$infoOpt)) {
    info <- "DiffBind_analysis_info.txt"
    # small helpers so each section below reads as heading + object
    write_heading <- function(heading) cat(heading, file = info, append = TRUE)
    write_object <- function(object) capture.output(object, file = info, append = TRUE)

    write_heading("dba.count Info\n\n")
    # NOTE(review): this prints `sample`, the object created before dba.count()
    # ran — confirm whether `sample_count` was intended under this heading.
    write_object(sample)

    write_heading("\ndba.analyze Info\n\n")
    write_object(sample_analyze)

    write_heading("\nSessionInfo\n\n")
    write_object(sessionInfo())
}