view chipseeker.R @ 8:8bd92f2404dd draft default tip

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/chipseeker commit d30c91c3b4f71ec45b72976f7c2f08ea7df1e376-dirty"
author rnateam
date Fri, 27 Aug 2021 10:49:39 +0000
parents 1b9a9409831d
children
line wrap: on
line source

options(
  show.error.messages = F,
  error = function() {
    cat(geterrmessage(), file = stderr())
    q("no", 1, F)
  }
)

# we need that to not crash galaxy with an UTF8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

suppressPackageStartupMessages({
  library(ChIPseeker)
  library(GenomicFeatures)
  library(rtracklayer)
  library(optparse)
  library(ggupset)
})

option_list <- list(
  make_option(c("-i", "--infile"), type = "character", help = "Peaks file to be annotated"),
  make_option(c("-H", "--header"), type = "logical", help = "Peaks file contains header row"),
  make_option(c("-G", "--gtf"), type = "character", help = "GTF to create TxDb."),
  make_option(c("-u", "--upstream"), type = "integer", help = "TSS upstream region"),
  make_option(c("-d", "--downstream"), type = "integer", help = "TSS downstream region"),
  make_option(c("-F", "--flankgeneinfo"), type = "logical", help = "Add flanking gene info"),
  make_option(c("-D", "--flankgenedist"), type = "integer", help = "Flanking gene distance"),
  make_option(c("-j", "--ignore_upstream"), type = "logical", help = "Ignore upstream"),
  make_option(
    c("-k", "--ignore_downstream"),
    type = "logical",
    help = "Ignore downstream"
  ),
  make_option(c("-f", "--format"), type = "character", help = "Output format (interval or tabular)."),
  make_option(c("-p", "--plots"), type = "logical", help = "PDF of plots."),
  make_option(c("-r", "--rdata"), type = "logical", help = "Output RData file.")
)

parser <- OptionParser(usage = "%prog [options] file", option_list = option_list)
args <- parse_args(parser)

peaks <- args$infile
gtf <- args$gtf
up <- args$upstream
down <- args$downstream
format <- args$format

if (!is.null(args$flankgeneinfo)) {
  flankgeneinfo <- TRUE
} else {
  flankgeneinfo <- FALSE
}

if (!is.null(args$ignore_upstream)) {
  ignore_upstream <- TRUE
} else {
  ignore_upstream <- FALSE
}

if (!is.null(args$ignore_downstream)) {
  ignore_downstream <- TRUE
} else {
  ignore_downstream <- FALSE
}

if (!is.null(args$header)) {
  header <- TRUE
} else {
  header <- FALSE
}

peaks <- readPeakFile(peaks, header = header)

# Make TxDb from GTF
txdb <- makeTxDbFromGFF(gtf, format = "gtf")

# Annotate peaks
peak_anno <-  annotatePeak(
  peaks,
  TxDb = txdb,
  tssRegion = c(-up, down),
  addFlankGeneInfo = flankgeneinfo,
  flankDistance = args$flankgenedist,
  ignoreUpstream = ignore_upstream,
  ignoreDownstream = ignore_downstream
)

# Add gene name
features <- import(gtf, format = "gtf")
ann <- unique(mcols(features)[, c("gene_id", "gene_name")])
res <- as.data.frame(peak_anno)
res <- merge(res, ann, by.x = "geneId", by.y = "gene_id")
names(res)[names(res) == "gene_name"] <- "geneName"

#Extract metadata cols, 1st is geneId, rest should be from col 7 to end
metacols <- res[, c(7:ncol(res), 1)]
# Convert from 1-based to 0-based format
if (format == "interval") {
  metacols <-
    apply(as.data.frame(metacols), 1, function(col)
      paste(col, collapse = "|"))
  resout <- data.frame(res$seqnames,
                       res$start - 1,
                       res$end,
                       metacols)
  colnames(resout)[1:4] <- c("Chrom", "Start", "End", "Comment")
} else {
  resout <- data.frame(res$seqnames,
                       res$start - 1,
                       res$end,
                       metacols)
  colnames(resout)[1:3] <- c("Chrom", "Start", "End")
}
write.table(
  resout,
  file = "out.tab",
  sep = "\t",
  row.names = FALSE,
  quote = FALSE
)

if (!is.null(args$plots)) {
  pdf("out.pdf", width = 14)
  plotAnnoPie(peak_anno)
  p1 <- plotAnnoBar(peak_anno)
  print(p1)
  vennpie(peak_anno)
  upsetplot(peak_anno)
  p2 <-
    plotDistToTSS(peak_anno, title = "Distribution of transcription factor-binding loci\nrelative to TSS")
  print(p2)
  dev.off()
  rm(p1, p2)
}

## Output RData file

if (!is.null(args$rdata)) {
  save.image(file = "ChIPseeker_analysis.RData")
}