Mercurial > repos > idot > coverage_correlation
changeset 2:1eb46757d8bb
Uploaded
author | idot |
---|---|
date | Sun, 18 Aug 2013 09:42:48 -0400 |
parents | f106b52086b0 |
children | 7bdd29cdfed8 |
files | corr.R |
diffstat | 1 files changed, 157 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/corr.R Sun Aug 18 09:42:48 2013 -0400 @@ -0,0 +1,157 @@ +#!/usr/bin/env Rscript +# +#for SGE +#$ -S /biosw/debian5-x86_64/R/3.0.0/bin/Rscript +#$ -q solexa.q +#$ -P pipeline +#$ -cwd +#$ -l vf=30G + +#args[1] <- comma separated bigwig input files +#args[2] <- comma separated names of files +#args[3] <- comma separated file formats +#args[4] <- output pdf +#args[5] <- output matrix +#args[6] <- title +#args[7] <- optional mappability bed file #not implemented because we have to change the coverage function (covWith0) to 1. intersection then diff because mappability can be wrong + +options("useFancyQuotes" = FALSE) + +library(rtracklayer) +library(lattice) +library(latticeExtra) +library(hexbin) + +## creates the union of coverages i.e. mappable regions +## todo: add mappability argument +createMappable <- function(coverages){ + suppressWarnings(Reduce(union, coverages)) #supress because missing chromosomes spit warinings +} + +## adds mappable regions as 0 coverage to track +covWith0 <- function(cov, mappable){ + c0 <- setdiff(mappable, cov) + cus <- if(length(c0) > 0){ + elementMetadata(c0)$score <- 0 + sort(c(c0, cov)) + }else{ + sort(cov) + } + rc <- Rle(score(cus), ranges(cus)@width) + rc +} + +## unlisted rlelist is too long. Taking weighted mean as approximation. +correlate <- function(rlA, rlB){ + weighted.mean(cor(rlA, rlB), w=unlist(lapply(rlA, length))) +} + + +## correlation dist +corrDist <- function(covRles, outnames){ + vl <- length(covRles) + v <- 1:vl + o <- matrix(NA,vl,vl) + colnames(o) <- outnames + rownames(o) <- outnames + tri <- lower.tri(o, diag=FALSE) + indic <- expand.grid(v,v) + pairs <- indic[tri,] + corrs <- apply(pairs,1, function(ins){ + index1 <- ins[1] + index2 <- ins[2] + cor(covRles[[index1]], covRles[[index2]]) + # correlate(covRles[[index1]], covRles[[index2]]) + }) + + o[lower.tri(o, diag=FALSE)] <- corrs + o[upper.tri(o)] <- t(o)[upper.tri(o)] + diag(o) <- 1 + o +} + + + +plotCorr <- function(corrMat, title){ + cor.d <- dist(corrMat) + cor.row <- as.dendrogram( hclust( cor.d )) + ord <- order.dendrogram(cor.row) + levelplot( + corrMat[ord,ord], + scales=list(x=list(rot=90)), + colorkey = list(space="left"), + col.regions=BTY(100), + xlab = "", + ylab = "pearson correlation coefficient", + legend = list(top=list( fun = dendrogramGrob, args= list( + x=cor.row, + ord = ord, + side = "top" + ))), + main=title + ) + +} + +calcAndPlot <- function(coverages, outnames, mappable, outnamePDF, outnameMat, title){ + + ca <- lapply(coverages, covWith0, mappable) + + cd <- corrDist(ca, outnames) + write.table(cd, outnameMat, quote=FALSE, sep="\t", col.names=FALSE) + pdf(outnamePDF) + print(plotCorr(cd, title)) + dev.off() + +} + +getCoverage <- function(infile, format){ + print(paste("reading", infile, format)) + values <- import(infile, format=format, asRangedData = FALSE) + if(tolower(format) %in% c("bigwig","wig")){ + values + }else{ + cov <- coverage(values) + gcov <- as(cov, "GRanges") + gcov[score(gcov) > 0] + } +} + + +getCoverages <- function(infiles, formats){ + apply(data.frame(file=infiles,format=formats),1, function(row){ getCoverage(row['file'], row['format']) }) +} + +getMappable <- function(mappable){ + if(is.na(mappable)){ + createMappable(coverages) + }else{ + import(mappable, format="bed", asRangedData = FALSE) + } +} + + +args <- commandArgs(TRUE) + +infiles <- unlist(strsplit(args[1], ",")) +outnames <- unlist(strsplit(args[2], ",")) +formats <- unlist(strsplit(args[3], ",")) +outnamePDF <- args[4] +outnameMat <- args[5] +title <- args[6] +mappable <- args[7] + +lp <- list(infiles,outnames,formats,outnamePDF,outnameMat,title,mappable) +lpp <- sapply(lp, paste, sep=" ") +print("parsed input parameters") +print(lpp) + + +coverages <- getCoverages(infiles, formats) +mappability <- getMappable(mappable) + +calcAndPlot(coverages, outnames, mappability, outnamePDF, outnameMat,title) + + + +