Mercurial > repos > idot > coverage_correlation
view corr.R @ 10:6df7b07f81c1
better explanation for optional name
author | Ido Tamir <ido.tamir@imp.ac.at> |
---|---|
date | Sun, 18 Aug 2013 18:10:48 +0200 |
parents | ecad2cd5038f |
children | 74bfa2464411 |
line wrap: on
line source
#!/usr/bin/env Rscript # #for SGE #$ -S /biosw/debian5-x86_64/R/3.0.0/bin/Rscript #$ -q solexa.q #$ -P pipeline #$ -cwd #$ -l vf=30G #args[1] <- comma separated bigwig input files #args[2] <- comma separated names of files #args[3] <- comma separated file formats #args[4] <- output pdf #args[5] <- output matrix #args[6] <- title #args[7] <- optional mappability bed file #not implemented because we have to change the coverage function (covWith0) to 1. intersection then diff because mappability can be wrong options("useFancyQuotes" = FALSE) library(rtracklayer) library(lattice) library(latticeExtra) library(hexbin) ## creates the union of coverages i.e. mappable regions ## todo: add mappability argument createMappable <- function(coverages){ suppressWarnings(Reduce(union, coverages)) #supress because missing chromosomes spit warinings } ## adds mappable regions as 0 coverage to track covWith0 <- function(cov, mappable){ print(paste("cov:", summary(cov))) c0 <- setdiff(mappable, cov) cus <- if(length(c0) > 0){ elementMetadata(c0)$score <- 0 sort(c(c0, cov)) }else{ sort(cov) } rc <- Rle(score(cus), ranges(cus)@width) rc } ## unlisted rlelist is too long. Taking weighted mean as approximation. correlate <- function(rlA, rlB){ weighted.mean(cor(rlA, rlB), w=unlist(lapply(rlA, length))) } ## correlation dist corrDist <- function(covRles, outnames){ print("calculating correlation") vl <- length(covRles) v <- 1:vl o <- matrix(NA,vl,vl) colnames(o) <- outnames rownames(o) <- outnames tri <- lower.tri(o, diag=FALSE) indic <- expand.grid(v,v) pairs <- indic[tri,] corrs <- apply(pairs,1, function(ins){ index1 <- ins[1] index2 <- ins[2] cor(covRles[[index1]], covRles[[index2]]) # correlate(covRles[[index1]], covRles[[index2]]) }) o[lower.tri(o, diag=FALSE)] <- corrs o[upper.tri(o)] <- t(o)[upper.tri(o)] diag(o) <- 1 o } plotCorr <- function(corrMat, title){ cor.d <- dist(corrMat) cor.row <- as.dendrogram( hclust( cor.d )) ord <- order.dendrogram(cor.row) levelplot( corrMat[ord,ord], scales=list(x=list(rot=90)), colorkey = list(space="left"), col.regions=BTY(100), xlab = "", ylab = "pearson correlation coefficient", legend = list(top=list( fun = dendrogramGrob, args= list( x=cor.row, ord = ord, side = "top" ))), main=title ) } calcAndPlot <- function(coverages, outnames, mappable, outnamePDF, outnameMat, title){ ca <- lapply(coverages, covWith0, mappable) cd <- corrDist(ca, outnames) write.table(cd, outnameMat, quote=FALSE, sep="\t", col.names=FALSE) pdf(outnamePDF) print(plotCorr(cd, title)) dev.off() } getCoverage <- function(infile, format){ print(paste("reading", infile, format)) values <- import(infile, format=format, asRangedData = FALSE) if(tolower(format) %in% c("bigwig","wig")){ values }else{ cov <- coverage(values) gcov <- as(cov, "GRanges") gcov[score(gcov) > 0] } } getCoverages <- function(infiles, formats){ apply(data.frame(file=infiles,format=formats),1, function(row){ getCoverage(row['file'], row['format']) }) } getMappable <- function(coverages, mappable){ print(paste("creating mappability", length(coverages), mappable)) if(is.na(mappable)){ createMappable(coverages) }else{ import(mappable, format="bed", asRangedData = FALSE) } } args <- commandArgs(TRUE) infiles <- unlist(strsplit(args[1], ",")) outnames <- unlist(strsplit(args[2], ",")) formats <- unlist(strsplit(args[3], ",")) outnamePDF <- args[4] outnameMat <- args[5] title <- args[6] mappable <- args[7] lp <- list(infiles,outnames,formats,outnamePDF,outnameMat,title,mappable) lpp <- sapply(lp, paste, sep=" ") print("parsed input parameters") print(lpp) coverages <- getCoverages(infiles, formats) mappability <- getMappable(coverages, mappable) calcAndPlot(coverages, outnames, mappability, outnamePDF, outnameMat,title)