changeset 2:1eb46757d8bb

Uploaded
author idot
date Sun, 18 Aug 2013 09:42:48 -0400
parents f106b52086b0
children 7bdd29cdfed8
files corr.R
diffstat 1 files changed, 157 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/corr.R	Sun Aug 18 09:42:48 2013 -0400
@@ -0,0 +1,157 @@
+#!/usr/bin/env Rscript
+#
+#for SGE
+#$ -S /biosw/debian5-x86_64/R/3.0.0/bin/Rscript 
+#$ -q solexa.q
+#$ -P pipeline
+#$ -cwd
+#$ -l vf=30G
+
+#args[1] <- comma separated bigwig input files
+#args[2] <- comma separated names of files
+#args[3] <- comma separated file formats
+#args[4] <- output pdf
+#args[5] <- output matrix
+#args[6] <- title
+#args[7] <- optional mappability bed file #not implemented because we have to change the coverage function (covWith0) to 1. intersection then diff because mappability can be wrong
+
+options("useFancyQuotes" = FALSE)
+
+library(rtracklayer)
+library(lattice)
+library(latticeExtra)
+library(hexbin)
+
+## creates the union of coverages i.e. mappable regions
+## todo: add mappability argument
+createMappable <- function(coverages){
+    suppressWarnings(Reduce(union, coverages)) #supress because missing chromosomes spit warinings
+}
+
+## adds mappable regions as 0 coverage to track 
+covWith0 <- function(cov, mappable){
+    c0 <- setdiff(mappable, cov)
+    cus <- if(length(c0) > 0){
+	  elementMetadata(c0)$score <- 0
+      sort(c(c0, cov))
+	}else{
+      sort(cov)
+    }
+    rc <- Rle(score(cus), ranges(cus)@width)
+    rc
+}
+
+## unlisted rlelist is too long. Taking weighted mean as approximation.
+correlate <- function(rlA, rlB){
+   weighted.mean(cor(rlA, rlB), w=unlist(lapply(rlA, length)))
+}
+
+
+## correlation dist
+corrDist <- function(covRles, outnames){
+    vl <- length(covRles)
+    v <- 1:vl
+    o <- matrix(NA,vl,vl)
+    colnames(o) <- outnames
+    rownames(o) <- outnames
+    tri <- lower.tri(o, diag=FALSE)   
+    indic <- expand.grid(v,v)
+    pairs <- indic[tri,]
+    corrs <- apply(pairs,1, function(ins){
+        index1 <- ins[1]
+        index2 <- ins[2]
+        cor(covRles[[index1]], covRles[[index2]])
+	   # correlate(covRles[[index1]], covRles[[index2]])
+    })
+    
+    o[lower.tri(o, diag=FALSE)] <- corrs
+    o[upper.tri(o)] <- t(o)[upper.tri(o)]
+    diag(o) <- 1
+    o
+}
+
+
+
+plotCorr <- function(corrMat, title){
+    cor.d <- dist(corrMat)
+    cor.row <- as.dendrogram( hclust( cor.d ))
+    ord <- order.dendrogram(cor.row)
+    levelplot(
+       corrMat[ord,ord],
+       scales=list(x=list(rot=90)),
+       colorkey = list(space="left"),
+       col.regions=BTY(100),
+       xlab = "",
+       ylab = "pearson correlation coefficient",
+       legend = list(top=list( fun = dendrogramGrob, args= list(
+                             x=cor.row,
+                             ord = ord,
+                             side = "top"
+                             ))),
+	   main=title	   
+	   )
+
+}
+
+calcAndPlot <- function(coverages, outnames, mappable, outnamePDF, outnameMat, title){
+
+    ca <- lapply(coverages, covWith0, mappable)
+    
+    cd <- corrDist(ca, outnames)
+	write.table(cd, outnameMat, quote=FALSE, sep="\t", col.names=FALSE)
+    pdf(outnamePDF)
+	print(plotCorr(cd, title))
+    dev.off()
+
+}
+
+getCoverage <- function(infile, format){
+  print(paste("reading", infile, format))
+  values <- import(infile, format=format, asRangedData = FALSE)  
+  if(tolower(format) %in% c("bigwig","wig")){
+     values
+  }else{
+     cov <- coverage(values)
+     gcov <- as(cov, "GRanges")
+     gcov[score(gcov) > 0]
+  }
+}
+
+
+getCoverages <- function(infiles, formats){
+   apply(data.frame(file=infiles,format=formats),1, function(row){ getCoverage(row['file'], row['format']) })
+}
+
+getMappable <- function(mappable){
+    if(is.na(mappable)){
+	    createMappable(coverages)
+    }else{
+        import(mappable, format="bed", asRangedData = FALSE)
+    }
+}
+
+
+args <- commandArgs(TRUE)
+
+infiles <- unlist(strsplit(args[1], ","))
+outnames <- unlist(strsplit(args[2], ","))
+formats <- unlist(strsplit(args[3], ","))
+outnamePDF <- args[4]
+outnameMat <- args[5]
+title <- args[6]
+mappable <- args[7]
+
+lp <- list(infiles,outnames,formats,outnamePDF,outnameMat,title,mappable)
+lpp <- sapply(lp, paste, sep=" ")
+print("parsed input parameters")
+print(lpp)
+
+
+coverages <- getCoverages(infiles, formats)
+mappability <- getMappable(mappable)
+
+calcAndPlot(coverages, outnames, mappability, outnamePDF, outnameMat,title)
+
+
+
+