view corr.R @ 8:44920cae2f4d

added execute rights
author Ido Tamir <ido.tamir@imp.ac.at>
date Sun, 18 Aug 2013 17:55:44 +0200
parents fcf85568a102
children ecad2cd5038f
line wrap: on
line source

#!/usr/bin/env Rscript
#
#for SGE
#$ -S /biosw/debian5-x86_64/R/3.0.0/bin/Rscript 
#$ -q solexa.q
#$ -P pipeline
#$ -cwd
#$ -l vf=30G

#args[1] <- comma separated bigwig input files
#args[2] <- comma separated names of files
#args[3] <- comma separated file formats
#args[4] <- output pdf
#args[5] <- output matrix
#args[6] <- title
#args[7] <- optional mappability bed file #not implemented because we have to change the coverage function (covWith0) to 1. intersection then diff because mappability can be wrong

options("useFancyQuotes" = FALSE)

library(rtracklayer)
library(lattice)
library(latticeExtra)
library(hexbin)

## creates the union of coverages i.e. mappable regions
## todo: add mappability argument
createMappable <- function(coverages){
    suppressWarnings(Reduce(union, coverages)) #supress because missing chromosomes spit warinings
}

## adds mappable regions as 0 coverage to track 
covWith0 <- function(cov, mappable){
    print(paste("cov:",cov))
	c0 <- setdiff(mappable, cov)
    cus <- if(length(c0) > 0){
	  elementMetadata(c0)$score <- 0
      sort(c(c0, cov))
	}else{
      sort(cov)
    }
    rc <- Rle(score(cus), ranges(cus)@width)
    rc
}

## unlisted rlelist is too long. Taking weighted mean as approximation.
correlate <- function(rlA, rlB){
   weighted.mean(cor(rlA, rlB), w=unlist(lapply(rlA, length)))
}


## correlation dist
corrDist <- function(covRles, outnames){
    print("calculating correlation")
	vl <- length(covRles)
    v <- 1:vl
    o <- matrix(NA,vl,vl)
    colnames(o) <- outnames
    rownames(o) <- outnames
    tri <- lower.tri(o, diag=FALSE)   
    indic <- expand.grid(v,v)
    pairs <- indic[tri,]
    corrs <- apply(pairs,1, function(ins){
        index1 <- ins[1]
        index2 <- ins[2]
        cor(covRles[[index1]], covRles[[index2]])
	   # correlate(covRles[[index1]], covRles[[index2]])
    })
    
    o[lower.tri(o, diag=FALSE)] <- corrs
    o[upper.tri(o)] <- t(o)[upper.tri(o)]
    diag(o) <- 1
    o
}



plotCorr <- function(corrMat, title){
    cor.d <- dist(corrMat)
    cor.row <- as.dendrogram( hclust( cor.d ))
    ord <- order.dendrogram(cor.row)
    levelplot(
       corrMat[ord,ord],
       scales=list(x=list(rot=90)),
       colorkey = list(space="left"),
       col.regions=BTY(100),
       xlab = "",
       ylab = "pearson correlation coefficient",
       legend = list(top=list( fun = dendrogramGrob, args= list(
                             x=cor.row,
                             ord = ord,
                             side = "top"
                             ))),
	   main=title	   
	   )

}

calcAndPlot <- function(coverages, outnames, mappable, outnamePDF, outnameMat, title){

    ca <- lapply(coverages, covWith0, mappable)
    
    cd <- corrDist(ca, outnames)
	write.table(cd, outnameMat, quote=FALSE, sep="\t", col.names=FALSE)
    pdf(outnamePDF)
	print(plotCorr(cd, title))
    dev.off()

}

getCoverage <- function(infile, format){
  print(paste("reading", infile, format))
  values <- import(infile, format=format, asRangedData = FALSE)  
  if(tolower(format) %in% c("bigwig","wig")){
     values
  }else{
     cov <- coverage(values)
     gcov <- as(cov, "GRanges")
     gcov[score(gcov) > 0]
  }
}


getCoverages <- function(infiles, formats){
   apply(data.frame(file=infiles,format=formats),1, function(row){ getCoverage(row['file'], row['format']) })
}

getMappable <- function(coverages, mappable){
    print(paste("creating mappability", mappable))
	if(is.na(mappable)){
	    createMappable(coverages)
    }else{
        import(mappable, format="bed", asRangedData = FALSE)
    }
}


args <- commandArgs(TRUE)

infiles <- unlist(strsplit(args[1], ","))
outnames <- unlist(strsplit(args[2], ","))
formats <- unlist(strsplit(args[3], ","))
outnamePDF <- args[4]
outnameMat <- args[5]
title <- args[6]
mappable <- args[7]

lp <- list(infiles,outnames,formats,outnamePDF,outnameMat,title,mappable)
lpp <- sapply(lp, paste, sep=" ")
print("parsed input parameters")
print(lpp)


coverages <- getCoverages(infiles, formats)
mappability <- getMappable(coverages, mappable)

calcAndPlot(coverages, outnames, mappability, outnamePDF, outnameMat,title)