Mercurial > repos > idot > coverage_correlation
annotate corr.R @ 10:6df7b07f81c1
better explanation for optional name
| author | Ido Tamir <ido.tamir@imp.ac.at> | 
|---|---|
| date | Sun, 18 Aug 2013 18:10:48 +0200 | 
| parents | ecad2cd5038f | 
| children | 74bfa2464411 | 
| rev | line source | 
|---|---|
| 2 | 1 #!/usr/bin/env Rscript | 
| 2 # | |
| 3 #for SGE | |
| 4 #$ -S /biosw/debian5-x86_64/R/3.0.0/bin/Rscript | |
| 5 #$ -q solexa.q | |
| 6 #$ -P pipeline | |
| 7 #$ -cwd | |
| 8 #$ -l vf=30G | |
| 9 | |
| 10 #args[1] <- comma separated bigwig input files | |
| 11 #args[2] <- comma separated names of files | |
| 12 #args[3] <- comma separated file formats | |
| 13 #args[4] <- output pdf | |
| 14 #args[5] <- output matrix | |
| 15 #args[6] <- title | |
| 16 #args[7] <- optional mappability bed file #not implemented because we have to change the coverage function (covWith0) to 1. intersection then diff because mappability can be wrong | |
| 17 | |
| 18 options("useFancyQuotes" = FALSE) | |
| 19 | |
| 20 library(rtracklayer) | |
| 21 library(lattice) | |
| 22 library(latticeExtra) | |
| 23 library(hexbin) | |
| 24 | |
| 25 ## creates the union of coverages i.e. mappable regions | |
| 26 ## todo: add mappability argument | |
| 27 createMappable <- function(coverages){ | |
| 28 suppressWarnings(Reduce(union, coverages)) #supress because missing chromosomes spit warinings | |
| 29 } | |
| 30 | |
| 31 ## adds mappable regions as 0 coverage to track | |
| 32 covWith0 <- function(cov, mappable){ | |
| 9 
ecad2cd5038f
debug message was making truble
 Ido Tamir <ido.tamir@imp.ac.at> parents: 
6diff
changeset | 33 print(paste("cov:", summary(cov))) | 
| 3 | 34 c0 <- setdiff(mappable, cov) | 
| 2 | 35 cus <- if(length(c0) > 0){ | 
| 36 elementMetadata(c0)$score <- 0 | |
| 37 sort(c(c0, cov)) | |
| 38 }else{ | |
| 39 sort(cov) | |
| 40 } | |
| 41 rc <- Rle(score(cus), ranges(cus)@width) | |
| 42 rc | |
| 43 } | |
| 44 | |
| 45 ## unlisted rlelist is too long. Taking weighted mean as approximation. | |
| 46 correlate <- function(rlA, rlB){ | |
| 47 weighted.mean(cor(rlA, rlB), w=unlist(lapply(rlA, length))) | |
| 48 } | |
| 49 | |
| 50 | |
| 51 ## correlation dist | |
| 52 corrDist <- function(covRles, outnames){ | |
| 3 | 53 print("calculating correlation") | 
| 54 vl <- length(covRles) | |
| 2 | 55 v <- 1:vl | 
| 56 o <- matrix(NA,vl,vl) | |
| 57 colnames(o) <- outnames | |
| 58 rownames(o) <- outnames | |
| 59 tri <- lower.tri(o, diag=FALSE) | |
| 60 indic <- expand.grid(v,v) | |
| 61 pairs <- indic[tri,] | |
| 62 corrs <- apply(pairs,1, function(ins){ | |
| 63 index1 <- ins[1] | |
| 64 index2 <- ins[2] | |
| 65 cor(covRles[[index1]], covRles[[index2]]) | |
| 66 # correlate(covRles[[index1]], covRles[[index2]]) | |
| 67 }) | |
| 68 | |
| 69 o[lower.tri(o, diag=FALSE)] <- corrs | |
| 70 o[upper.tri(o)] <- t(o)[upper.tri(o)] | |
| 71 diag(o) <- 1 | |
| 72 o | |
| 73 } | |
| 74 | |
| 75 | |
| 76 | |
| 77 plotCorr <- function(corrMat, title){ | |
| 78 cor.d <- dist(corrMat) | |
| 79 cor.row <- as.dendrogram( hclust( cor.d )) | |
| 80 ord <- order.dendrogram(cor.row) | |
| 81 levelplot( | |
| 82 corrMat[ord,ord], | |
| 83 scales=list(x=list(rot=90)), | |
| 84 colorkey = list(space="left"), | |
| 85 col.regions=BTY(100), | |
| 86 xlab = "", | |
| 87 ylab = "pearson correlation coefficient", | |
| 88 legend = list(top=list( fun = dendrogramGrob, args= list( | |
| 89 x=cor.row, | |
| 90 ord = ord, | |
| 91 side = "top" | |
| 92 ))), | |
| 93 main=title | |
| 94 ) | |
| 95 | |
| 96 } | |
| 97 | |
| 98 calcAndPlot <- function(coverages, outnames, mappable, outnamePDF, outnameMat, title){ | |
| 99 | |
| 100 ca <- lapply(coverages, covWith0, mappable) | |
| 101 | |
| 102 cd <- corrDist(ca, outnames) | |
| 103 write.table(cd, outnameMat, quote=FALSE, sep="\t", col.names=FALSE) | |
| 104 pdf(outnamePDF) | |
| 105 print(plotCorr(cd, title)) | |
| 106 dev.off() | |
| 107 | |
| 108 } | |
| 109 | |
| 110 getCoverage <- function(infile, format){ | |
| 111 print(paste("reading", infile, format)) | |
| 112 values <- import(infile, format=format, asRangedData = FALSE) | |
| 113 if(tolower(format) %in% c("bigwig","wig")){ | |
| 114 values | |
| 115 }else{ | |
| 116 cov <- coverage(values) | |
| 117 gcov <- as(cov, "GRanges") | |
| 118 gcov[score(gcov) > 0] | |
| 119 } | |
| 120 } | |
| 121 | |
| 122 | |
| 123 getCoverages <- function(infiles, formats){ | |
| 124 apply(data.frame(file=infiles,format=formats),1, function(row){ getCoverage(row['file'], row['format']) }) | |
| 125 } | |
| 126 | |
| 6 
fcf85568a102
dont know how this worked. added coverages to mappable
 Ido Tamir <ido.tamir@imp.ac.at> parents: 
3diff
changeset | 127 getMappable <- function(coverages, mappable){ | 
| 9 
ecad2cd5038f
debug message was making truble
 Ido Tamir <ido.tamir@imp.ac.at> parents: 
6diff
changeset | 128 print(paste("creating mappability", length(coverages), mappable)) | 
| 6 
fcf85568a102
dont know how this worked. added coverages to mappable
 Ido Tamir <ido.tamir@imp.ac.at> parents: 
3diff
changeset | 129 if(is.na(mappable)){ | 
| 2 | 130 createMappable(coverages) | 
| 131 }else{ | |
| 132 import(mappable, format="bed", asRangedData = FALSE) | |
| 133 } | |
| 134 } | |
| 135 | |
| 136 | |
| 137 args <- commandArgs(TRUE) | |
| 138 | |
| 139 infiles <- unlist(strsplit(args[1], ",")) | |
| 140 outnames <- unlist(strsplit(args[2], ",")) | |
| 141 formats <- unlist(strsplit(args[3], ",")) | |
| 142 outnamePDF <- args[4] | |
| 143 outnameMat <- args[5] | |
| 144 title <- args[6] | |
| 145 mappable <- args[7] | |
| 146 | |
| 147 lp <- list(infiles,outnames,formats,outnamePDF,outnameMat,title,mappable) | |
| 148 lpp <- sapply(lp, paste, sep=" ") | |
| 149 print("parsed input parameters") | |
| 150 print(lpp) | |
| 151 | |
| 152 | |
| 153 coverages <- getCoverages(infiles, formats) | |
| 6 
fcf85568a102
dont know how this worked. added coverages to mappable
 Ido Tamir <ido.tamir@imp.ac.at> parents: 
3diff
changeset | 154 mappability <- getMappable(coverages, mappable) | 
| 2 | 155 | 
| 156 calcAndPlot(coverages, outnames, mappability, outnamePDF, outnameMat,title) | |
| 157 | |
| 158 | |
| 159 | |
| 160 | 
