comparison corr.R @ 2:1eb46757d8bb

Uploaded
author idot
date Sun, 18 Aug 2013 09:42:48 -0400
parents
children 7bdd29cdfed8
comparison
equal deleted inserted replaced
1:f106b52086b0 2:1eb46757d8bb
1 #!/usr/bin/env Rscript
2 #
3 #for SGE
4 #$ -S /biosw/debian5-x86_64/R/3.0.0/bin/Rscript
5 #$ -q solexa.q
6 #$ -P pipeline
7 #$ -cwd
8 #$ -l vf=30G
9
10 #args[1] <- comma separated bigwig input files
11 #args[2] <- comma separated names of files
12 #args[3] <- comma separated file formats
13 #args[4] <- output pdf
14 #args[5] <- output matrix
15 #args[6] <- title
16 #args[7] <- optional mappability bed file #not implemented because we have to change the coverage function (covWith0) to 1. intersection then diff because mappability can be wrong
17
18 options("useFancyQuotes" = FALSE)
19
20 library(rtracklayer)
21 library(lattice)
22 library(latticeExtra)
23 library(hexbin)
24
25 ## creates the union of coverages i.e. mappable regions
26 ## todo: add mappability argument
27 createMappable <- function(coverages){
28 suppressWarnings(Reduce(union, coverages)) #supress because missing chromosomes spit warinings
29 }
30
31 ## adds mappable regions as 0 coverage to track
32 covWith0 <- function(cov, mappable){
33 c0 <- setdiff(mappable, cov)
34 cus <- if(length(c0) > 0){
35 elementMetadata(c0)$score <- 0
36 sort(c(c0, cov))
37 }else{
38 sort(cov)
39 }
40 rc <- Rle(score(cus), ranges(cus)@width)
41 rc
42 }
43
44 ## unlisted rlelist is too long. Taking weighted mean as approximation.
45 correlate <- function(rlA, rlB){
46 weighted.mean(cor(rlA, rlB), w=unlist(lapply(rlA, length)))
47 }
48
49
50 ## correlation dist
51 corrDist <- function(covRles, outnames){
52 vl <- length(covRles)
53 v <- 1:vl
54 o <- matrix(NA,vl,vl)
55 colnames(o) <- outnames
56 rownames(o) <- outnames
57 tri <- lower.tri(o, diag=FALSE)
58 indic <- expand.grid(v,v)
59 pairs <- indic[tri,]
60 corrs <- apply(pairs,1, function(ins){
61 index1 <- ins[1]
62 index2 <- ins[2]
63 cor(covRles[[index1]], covRles[[index2]])
64 # correlate(covRles[[index1]], covRles[[index2]])
65 })
66
67 o[lower.tri(o, diag=FALSE)] <- corrs
68 o[upper.tri(o)] <- t(o)[upper.tri(o)]
69 diag(o) <- 1
70 o
71 }
72
73
74
75 plotCorr <- function(corrMat, title){
76 cor.d <- dist(corrMat)
77 cor.row <- as.dendrogram( hclust( cor.d ))
78 ord <- order.dendrogram(cor.row)
79 levelplot(
80 corrMat[ord,ord],
81 scales=list(x=list(rot=90)),
82 colorkey = list(space="left"),
83 col.regions=BTY(100),
84 xlab = "",
85 ylab = "pearson correlation coefficient",
86 legend = list(top=list( fun = dendrogramGrob, args= list(
87 x=cor.row,
88 ord = ord,
89 side = "top"
90 ))),
91 main=title
92 )
93
94 }
95
96 calcAndPlot <- function(coverages, outnames, mappable, outnamePDF, outnameMat, title){
97
98 ca <- lapply(coverages, covWith0, mappable)
99
100 cd <- corrDist(ca, outnames)
101 write.table(cd, outnameMat, quote=FALSE, sep="\t", col.names=FALSE)
102 pdf(outnamePDF)
103 print(plotCorr(cd, title))
104 dev.off()
105
106 }
107
108 getCoverage <- function(infile, format){
109 print(paste("reading", infile, format))
110 values <- import(infile, format=format, asRangedData = FALSE)
111 if(tolower(format) %in% c("bigwig","wig")){
112 values
113 }else{
114 cov <- coverage(values)
115 gcov <- as(cov, "GRanges")
116 gcov[score(gcov) > 0]
117 }
118 }
119
120
121 getCoverages <- function(infiles, formats){
122 apply(data.frame(file=infiles,format=formats),1, function(row){ getCoverage(row['file'], row['format']) })
123 }
124
125 getMappable <- function(mappable){
126 if(is.na(mappable)){
127 createMappable(coverages)
128 }else{
129 import(mappable, format="bed", asRangedData = FALSE)
130 }
131 }
132
133
134 args <- commandArgs(TRUE)
135
136 infiles <- unlist(strsplit(args[1], ","))
137 outnames <- unlist(strsplit(args[2], ","))
138 formats <- unlist(strsplit(args[3], ","))
139 outnamePDF <- args[4]
140 outnameMat <- args[5]
141 title <- args[6]
142 mappable <- args[7]
143
144 lp <- list(infiles,outnames,formats,outnamePDF,outnameMat,title,mappable)
145 lpp <- sapply(lp, paste, sep=" ")
146 print("parsed input parameters")
147 print(lpp)
148
149
150 coverages <- getCoverages(infiles, formats)
151 mappability <- getMappable(mappable)
152
153 calcAndPlot(coverages, outnames, mappability, outnamePDF, outnameMat,title)
154
155
156
157