annotate corr.R @ 2:1eb46757d8bb

Uploaded
author idot
date Sun, 18 Aug 2013 09:42:48 -0400
parents
children 7bdd29cdfed8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
1 #!/usr/bin/env Rscript
1eb46757d8bb Uploaded
idot
parents:
diff changeset
2 #
1eb46757d8bb Uploaded
idot
parents:
diff changeset
3 #for SGE
1eb46757d8bb Uploaded
idot
parents:
diff changeset
4 #$ -S /biosw/debian5-x86_64/R/3.0.0/bin/Rscript
1eb46757d8bb Uploaded
idot
parents:
diff changeset
5 #$ -q solexa.q
1eb46757d8bb Uploaded
idot
parents:
diff changeset
6 #$ -P pipeline
1eb46757d8bb Uploaded
idot
parents:
diff changeset
7 #$ -cwd
1eb46757d8bb Uploaded
idot
parents:
diff changeset
8 #$ -l vf=30G
1eb46757d8bb Uploaded
idot
parents:
diff changeset
9
1eb46757d8bb Uploaded
idot
parents:
diff changeset
10 #args[1] <- comma separated bigwig input files
1eb46757d8bb Uploaded
idot
parents:
diff changeset
11 #args[2] <- comma separated names of files
1eb46757d8bb Uploaded
idot
parents:
diff changeset
12 #args[3] <- comma separated file formats
1eb46757d8bb Uploaded
idot
parents:
diff changeset
13 #args[4] <- output pdf
1eb46757d8bb Uploaded
idot
parents:
diff changeset
14 #args[5] <- output matrix
1eb46757d8bb Uploaded
idot
parents:
diff changeset
15 #args[6] <- title
1eb46757d8bb Uploaded
idot
parents:
diff changeset
16 #args[7] <- optional mappability bed file #not implemented because we have to change the coverage function (covWith0) to 1. intersection then diff because mappability can be wrong
1eb46757d8bb Uploaded
idot
parents:
diff changeset
17
1eb46757d8bb Uploaded
idot
parents:
diff changeset
18 options("useFancyQuotes" = FALSE)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
19
1eb46757d8bb Uploaded
idot
parents:
diff changeset
20 library(rtracklayer)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
21 library(lattice)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
22 library(latticeExtra)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
23 library(hexbin)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
24
1eb46757d8bb Uploaded
idot
parents:
diff changeset
25 ## creates the union of coverages i.e. mappable regions
1eb46757d8bb Uploaded
idot
parents:
diff changeset
26 ## todo: add mappability argument
1eb46757d8bb Uploaded
idot
parents:
diff changeset
27 createMappable <- function(coverages){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
28 suppressWarnings(Reduce(union, coverages)) #supress because missing chromosomes spit warinings
1eb46757d8bb Uploaded
idot
parents:
diff changeset
29 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
30
1eb46757d8bb Uploaded
idot
parents:
diff changeset
31 ## adds mappable regions as 0 coverage to track
1eb46757d8bb Uploaded
idot
parents:
diff changeset
32 covWith0 <- function(cov, mappable){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
33 c0 <- setdiff(mappable, cov)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
34 cus <- if(length(c0) > 0){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
35 elementMetadata(c0)$score <- 0
1eb46757d8bb Uploaded
idot
parents:
diff changeset
36 sort(c(c0, cov))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
37 }else{
1eb46757d8bb Uploaded
idot
parents:
diff changeset
38 sort(cov)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
39 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
40 rc <- Rle(score(cus), ranges(cus)@width)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
41 rc
1eb46757d8bb Uploaded
idot
parents:
diff changeset
42 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
43
1eb46757d8bb Uploaded
idot
parents:
diff changeset
44 ## unlisted rlelist is too long. Taking weighted mean as approximation.
1eb46757d8bb Uploaded
idot
parents:
diff changeset
45 correlate <- function(rlA, rlB){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
46 weighted.mean(cor(rlA, rlB), w=unlist(lapply(rlA, length)))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
47 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
48
1eb46757d8bb Uploaded
idot
parents:
diff changeset
49
1eb46757d8bb Uploaded
idot
parents:
diff changeset
50 ## correlation dist
1eb46757d8bb Uploaded
idot
parents:
diff changeset
51 corrDist <- function(covRles, outnames){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
52 vl <- length(covRles)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
53 v <- 1:vl
1eb46757d8bb Uploaded
idot
parents:
diff changeset
54 o <- matrix(NA,vl,vl)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
55 colnames(o) <- outnames
1eb46757d8bb Uploaded
idot
parents:
diff changeset
56 rownames(o) <- outnames
1eb46757d8bb Uploaded
idot
parents:
diff changeset
57 tri <- lower.tri(o, diag=FALSE)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
58 indic <- expand.grid(v,v)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
59 pairs <- indic[tri,]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
60 corrs <- apply(pairs,1, function(ins){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
61 index1 <- ins[1]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
62 index2 <- ins[2]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
63 cor(covRles[[index1]], covRles[[index2]])
1eb46757d8bb Uploaded
idot
parents:
diff changeset
64 # correlate(covRles[[index1]], covRles[[index2]])
1eb46757d8bb Uploaded
idot
parents:
diff changeset
65 })
1eb46757d8bb Uploaded
idot
parents:
diff changeset
66
1eb46757d8bb Uploaded
idot
parents:
diff changeset
67 o[lower.tri(o, diag=FALSE)] <- corrs
1eb46757d8bb Uploaded
idot
parents:
diff changeset
68 o[upper.tri(o)] <- t(o)[upper.tri(o)]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
69 diag(o) <- 1
1eb46757d8bb Uploaded
idot
parents:
diff changeset
70 o
1eb46757d8bb Uploaded
idot
parents:
diff changeset
71 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
72
1eb46757d8bb Uploaded
idot
parents:
diff changeset
73
1eb46757d8bb Uploaded
idot
parents:
diff changeset
74
1eb46757d8bb Uploaded
idot
parents:
diff changeset
75 plotCorr <- function(corrMat, title){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
76 cor.d <- dist(corrMat)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
77 cor.row <- as.dendrogram( hclust( cor.d ))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
78 ord <- order.dendrogram(cor.row)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
79 levelplot(
1eb46757d8bb Uploaded
idot
parents:
diff changeset
80 corrMat[ord,ord],
1eb46757d8bb Uploaded
idot
parents:
diff changeset
81 scales=list(x=list(rot=90)),
1eb46757d8bb Uploaded
idot
parents:
diff changeset
82 colorkey = list(space="left"),
1eb46757d8bb Uploaded
idot
parents:
diff changeset
83 col.regions=BTY(100),
1eb46757d8bb Uploaded
idot
parents:
diff changeset
84 xlab = "",
1eb46757d8bb Uploaded
idot
parents:
diff changeset
85 ylab = "pearson correlation coefficient",
1eb46757d8bb Uploaded
idot
parents:
diff changeset
86 legend = list(top=list( fun = dendrogramGrob, args= list(
1eb46757d8bb Uploaded
idot
parents:
diff changeset
87 x=cor.row,
1eb46757d8bb Uploaded
idot
parents:
diff changeset
88 ord = ord,
1eb46757d8bb Uploaded
idot
parents:
diff changeset
89 side = "top"
1eb46757d8bb Uploaded
idot
parents:
diff changeset
90 ))),
1eb46757d8bb Uploaded
idot
parents:
diff changeset
91 main=title
1eb46757d8bb Uploaded
idot
parents:
diff changeset
92 )
1eb46757d8bb Uploaded
idot
parents:
diff changeset
93
1eb46757d8bb Uploaded
idot
parents:
diff changeset
94 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
95
1eb46757d8bb Uploaded
idot
parents:
diff changeset
96 calcAndPlot <- function(coverages, outnames, mappable, outnamePDF, outnameMat, title){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
97
1eb46757d8bb Uploaded
idot
parents:
diff changeset
98 ca <- lapply(coverages, covWith0, mappable)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
99
1eb46757d8bb Uploaded
idot
parents:
diff changeset
100 cd <- corrDist(ca, outnames)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
101 write.table(cd, outnameMat, quote=FALSE, sep="\t", col.names=FALSE)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
102 pdf(outnamePDF)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
103 print(plotCorr(cd, title))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
104 dev.off()
1eb46757d8bb Uploaded
idot
parents:
diff changeset
105
1eb46757d8bb Uploaded
idot
parents:
diff changeset
106 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
107
1eb46757d8bb Uploaded
idot
parents:
diff changeset
108 getCoverage <- function(infile, format){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
109 print(paste("reading", infile, format))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
110 values <- import(infile, format=format, asRangedData = FALSE)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
111 if(tolower(format) %in% c("bigwig","wig")){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
112 values
1eb46757d8bb Uploaded
idot
parents:
diff changeset
113 }else{
1eb46757d8bb Uploaded
idot
parents:
diff changeset
114 cov <- coverage(values)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
115 gcov <- as(cov, "GRanges")
1eb46757d8bb Uploaded
idot
parents:
diff changeset
116 gcov[score(gcov) > 0]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
117 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
118 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
119
1eb46757d8bb Uploaded
idot
parents:
diff changeset
120
1eb46757d8bb Uploaded
idot
parents:
diff changeset
121 getCoverages <- function(infiles, formats){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
122 apply(data.frame(file=infiles,format=formats),1, function(row){ getCoverage(row['file'], row['format']) })
1eb46757d8bb Uploaded
idot
parents:
diff changeset
123 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
124
1eb46757d8bb Uploaded
idot
parents:
diff changeset
125 getMappable <- function(mappable){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
126 if(is.na(mappable)){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
127 createMappable(coverages)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
128 }else{
1eb46757d8bb Uploaded
idot
parents:
diff changeset
129 import(mappable, format="bed", asRangedData = FALSE)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
130 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
131 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
132
1eb46757d8bb Uploaded
idot
parents:
diff changeset
133
1eb46757d8bb Uploaded
idot
parents:
diff changeset
134 args <- commandArgs(TRUE)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
135
1eb46757d8bb Uploaded
idot
parents:
diff changeset
136 infiles <- unlist(strsplit(args[1], ","))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
137 outnames <- unlist(strsplit(args[2], ","))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
138 formats <- unlist(strsplit(args[3], ","))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
139 outnamePDF <- args[4]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
140 outnameMat <- args[5]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
141 title <- args[6]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
142 mappable <- args[7]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
143
1eb46757d8bb Uploaded
idot
parents:
diff changeset
144 lp <- list(infiles,outnames,formats,outnamePDF,outnameMat,title,mappable)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
145 lpp <- sapply(lp, paste, sep=" ")
1eb46757d8bb Uploaded
idot
parents:
diff changeset
146 print("parsed input parameters")
1eb46757d8bb Uploaded
idot
parents:
diff changeset
147 print(lpp)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
148
1eb46757d8bb Uploaded
idot
parents:
diff changeset
149
1eb46757d8bb Uploaded
idot
parents:
diff changeset
150 coverages <- getCoverages(infiles, formats)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
151 mappability <- getMappable(mappable)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
152
1eb46757d8bb Uploaded
idot
parents:
diff changeset
153 calcAndPlot(coverages, outnames, mappability, outnamePDF, outnameMat,title)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
154
1eb46757d8bb Uploaded
idot
parents:
diff changeset
155
1eb46757d8bb Uploaded
idot
parents:
diff changeset
156
1eb46757d8bb Uploaded
idot
parents:
diff changeset
157