annotate corr.R @ 11:74bfa2464411

the bioconductor GRanges stuff blows
author Ido Tamir <ido.tamir@imp.ac.at>
date Sun, 18 Aug 2013 19:39:52 +0200
parents ecad2cd5038f
children f3e037496c18
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
1 #!/usr/bin/env Rscript
1eb46757d8bb Uploaded
idot
parents:
diff changeset
2 #
1eb46757d8bb Uploaded
idot
parents:
diff changeset
3 #for SGE
1eb46757d8bb Uploaded
idot
parents:
diff changeset
4 #$ -S /biosw/debian5-x86_64/R/3.0.0/bin/Rscript
1eb46757d8bb Uploaded
idot
parents:
diff changeset
5 #$ -q solexa.q
1eb46757d8bb Uploaded
idot
parents:
diff changeset
6 #$ -P pipeline
1eb46757d8bb Uploaded
idot
parents:
diff changeset
7 #$ -cwd
1eb46757d8bb Uploaded
idot
parents:
diff changeset
8 #$ -l vf=30G
1eb46757d8bb Uploaded
idot
parents:
diff changeset
9
1eb46757d8bb Uploaded
idot
parents:
diff changeset
10 #args[1] <- comma separated bigwig input files
1eb46757d8bb Uploaded
idot
parents:
diff changeset
11 #args[2] <- comma separated names of files
1eb46757d8bb Uploaded
idot
parents:
diff changeset
12 #args[3] <- comma separated file formats
1eb46757d8bb Uploaded
idot
parents:
diff changeset
13 #args[4] <- output pdf
1eb46757d8bb Uploaded
idot
parents:
diff changeset
14 #args[5] <- output matrix
1eb46757d8bb Uploaded
idot
parents:
diff changeset
15 #args[6] <- title
1eb46757d8bb Uploaded
idot
parents:
diff changeset
16 #args[7] <- optional mappability bed file #not implemented because we have to change the coverage function (covWith0) to 1. intersection then diff because mappability can be wrong
1eb46757d8bb Uploaded
idot
parents:
diff changeset
17
1eb46757d8bb Uploaded
idot
parents:
diff changeset
18 options("useFancyQuotes" = FALSE)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
19
1eb46757d8bb Uploaded
idot
parents:
diff changeset
20 library(rtracklayer)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
21 library(lattice)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
22 library(latticeExtra)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
23 library(hexbin)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
24
1eb46757d8bb Uploaded
idot
parents:
diff changeset
25 ## creates the union of coverages i.e. mappable regions
1eb46757d8bb Uploaded
idot
parents:
diff changeset
26 ## todo: add mappability argument
1eb46757d8bb Uploaded
idot
parents:
diff changeset
27 createMappable <- function(coverages){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
28 suppressWarnings(Reduce(union, coverages)) #supress because missing chromosomes spit warinings
1eb46757d8bb Uploaded
idot
parents:
diff changeset
29 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
30
1eb46757d8bb Uploaded
idot
parents:
diff changeset
31 ## adds mappable regions as 0 coverage to track
1eb46757d8bb Uploaded
idot
parents:
diff changeset
32 covWith0 <- function(cov, mappable){
9
ecad2cd5038f debug message was making truble
Ido Tamir <ido.tamir@imp.ac.at>
parents: 6
diff changeset
33 print(paste("cov:", summary(cov)))
3
7bdd29cdfed8 more debug messages
Ido Tamir <ido.tamir@imp.ac.at>
parents: 2
diff changeset
34 c0 <- setdiff(mappable, cov)
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
35 cus <- if(length(c0) > 0){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
36 elementMetadata(c0)$score <- 0
1eb46757d8bb Uploaded
idot
parents:
diff changeset
37 sort(c(c0, cov))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
38 }else{
1eb46757d8bb Uploaded
idot
parents:
diff changeset
39 sort(cov)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
40 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
41 rc <- Rle(score(cus), ranges(cus)@width)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
42 rc
1eb46757d8bb Uploaded
idot
parents:
diff changeset
43 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
44
1eb46757d8bb Uploaded
idot
parents:
diff changeset
45 ## unlisted rlelist is too long. Taking weighted mean as approximation.
1eb46757d8bb Uploaded
idot
parents:
diff changeset
46 correlate <- function(rlA, rlB){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
47 weighted.mean(cor(rlA, rlB), w=unlist(lapply(rlA, length)))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
48 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
49
1eb46757d8bb Uploaded
idot
parents:
diff changeset
50
1eb46757d8bb Uploaded
idot
parents:
diff changeset
51 ## correlation dist
1eb46757d8bb Uploaded
idot
parents:
diff changeset
52 corrDist <- function(covRles, outnames){
3
7bdd29cdfed8 more debug messages
Ido Tamir <ido.tamir@imp.ac.at>
parents: 2
diff changeset
53 print("calculating correlation")
7bdd29cdfed8 more debug messages
Ido Tamir <ido.tamir@imp.ac.at>
parents: 2
diff changeset
54 vl <- length(covRles)
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
55 v <- 1:vl
1eb46757d8bb Uploaded
idot
parents:
diff changeset
56 o <- matrix(NA,vl,vl)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
57 colnames(o) <- outnames
1eb46757d8bb Uploaded
idot
parents:
diff changeset
58 rownames(o) <- outnames
1eb46757d8bb Uploaded
idot
parents:
diff changeset
59 tri <- lower.tri(o, diag=FALSE)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
60 indic <- expand.grid(v,v)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
61 pairs <- indic[tri,]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
62 corrs <- apply(pairs,1, function(ins){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
63 index1 <- ins[1]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
64 index2 <- ins[2]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
65 cor(covRles[[index1]], covRles[[index2]])
1eb46757d8bb Uploaded
idot
parents:
diff changeset
66 # correlate(covRles[[index1]], covRles[[index2]])
1eb46757d8bb Uploaded
idot
parents:
diff changeset
67 })
1eb46757d8bb Uploaded
idot
parents:
diff changeset
68
1eb46757d8bb Uploaded
idot
parents:
diff changeset
69 o[lower.tri(o, diag=FALSE)] <- corrs
1eb46757d8bb Uploaded
idot
parents:
diff changeset
70 o[upper.tri(o)] <- t(o)[upper.tri(o)]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
71 diag(o) <- 1
1eb46757d8bb Uploaded
idot
parents:
diff changeset
72 o
1eb46757d8bb Uploaded
idot
parents:
diff changeset
73 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
74
1eb46757d8bb Uploaded
idot
parents:
diff changeset
75
1eb46757d8bb Uploaded
idot
parents:
diff changeset
76
1eb46757d8bb Uploaded
idot
parents:
diff changeset
77 plotCorr <- function(corrMat, title){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
78 cor.d <- dist(corrMat)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
79 cor.row <- as.dendrogram( hclust( cor.d ))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
80 ord <- order.dendrogram(cor.row)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
81 levelplot(
1eb46757d8bb Uploaded
idot
parents:
diff changeset
82 corrMat[ord,ord],
1eb46757d8bb Uploaded
idot
parents:
diff changeset
83 scales=list(x=list(rot=90)),
1eb46757d8bb Uploaded
idot
parents:
diff changeset
84 colorkey = list(space="left"),
1eb46757d8bb Uploaded
idot
parents:
diff changeset
85 col.regions=BTY(100),
1eb46757d8bb Uploaded
idot
parents:
diff changeset
86 xlab = "",
1eb46757d8bb Uploaded
idot
parents:
diff changeset
87 ylab = "pearson correlation coefficient",
1eb46757d8bb Uploaded
idot
parents:
diff changeset
88 legend = list(top=list( fun = dendrogramGrob, args= list(
1eb46757d8bb Uploaded
idot
parents:
diff changeset
89 x=cor.row,
1eb46757d8bb Uploaded
idot
parents:
diff changeset
90 ord = ord,
1eb46757d8bb Uploaded
idot
parents:
diff changeset
91 side = "top"
1eb46757d8bb Uploaded
idot
parents:
diff changeset
92 ))),
1eb46757d8bb Uploaded
idot
parents:
diff changeset
93 main=title
1eb46757d8bb Uploaded
idot
parents:
diff changeset
94 )
1eb46757d8bb Uploaded
idot
parents:
diff changeset
95
1eb46757d8bb Uploaded
idot
parents:
diff changeset
96 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
97
1eb46757d8bb Uploaded
idot
parents:
diff changeset
98 calcAndPlot <- function(coverages, outnames, mappable, outnamePDF, outnameMat, title){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
99
1eb46757d8bb Uploaded
idot
parents:
diff changeset
100 ca <- lapply(coverages, covWith0, mappable)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
101
1eb46757d8bb Uploaded
idot
parents:
diff changeset
102 cd <- corrDist(ca, outnames)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
103 write.table(cd, outnameMat, quote=FALSE, sep="\t", col.names=FALSE)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
104 pdf(outnamePDF)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
105 print(plotCorr(cd, title))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
106 dev.off()
1eb46757d8bb Uploaded
idot
parents:
diff changeset
107
1eb46757d8bb Uploaded
idot
parents:
diff changeset
108 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
109
1eb46757d8bb Uploaded
idot
parents:
diff changeset
110 getCoverage <- function(infile, format){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
111 print(paste("reading", infile, format))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
112 values <- import(infile, format=format, asRangedData = FALSE)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
113 if(tolower(format) %in% c("bigwig","wig")){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
114 values
1eb46757d8bb Uploaded
idot
parents:
diff changeset
115 }else{
1eb46757d8bb Uploaded
idot
parents:
diff changeset
116 cov <- coverage(values)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
117 gcov <- as(cov, "GRanges")
1eb46757d8bb Uploaded
idot
parents:
diff changeset
118 gcov[score(gcov) > 0]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
119 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
120 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
121
1eb46757d8bb Uploaded
idot
parents:
diff changeset
122
1eb46757d8bb Uploaded
idot
parents:
diff changeset
123 getCoverages <- function(infiles, formats){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
124 apply(data.frame(file=infiles,format=formats),1, function(row){ getCoverage(row['file'], row['format']) })
1eb46757d8bb Uploaded
idot
parents:
diff changeset
125 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
126
6
fcf85568a102 dont know how this worked. added coverages to mappable
Ido Tamir <ido.tamir@imp.ac.at>
parents: 3
diff changeset
127 getMappable <- function(coverages, mappable){
9
ecad2cd5038f debug message was making truble
Ido Tamir <ido.tamir@imp.ac.at>
parents: 6
diff changeset
128 print(paste("creating mappability", length(coverages), mappable))
6
fcf85568a102 dont know how this worked. added coverages to mappable
Ido Tamir <ido.tamir@imp.ac.at>
parents: 3
diff changeset
129 if(is.na(mappable)){
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
130 createMappable(coverages)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
131 }else{
1eb46757d8bb Uploaded
idot
parents:
diff changeset
132 import(mappable, format="bed", asRangedData = FALSE)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
133 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
134 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
135
11
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
136 #I hate this bioconductor ranges stuff more and more
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
137 #this is a super s**t api
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
138 mergeSeqInfo <- function(coverages){
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
139 seqinfos <- do.call("rbind", lapply(coverages, function(cov){ d <- as.data.frame(cov@seqinfo); d$seqnames <- rownames(d); d}))
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
140 seqinfosMax <- do.call("rbind", by(seqinfos, seqinfos$seqnames, function(byChr){ byChr[order(byChr$seqlengths, decreasing=TRUE),][1,] }))
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
141 data.frame(seqnames=seqinfosMax$seqnames,seqlengths=seqinfosMax$seqlengths, isCircular=seqinfosMax$isCircular, genome=seqinfosMax$genome)
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
142 }
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
143
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
144
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
145 changeSeqInfo <- function(coverages){
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
146 seqinfo <- mergeSeqInfo(coverages)
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
147 lapply(coverages, function(cov){
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
148 si <- as.data.frame(cov@seqinfo)
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
149 si <- data.frame(chr=rownames(si), si)
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
150 sis <- subset(seqinfo, seqnames %in% si$chr)
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
151 si <- Seqinfo(seqnames=as.character(sis$seqnames), seqlengths=sis$seqlengths, isCircular=sis$isCircular, genome=sis$genome)
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
152 cov@seqinfo <- si
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
153 cov
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
154 })
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
155 }
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
156
1eb46757d8bb Uploaded
idot
parents:
diff changeset
157 args <- commandArgs(TRUE)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
158
1eb46757d8bb Uploaded
idot
parents:
diff changeset
159 infiles <- unlist(strsplit(args[1], ","))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
160 outnames <- unlist(strsplit(args[2], ","))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
161 formats <- unlist(strsplit(args[3], ","))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
162 outnamePDF <- args[4]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
163 outnameMat <- args[5]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
164 title <- args[6]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
165 mappable <- args[7]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
166
1eb46757d8bb Uploaded
idot
parents:
diff changeset
167 lp <- list(infiles,outnames,formats,outnamePDF,outnameMat,title,mappable)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
168 lpp <- sapply(lp, paste, sep=" ")
1eb46757d8bb Uploaded
idot
parents:
diff changeset
169 print("parsed input parameters")
1eb46757d8bb Uploaded
idot
parents:
diff changeset
170 print(lpp)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
171
1eb46757d8bb Uploaded
idot
parents:
diff changeset
172
1eb46757d8bb Uploaded
idot
parents:
diff changeset
173 coverages <- getCoverages(infiles, formats)
11
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
174 coverages <- changeSeqInfo(coverages)
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
175
6
fcf85568a102 dont know how this worked. added coverages to mappable
Ido Tamir <ido.tamir@imp.ac.at>
parents: 3
diff changeset
176 mappability <- getMappable(coverages, mappable)
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
177
11
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
178
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
179 calcAndPlot(coverages, outnames, mappability, outnamePDF, outnameMat,title)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
180
1eb46757d8bb Uploaded
idot
parents:
diff changeset
181
1eb46757d8bb Uploaded
idot
parents:
diff changeset
182
1eb46757d8bb Uploaded
idot
parents:
diff changeset
183