annotate corr.R @ 14:5a4cbfb5fbc5 default tip

loading R3.0.2
author Ido Tamir <ido.tamir@imp.ac.at>
date Mon, 27 Oct 2014 10:57:35 +0100
parents de452b96da8e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
1 #!/usr/bin/env Rscript
1eb46757d8bb Uploaded
idot
parents:
diff changeset
2 #
1eb46757d8bb Uploaded
idot
parents:
diff changeset
3 #for SGE
1eb46757d8bb Uploaded
idot
parents:
diff changeset
4 #$ -S /biosw/debian5-x86_64/R/3.0.0/bin/Rscript
1eb46757d8bb Uploaded
idot
parents:
diff changeset
5 #$ -q solexa.q
1eb46757d8bb Uploaded
idot
parents:
diff changeset
6 #$ -P pipeline
1eb46757d8bb Uploaded
idot
parents:
diff changeset
7 #$ -cwd
1eb46757d8bb Uploaded
idot
parents:
diff changeset
8 #$ -l vf=30G
1eb46757d8bb Uploaded
idot
parents:
diff changeset
9
1eb46757d8bb Uploaded
idot
parents:
diff changeset
10 #args[1] <- comma separated bigwig input files
1eb46757d8bb Uploaded
idot
parents:
diff changeset
11 #args[2] <- comma separated names of files
1eb46757d8bb Uploaded
idot
parents:
diff changeset
12 #args[3] <- comma separated file formats
1eb46757d8bb Uploaded
idot
parents:
diff changeset
13 #args[4] <- output pdf
1eb46757d8bb Uploaded
idot
parents:
diff changeset
14 #args[5] <- output matrix
1eb46757d8bb Uploaded
idot
parents:
diff changeset
15 #args[6] <- title
1eb46757d8bb Uploaded
idot
parents:
diff changeset
16 #args[7] <- optional mappability bed file #not implemented because we have to change the coverage function (covWith0) to 1. intersection then diff because mappability can be wrong
1eb46757d8bb Uploaded
idot
parents:
diff changeset
17
1eb46757d8bb Uploaded
idot
parents:
diff changeset
18 options("useFancyQuotes" = FALSE)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
19
13
de452b96da8e added bam input
Ido Tamir <ido.tamir@imp.ac.at>
parents: 12
diff changeset
20 TEST=FALSE
de452b96da8e added bam input
Ido Tamir <ido.tamir@imp.ac.at>
parents: 12
diff changeset
21
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
22 library(rtracklayer)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
23 library(lattice)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
24 library(latticeExtra)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
25 library(hexbin)
13
de452b96da8e added bam input
Ido Tamir <ido.tamir@imp.ac.at>
parents: 12
diff changeset
26 library(Rsamtools)
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
27
1eb46757d8bb Uploaded
idot
parents:
diff changeset
28 ## creates the union of coverages i.e. mappable regions
1eb46757d8bb Uploaded
idot
parents:
diff changeset
29 ## todo: add mappability argument
1eb46757d8bb Uploaded
idot
parents:
diff changeset
30 createMappable <- function(coverages){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
31 suppressWarnings(Reduce(union, coverages)) #supress because missing chromosomes spit warinings
1eb46757d8bb Uploaded
idot
parents:
diff changeset
32 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
33
1eb46757d8bb Uploaded
idot
parents:
diff changeset
34 ## adds mappable regions as 0 coverage to track
1eb46757d8bb Uploaded
idot
parents:
diff changeset
35 covWith0 <- function(cov, mappable){
9
ecad2cd5038f debug message was making truble
Ido Tamir <ido.tamir@imp.ac.at>
parents: 6
diff changeset
36 print(paste("cov:", summary(cov)))
3
7bdd29cdfed8 more debug messages
Ido Tamir <ido.tamir@imp.ac.at>
parents: 2
diff changeset
37 c0 <- setdiff(mappable, cov)
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
38 cus <- if(length(c0) > 0){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
39 elementMetadata(c0)$score <- 0
1eb46757d8bb Uploaded
idot
parents:
diff changeset
40 sort(c(c0, cov))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
41 }else{
1eb46757d8bb Uploaded
idot
parents:
diff changeset
42 sort(cov)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
43 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
44 rc <- Rle(score(cus), ranges(cus)@width)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
45 rc
1eb46757d8bb Uploaded
idot
parents:
diff changeset
46 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
47
1eb46757d8bb Uploaded
idot
parents:
diff changeset
48 ## unlisted rlelist is too long. Taking weighted mean as approximation.
1eb46757d8bb Uploaded
idot
parents:
diff changeset
49 correlate <- function(rlA, rlB){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
50 weighted.mean(cor(rlA, rlB), w=unlist(lapply(rlA, length)))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
51 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
52
1eb46757d8bb Uploaded
idot
parents:
diff changeset
53
1eb46757d8bb Uploaded
idot
parents:
diff changeset
54 ## correlation dist
1eb46757d8bb Uploaded
idot
parents:
diff changeset
55 corrDist <- function(covRles, outnames){
3
7bdd29cdfed8 more debug messages
Ido Tamir <ido.tamir@imp.ac.at>
parents: 2
diff changeset
56 print("calculating correlation")
7bdd29cdfed8 more debug messages
Ido Tamir <ido.tamir@imp.ac.at>
parents: 2
diff changeset
57 vl <- length(covRles)
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
58 v <- 1:vl
1eb46757d8bb Uploaded
idot
parents:
diff changeset
59 o <- matrix(NA,vl,vl)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
60 colnames(o) <- outnames
1eb46757d8bb Uploaded
idot
parents:
diff changeset
61 rownames(o) <- outnames
1eb46757d8bb Uploaded
idot
parents:
diff changeset
62 tri <- lower.tri(o, diag=FALSE)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
63 indic <- expand.grid(v,v)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
64 pairs <- indic[tri,]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
65 corrs <- apply(pairs,1, function(ins){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
66 index1 <- ins[1]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
67 index2 <- ins[2]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
68 cor(covRles[[index1]], covRles[[index2]])
1eb46757d8bb Uploaded
idot
parents:
diff changeset
69 # correlate(covRles[[index1]], covRles[[index2]])
1eb46757d8bb Uploaded
idot
parents:
diff changeset
70 })
1eb46757d8bb Uploaded
idot
parents:
diff changeset
71
1eb46757d8bb Uploaded
idot
parents:
diff changeset
72 o[lower.tri(o, diag=FALSE)] <- corrs
1eb46757d8bb Uploaded
idot
parents:
diff changeset
73 o[upper.tri(o)] <- t(o)[upper.tri(o)]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
74 diag(o) <- 1
1eb46757d8bb Uploaded
idot
parents:
diff changeset
75 o
1eb46757d8bb Uploaded
idot
parents:
diff changeset
76 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
77
1eb46757d8bb Uploaded
idot
parents:
diff changeset
78
1eb46757d8bb Uploaded
idot
parents:
diff changeset
79
1eb46757d8bb Uploaded
idot
parents:
diff changeset
80 plotCorr <- function(corrMat, title){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
81 cor.d <- dist(corrMat)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
82 cor.row <- as.dendrogram( hclust( cor.d ))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
83 ord <- order.dendrogram(cor.row)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
84 levelplot(
1eb46757d8bb Uploaded
idot
parents:
diff changeset
85 corrMat[ord,ord],
1eb46757d8bb Uploaded
idot
parents:
diff changeset
86 scales=list(x=list(rot=90)),
1eb46757d8bb Uploaded
idot
parents:
diff changeset
87 colorkey = list(space="left"),
1eb46757d8bb Uploaded
idot
parents:
diff changeset
88 col.regions=BTY(100),
1eb46757d8bb Uploaded
idot
parents:
diff changeset
89 xlab = "",
1eb46757d8bb Uploaded
idot
parents:
diff changeset
90 ylab = "pearson correlation coefficient",
1eb46757d8bb Uploaded
idot
parents:
diff changeset
91 legend = list(top=list( fun = dendrogramGrob, args= list(
1eb46757d8bb Uploaded
idot
parents:
diff changeset
92 x=cor.row,
1eb46757d8bb Uploaded
idot
parents:
diff changeset
93 ord = ord,
1eb46757d8bb Uploaded
idot
parents:
diff changeset
94 side = "top"
1eb46757d8bb Uploaded
idot
parents:
diff changeset
95 ))),
1eb46757d8bb Uploaded
idot
parents:
diff changeset
96 main=title
1eb46757d8bb Uploaded
idot
parents:
diff changeset
97 )
1eb46757d8bb Uploaded
idot
parents:
diff changeset
98
1eb46757d8bb Uploaded
idot
parents:
diff changeset
99 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
100
1eb46757d8bb Uploaded
idot
parents:
diff changeset
101 calcAndPlot <- function(coverages, outnames, mappable, outnamePDF, outnameMat, title){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
102
1eb46757d8bb Uploaded
idot
parents:
diff changeset
103 ca <- lapply(coverages, covWith0, mappable)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
104
1eb46757d8bb Uploaded
idot
parents:
diff changeset
105 cd <- corrDist(ca, outnames)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
106 write.table(cd, outnameMat, quote=FALSE, sep="\t", col.names=FALSE)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
107 pdf(outnamePDF)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
108 print(plotCorr(cd, title))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
109 dev.off()
1eb46757d8bb Uploaded
idot
parents:
diff changeset
110
1eb46757d8bb Uploaded
idot
parents:
diff changeset
111 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
112
1eb46757d8bb Uploaded
idot
parents:
diff changeset
113 getCoverage <- function(infile, format){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
114 print(paste("reading", infile, format))
13
de452b96da8e added bam input
Ido Tamir <ido.tamir@imp.ac.at>
parents: 12
diff changeset
115 values <- if(format == "bam"){
de452b96da8e added bam input
Ido Tamir <ido.tamir@imp.ac.at>
parents: 12
diff changeset
116 import(infile, format=format)
de452b96da8e added bam input
Ido Tamir <ido.tamir@imp.ac.at>
parents: 12
diff changeset
117 }else{
de452b96da8e added bam input
Ido Tamir <ido.tamir@imp.ac.at>
parents: 12
diff changeset
118 import(infile, format=format, asRangedData = FALSE)
de452b96da8e added bam input
Ido Tamir <ido.tamir@imp.ac.at>
parents: 12
diff changeset
119 }
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
120 if(tolower(format) %in% c("bigwig","wig")){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
121 values
1eb46757d8bb Uploaded
idot
parents:
diff changeset
122 }else{
1eb46757d8bb Uploaded
idot
parents:
diff changeset
123 cov <- coverage(values)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
124 gcov <- as(cov, "GRanges")
1eb46757d8bb Uploaded
idot
parents:
diff changeset
125 gcov[score(gcov) > 0]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
126 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
127 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
128
1eb46757d8bb Uploaded
idot
parents:
diff changeset
129
1eb46757d8bb Uploaded
idot
parents:
diff changeset
130 getCoverages <- function(infiles, formats){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
131 apply(data.frame(file=infiles,format=formats),1, function(row){ getCoverage(row['file'], row['format']) })
1eb46757d8bb Uploaded
idot
parents:
diff changeset
132 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
133
6
fcf85568a102 dont know how this worked. added coverages to mappable
Ido Tamir <ido.tamir@imp.ac.at>
parents: 3
diff changeset
134 getMappable <- function(coverages, mappable){
9
ecad2cd5038f debug message was making truble
Ido Tamir <ido.tamir@imp.ac.at>
parents: 6
diff changeset
135 print(paste("creating mappability", length(coverages), mappable))
6
fcf85568a102 dont know how this worked. added coverages to mappable
Ido Tamir <ido.tamir@imp.ac.at>
parents: 3
diff changeset
136 if(is.na(mappable)){
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
137 createMappable(coverages)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
138 }else{
1eb46757d8bb Uploaded
idot
parents:
diff changeset
139 import(mappable, format="bed", asRangedData = FALSE)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
140 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
141 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
142
11
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
143 #I hate this bioconductor ranges stuff more and more
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
144 #this is a super s**t api
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
145 mergeSeqInfo <- function(coverages){
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
146 seqinfos <- do.call("rbind", lapply(coverages, function(cov){ d <- as.data.frame(cov@seqinfo); d$seqnames <- rownames(d); d}))
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
147 seqinfosMax <- do.call("rbind", by(seqinfos, seqinfos$seqnames, function(byChr){ byChr[order(byChr$seqlengths, decreasing=TRUE),][1,] }))
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
148 data.frame(seqnames=seqinfosMax$seqnames,seqlengths=seqinfosMax$seqlengths, isCircular=seqinfosMax$isCircular, genome=seqinfosMax$genome)
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
149 }
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
150
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
151
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
152 changeSeqInfo <- function(coverages){
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
153 seqinfo <- mergeSeqInfo(coverages)
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
154 lapply(coverages, function(cov){
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
155 si <- as.data.frame(cov@seqinfo)
12
f3e037496c18 its even checking the levels
Ido Tamir <ido.tamir@imp.ac.at>
parents: 11
diff changeset
156 si <- data.frame(chr=rownames(si), si, stringsAsFactors=FALSE)
f3e037496c18 its even checking the levels
Ido Tamir <ido.tamir@imp.ac.at>
parents: 11
diff changeset
157 sis <- subset(seqinfo, seqnames %in% si$chr)
f3e037496c18 its even checking the levels
Ido Tamir <ido.tamir@imp.ac.at>
parents: 11
diff changeset
158 sis <- sis[match(si$chr, sis$seqnames),]
11
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
159 si <- Seqinfo(seqnames=as.character(sis$seqnames), seqlengths=sis$seqlengths, isCircular=sis$isCircular, genome=sis$genome)
12
f3e037496c18 its even checking the levels
Ido Tamir <ido.tamir@imp.ac.at>
parents: 11
diff changeset
160 cov@seqinfo <- si
11
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
161 cov
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
162 })
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
163 }
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
164
1eb46757d8bb Uploaded
idot
parents:
diff changeset
165 args <- commandArgs(TRUE)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
166
13
de452b96da8e added bam input
Ido Tamir <ido.tamir@imp.ac.at>
parents: 12
diff changeset
167 if(TEST){
de452b96da8e added bam input
Ido Tamir <ido.tamir@imp.ac.at>
parents: 12
diff changeset
168 args[1] <- c("file1.bed,file2.bed,file3.bed,file4.bed")
de452b96da8e added bam input
Ido Tamir <ido.tamir@imp.ac.at>
parents: 12
diff changeset
169 args[1] <- c("file1.bam,file2.bam,file3.bam,file4.bam")
de452b96da8e added bam input
Ido Tamir <ido.tamir@imp.ac.at>
parents: 12
diff changeset
170 args[2] <- c("file1,file2,file3,file4")
de452b96da8e added bam input
Ido Tamir <ido.tamir@imp.ac.at>
parents: 12
diff changeset
171 args[3] <- c("bam,bam,bam,bam")
de452b96da8e added bam input
Ido Tamir <ido.tamir@imp.ac.at>
parents: 12
diff changeset
172 args[4] <- "out.pdf"
de452b96da8e added bam input
Ido Tamir <ido.tamir@imp.ac.at>
parents: 12
diff changeset
173 args[5] <- "mat.mat"
de452b96da8e added bam input
Ido Tamir <ido.tamir@imp.ac.at>
parents: 12
diff changeset
174 args[6] <- "title"
de452b96da8e added bam input
Ido Tamir <ido.tamir@imp.ac.at>
parents: 12
diff changeset
175 args[7] <- NA
de452b96da8e added bam input
Ido Tamir <ido.tamir@imp.ac.at>
parents: 12
diff changeset
176 }
de452b96da8e added bam input
Ido Tamir <ido.tamir@imp.ac.at>
parents: 12
diff changeset
177
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
178 infiles <- unlist(strsplit(args[1], ","))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
179 outnames <- unlist(strsplit(args[2], ","))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
180 formats <- unlist(strsplit(args[3], ","))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
181 outnamePDF <- args[4]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
182 outnameMat <- args[5]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
183 title <- args[6]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
184 mappable <- args[7]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
185
1eb46757d8bb Uploaded
idot
parents:
diff changeset
186 lp <- list(infiles,outnames,formats,outnamePDF,outnameMat,title,mappable)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
187 lpp <- sapply(lp, paste, sep=" ")
1eb46757d8bb Uploaded
idot
parents:
diff changeset
188 print("parsed input parameters")
1eb46757d8bb Uploaded
idot
parents:
diff changeset
189 print(lpp)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
190
1eb46757d8bb Uploaded
idot
parents:
diff changeset
191
1eb46757d8bb Uploaded
idot
parents:
diff changeset
192 coverages <- getCoverages(infiles, formats)
11
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
193 coverages <- changeSeqInfo(coverages)
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
194
6
fcf85568a102 dont know how this worked. added coverages to mappable
Ido Tamir <ido.tamir@imp.ac.at>
parents: 3
diff changeset
195 mappability <- getMappable(coverages, mappable)
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
196
11
74bfa2464411 the bioconductor GRanges stuff blows
Ido Tamir <ido.tamir@imp.ac.at>
parents: 9
diff changeset
197
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
198 calcAndPlot(coverages, outnames, mappability, outnamePDF, outnameMat,title)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
199
1eb46757d8bb Uploaded
idot
parents:
diff changeset
200
1eb46757d8bb Uploaded
idot
parents:
diff changeset
201
1eb46757d8bb Uploaded
idot
parents:
diff changeset
202