annotate corr.R @ 10:6df7b07f81c1

better explanation for optional name
author Ido Tamir <ido.tamir@imp.ac.at>
date Sun, 18 Aug 2013 18:10:48 +0200
parents ecad2cd5038f
children 74bfa2464411
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
1 #!/usr/bin/env Rscript
1eb46757d8bb Uploaded
idot
parents:
diff changeset
2 #
1eb46757d8bb Uploaded
idot
parents:
diff changeset
3 #for SGE
1eb46757d8bb Uploaded
idot
parents:
diff changeset
4 #$ -S /biosw/debian5-x86_64/R/3.0.0/bin/Rscript
1eb46757d8bb Uploaded
idot
parents:
diff changeset
5 #$ -q solexa.q
1eb46757d8bb Uploaded
idot
parents:
diff changeset
6 #$ -P pipeline
1eb46757d8bb Uploaded
idot
parents:
diff changeset
7 #$ -cwd
1eb46757d8bb Uploaded
idot
parents:
diff changeset
8 #$ -l vf=30G
1eb46757d8bb Uploaded
idot
parents:
diff changeset
9
1eb46757d8bb Uploaded
idot
parents:
diff changeset
10 #args[1] <- comma separated bigwig input files
1eb46757d8bb Uploaded
idot
parents:
diff changeset
11 #args[2] <- comma separated names of files
1eb46757d8bb Uploaded
idot
parents:
diff changeset
12 #args[3] <- comma separated file formats
1eb46757d8bb Uploaded
idot
parents:
diff changeset
13 #args[4] <- output pdf
1eb46757d8bb Uploaded
idot
parents:
diff changeset
14 #args[5] <- output matrix
1eb46757d8bb Uploaded
idot
parents:
diff changeset
15 #args[6] <- title
1eb46757d8bb Uploaded
idot
parents:
diff changeset
16 #args[7] <- optional mappability bed file #not implemented because we have to change the coverage function (covWith0) to 1. intersection then diff because mappability can be wrong
1eb46757d8bb Uploaded
idot
parents:
diff changeset
17
1eb46757d8bb Uploaded
idot
parents:
diff changeset
18 options("useFancyQuotes" = FALSE)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
19
1eb46757d8bb Uploaded
idot
parents:
diff changeset
20 library(rtracklayer)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
21 library(lattice)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
22 library(latticeExtra)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
23 library(hexbin)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
24
1eb46757d8bb Uploaded
idot
parents:
diff changeset
25 ## creates the union of coverages i.e. mappable regions
1eb46757d8bb Uploaded
idot
parents:
diff changeset
26 ## todo: add mappability argument
1eb46757d8bb Uploaded
idot
parents:
diff changeset
27 createMappable <- function(coverages){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
28 suppressWarnings(Reduce(union, coverages)) #supress because missing chromosomes spit warinings
1eb46757d8bb Uploaded
idot
parents:
diff changeset
29 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
30
1eb46757d8bb Uploaded
idot
parents:
diff changeset
31 ## adds mappable regions as 0 coverage to track
1eb46757d8bb Uploaded
idot
parents:
diff changeset
32 covWith0 <- function(cov, mappable){
9
ecad2cd5038f debug message was making truble
Ido Tamir <ido.tamir@imp.ac.at>
parents: 6
diff changeset
33 print(paste("cov:", summary(cov)))
3
7bdd29cdfed8 more debug messages
Ido Tamir <ido.tamir@imp.ac.at>
parents: 2
diff changeset
34 c0 <- setdiff(mappable, cov)
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
35 cus <- if(length(c0) > 0){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
36 elementMetadata(c0)$score <- 0
1eb46757d8bb Uploaded
idot
parents:
diff changeset
37 sort(c(c0, cov))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
38 }else{
1eb46757d8bb Uploaded
idot
parents:
diff changeset
39 sort(cov)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
40 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
41 rc <- Rle(score(cus), ranges(cus)@width)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
42 rc
1eb46757d8bb Uploaded
idot
parents:
diff changeset
43 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
44
1eb46757d8bb Uploaded
idot
parents:
diff changeset
45 ## unlisted rlelist is too long. Taking weighted mean as approximation.
1eb46757d8bb Uploaded
idot
parents:
diff changeset
46 correlate <- function(rlA, rlB){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
47 weighted.mean(cor(rlA, rlB), w=unlist(lapply(rlA, length)))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
48 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
49
1eb46757d8bb Uploaded
idot
parents:
diff changeset
50
1eb46757d8bb Uploaded
idot
parents:
diff changeset
51 ## correlation dist
1eb46757d8bb Uploaded
idot
parents:
diff changeset
52 corrDist <- function(covRles, outnames){
3
7bdd29cdfed8 more debug messages
Ido Tamir <ido.tamir@imp.ac.at>
parents: 2
diff changeset
53 print("calculating correlation")
7bdd29cdfed8 more debug messages
Ido Tamir <ido.tamir@imp.ac.at>
parents: 2
diff changeset
54 vl <- length(covRles)
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
55 v <- 1:vl
1eb46757d8bb Uploaded
idot
parents:
diff changeset
56 o <- matrix(NA,vl,vl)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
57 colnames(o) <- outnames
1eb46757d8bb Uploaded
idot
parents:
diff changeset
58 rownames(o) <- outnames
1eb46757d8bb Uploaded
idot
parents:
diff changeset
59 tri <- lower.tri(o, diag=FALSE)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
60 indic <- expand.grid(v,v)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
61 pairs <- indic[tri,]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
62 corrs <- apply(pairs,1, function(ins){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
63 index1 <- ins[1]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
64 index2 <- ins[2]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
65 cor(covRles[[index1]], covRles[[index2]])
1eb46757d8bb Uploaded
idot
parents:
diff changeset
66 # correlate(covRles[[index1]], covRles[[index2]])
1eb46757d8bb Uploaded
idot
parents:
diff changeset
67 })
1eb46757d8bb Uploaded
idot
parents:
diff changeset
68
1eb46757d8bb Uploaded
idot
parents:
diff changeset
69 o[lower.tri(o, diag=FALSE)] <- corrs
1eb46757d8bb Uploaded
idot
parents:
diff changeset
70 o[upper.tri(o)] <- t(o)[upper.tri(o)]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
71 diag(o) <- 1
1eb46757d8bb Uploaded
idot
parents:
diff changeset
72 o
1eb46757d8bb Uploaded
idot
parents:
diff changeset
73 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
74
1eb46757d8bb Uploaded
idot
parents:
diff changeset
75
1eb46757d8bb Uploaded
idot
parents:
diff changeset
76
1eb46757d8bb Uploaded
idot
parents:
diff changeset
77 plotCorr <- function(corrMat, title){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
78 cor.d <- dist(corrMat)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
79 cor.row <- as.dendrogram( hclust( cor.d ))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
80 ord <- order.dendrogram(cor.row)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
81 levelplot(
1eb46757d8bb Uploaded
idot
parents:
diff changeset
82 corrMat[ord,ord],
1eb46757d8bb Uploaded
idot
parents:
diff changeset
83 scales=list(x=list(rot=90)),
1eb46757d8bb Uploaded
idot
parents:
diff changeset
84 colorkey = list(space="left"),
1eb46757d8bb Uploaded
idot
parents:
diff changeset
85 col.regions=BTY(100),
1eb46757d8bb Uploaded
idot
parents:
diff changeset
86 xlab = "",
1eb46757d8bb Uploaded
idot
parents:
diff changeset
87 ylab = "pearson correlation coefficient",
1eb46757d8bb Uploaded
idot
parents:
diff changeset
88 legend = list(top=list( fun = dendrogramGrob, args= list(
1eb46757d8bb Uploaded
idot
parents:
diff changeset
89 x=cor.row,
1eb46757d8bb Uploaded
idot
parents:
diff changeset
90 ord = ord,
1eb46757d8bb Uploaded
idot
parents:
diff changeset
91 side = "top"
1eb46757d8bb Uploaded
idot
parents:
diff changeset
92 ))),
1eb46757d8bb Uploaded
idot
parents:
diff changeset
93 main=title
1eb46757d8bb Uploaded
idot
parents:
diff changeset
94 )
1eb46757d8bb Uploaded
idot
parents:
diff changeset
95
1eb46757d8bb Uploaded
idot
parents:
diff changeset
96 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
97
1eb46757d8bb Uploaded
idot
parents:
diff changeset
98 calcAndPlot <- function(coverages, outnames, mappable, outnamePDF, outnameMat, title){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
99
1eb46757d8bb Uploaded
idot
parents:
diff changeset
100 ca <- lapply(coverages, covWith0, mappable)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
101
1eb46757d8bb Uploaded
idot
parents:
diff changeset
102 cd <- corrDist(ca, outnames)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
103 write.table(cd, outnameMat, quote=FALSE, sep="\t", col.names=FALSE)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
104 pdf(outnamePDF)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
105 print(plotCorr(cd, title))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
106 dev.off()
1eb46757d8bb Uploaded
idot
parents:
diff changeset
107
1eb46757d8bb Uploaded
idot
parents:
diff changeset
108 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
109
1eb46757d8bb Uploaded
idot
parents:
diff changeset
110 getCoverage <- function(infile, format){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
111 print(paste("reading", infile, format))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
112 values <- import(infile, format=format, asRangedData = FALSE)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
113 if(tolower(format) %in% c("bigwig","wig")){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
114 values
1eb46757d8bb Uploaded
idot
parents:
diff changeset
115 }else{
1eb46757d8bb Uploaded
idot
parents:
diff changeset
116 cov <- coverage(values)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
117 gcov <- as(cov, "GRanges")
1eb46757d8bb Uploaded
idot
parents:
diff changeset
118 gcov[score(gcov) > 0]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
119 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
120 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
121
1eb46757d8bb Uploaded
idot
parents:
diff changeset
122
1eb46757d8bb Uploaded
idot
parents:
diff changeset
123 getCoverages <- function(infiles, formats){
1eb46757d8bb Uploaded
idot
parents:
diff changeset
124 apply(data.frame(file=infiles,format=formats),1, function(row){ getCoverage(row['file'], row['format']) })
1eb46757d8bb Uploaded
idot
parents:
diff changeset
125 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
126
6
fcf85568a102 dont know how this worked. added coverages to mappable
Ido Tamir <ido.tamir@imp.ac.at>
parents: 3
diff changeset
127 getMappable <- function(coverages, mappable){
9
ecad2cd5038f debug message was making truble
Ido Tamir <ido.tamir@imp.ac.at>
parents: 6
diff changeset
128 print(paste("creating mappability", length(coverages), mappable))
6
fcf85568a102 dont know how this worked. added coverages to mappable
Ido Tamir <ido.tamir@imp.ac.at>
parents: 3
diff changeset
129 if(is.na(mappable)){
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
130 createMappable(coverages)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
131 }else{
1eb46757d8bb Uploaded
idot
parents:
diff changeset
132 import(mappable, format="bed", asRangedData = FALSE)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
133 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
134 }
1eb46757d8bb Uploaded
idot
parents:
diff changeset
135
1eb46757d8bb Uploaded
idot
parents:
diff changeset
136
1eb46757d8bb Uploaded
idot
parents:
diff changeset
137 args <- commandArgs(TRUE)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
138
1eb46757d8bb Uploaded
idot
parents:
diff changeset
139 infiles <- unlist(strsplit(args[1], ","))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
140 outnames <- unlist(strsplit(args[2], ","))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
141 formats <- unlist(strsplit(args[3], ","))
1eb46757d8bb Uploaded
idot
parents:
diff changeset
142 outnamePDF <- args[4]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
143 outnameMat <- args[5]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
144 title <- args[6]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
145 mappable <- args[7]
1eb46757d8bb Uploaded
idot
parents:
diff changeset
146
1eb46757d8bb Uploaded
idot
parents:
diff changeset
147 lp <- list(infiles,outnames,formats,outnamePDF,outnameMat,title,mappable)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
148 lpp <- sapply(lp, paste, sep=" ")
1eb46757d8bb Uploaded
idot
parents:
diff changeset
149 print("parsed input parameters")
1eb46757d8bb Uploaded
idot
parents:
diff changeset
150 print(lpp)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
151
1eb46757d8bb Uploaded
idot
parents:
diff changeset
152
1eb46757d8bb Uploaded
idot
parents:
diff changeset
153 coverages <- getCoverages(infiles, formats)
6
fcf85568a102 dont know how this worked. added coverages to mappable
Ido Tamir <ido.tamir@imp.ac.at>
parents: 3
diff changeset
154 mappability <- getMappable(coverages, mappable)
2
1eb46757d8bb Uploaded
idot
parents:
diff changeset
155
1eb46757d8bb Uploaded
idot
parents:
diff changeset
156 calcAndPlot(coverages, outnames, mappability, outnamePDF, outnameMat,title)
1eb46757d8bb Uploaded
idot
parents:
diff changeset
157
1eb46757d8bb Uploaded
idot
parents:
diff changeset
158
1eb46757d8bb Uploaded
idot
parents:
diff changeset
159
1eb46757d8bb Uploaded
idot
parents:
diff changeset
160