2
|
1 #!/usr/bin/env Rscript
|
|
2 #
|
|
3 #for SGE
|
|
4 #$ -S /biosw/debian5-x86_64/R/3.0.0/bin/Rscript
|
|
5 #$ -q solexa.q
|
|
6 #$ -P pipeline
|
|
7 #$ -cwd
|
|
8 #$ -l vf=30G
|
|
9
|
|
10 #args[1] <- comma separated bigwig input files
|
|
11 #args[2] <- comma separated names of files
|
|
12 #args[3] <- comma separated file formats
|
|
13 #args[4] <- output pdf
|
|
14 #args[5] <- output matrix
|
|
15 #args[6] <- title
|
|
16 #args[7] <- optional mappability bed file #not implemented because we have to change the coverage function (covWith0) to 1. intersection then diff because mappability can be wrong
|
|
17
|
|
18 options("useFancyQuotes" = FALSE)
|
|
19
|
|
20 library(rtracklayer)
|
|
21 library(lattice)
|
|
22 library(latticeExtra)
|
|
23 library(hexbin)
|
|
24
|
|
25 ## creates the union of coverages i.e. mappable regions
|
|
26 ## todo: add mappability argument
|
|
27 createMappable <- function(coverages){
|
|
28 suppressWarnings(Reduce(union, coverages)) #supress because missing chromosomes spit warinings
|
|
29 }
|
|
30
|
|
31 ## adds mappable regions as 0 coverage to track
|
|
32 covWith0 <- function(cov, mappable){
|
3
|
33 print(paste("cov:",cov))
|
|
34 c0 <- setdiff(mappable, cov)
|
2
|
35 cus <- if(length(c0) > 0){
|
|
36 elementMetadata(c0)$score <- 0
|
|
37 sort(c(c0, cov))
|
|
38 }else{
|
|
39 sort(cov)
|
|
40 }
|
|
41 rc <- Rle(score(cus), ranges(cus)@width)
|
|
42 rc
|
|
43 }
|
|
44
|
|
45 ## unlisted rlelist is too long. Taking weighted mean as approximation.
|
|
46 correlate <- function(rlA, rlB){
|
|
47 weighted.mean(cor(rlA, rlB), w=unlist(lapply(rlA, length)))
|
|
48 }
|
|
49
|
|
50
|
|
51 ## correlation dist
|
|
52 corrDist <- function(covRles, outnames){
|
3
|
53 print("calculating correlation")
|
|
54 vl <- length(covRles)
|
2
|
55 v <- 1:vl
|
|
56 o <- matrix(NA,vl,vl)
|
|
57 colnames(o) <- outnames
|
|
58 rownames(o) <- outnames
|
|
59 tri <- lower.tri(o, diag=FALSE)
|
|
60 indic <- expand.grid(v,v)
|
|
61 pairs <- indic[tri,]
|
|
62 corrs <- apply(pairs,1, function(ins){
|
|
63 index1 <- ins[1]
|
|
64 index2 <- ins[2]
|
|
65 cor(covRles[[index1]], covRles[[index2]])
|
|
66 # correlate(covRles[[index1]], covRles[[index2]])
|
|
67 })
|
|
68
|
|
69 o[lower.tri(o, diag=FALSE)] <- corrs
|
|
70 o[upper.tri(o)] <- t(o)[upper.tri(o)]
|
|
71 diag(o) <- 1
|
|
72 o
|
|
73 }
|
|
74
|
|
75
|
|
76
|
|
77 plotCorr <- function(corrMat, title){
|
|
78 cor.d <- dist(corrMat)
|
|
79 cor.row <- as.dendrogram( hclust( cor.d ))
|
|
80 ord <- order.dendrogram(cor.row)
|
|
81 levelplot(
|
|
82 corrMat[ord,ord],
|
|
83 scales=list(x=list(rot=90)),
|
|
84 colorkey = list(space="left"),
|
|
85 col.regions=BTY(100),
|
|
86 xlab = "",
|
|
87 ylab = "pearson correlation coefficient",
|
|
88 legend = list(top=list( fun = dendrogramGrob, args= list(
|
|
89 x=cor.row,
|
|
90 ord = ord,
|
|
91 side = "top"
|
|
92 ))),
|
|
93 main=title
|
|
94 )
|
|
95
|
|
96 }
|
|
97
|
|
98 calcAndPlot <- function(coverages, outnames, mappable, outnamePDF, outnameMat, title){
|
|
99
|
|
100 ca <- lapply(coverages, covWith0, mappable)
|
|
101
|
|
102 cd <- corrDist(ca, outnames)
|
|
103 write.table(cd, outnameMat, quote=FALSE, sep="\t", col.names=FALSE)
|
|
104 pdf(outnamePDF)
|
|
105 print(plotCorr(cd, title))
|
|
106 dev.off()
|
|
107
|
|
108 }
|
|
109
|
|
110 getCoverage <- function(infile, format){
|
|
111 print(paste("reading", infile, format))
|
|
112 values <- import(infile, format=format, asRangedData = FALSE)
|
|
113 if(tolower(format) %in% c("bigwig","wig")){
|
|
114 values
|
|
115 }else{
|
|
116 cov <- coverage(values)
|
|
117 gcov <- as(cov, "GRanges")
|
|
118 gcov[score(gcov) > 0]
|
|
119 }
|
|
120 }
|
|
121
|
|
122
|
|
123 getCoverages <- function(infiles, formats){
|
|
124 apply(data.frame(file=infiles,format=formats),1, function(row){ getCoverage(row['file'], row['format']) })
|
|
125 }
|
|
126
|
|
127 getMappable <- function(mappable){
|
|
128 if(is.na(mappable)){
|
|
129 createMappable(coverages)
|
|
130 }else{
|
|
131 import(mappable, format="bed", asRangedData = FALSE)
|
|
132 }
|
|
133 }
|
|
134
|
|
135
|
|
136 args <- commandArgs(TRUE)
|
|
137
|
|
138 infiles <- unlist(strsplit(args[1], ","))
|
|
139 outnames <- unlist(strsplit(args[2], ","))
|
|
140 formats <- unlist(strsplit(args[3], ","))
|
|
141 outnamePDF <- args[4]
|
|
142 outnameMat <- args[5]
|
|
143 title <- args[6]
|
|
144 mappable <- args[7]
|
|
145
|
|
146 lp <- list(infiles,outnames,formats,outnamePDF,outnameMat,title,mappable)
|
|
147 lpp <- sapply(lp, paste, sep=" ")
|
|
148 print("parsed input parameters")
|
|
149 print(lpp)
|
|
150
|
|
151
|
|
152 coverages <- getCoverages(infiles, formats)
|
|
153 mappability <- getMappable(mappable)
|
|
154
|
|
155 calcAndPlot(coverages, outnames, mappability, outnamePDF, outnameMat,title)
|
|
156
|
|
157
|
|
158
|
|
159
|