2
|
1 #!/usr/bin/env Rscript
|
|
2 #
|
|
3 #for SGE
|
|
4 #$ -S /biosw/debian5-x86_64/R/3.0.0/bin/Rscript
|
|
5 #$ -q solexa.q
|
|
6 #$ -P pipeline
|
|
7 #$ -cwd
|
|
8 #$ -l vf=30G
|
|
9
|
|
10 #args[1] <- comma separated bigwig input files
|
|
11 #args[2] <- comma separated names of files
|
|
12 #args[3] <- comma separated file formats
|
|
13 #args[4] <- output pdf
|
|
14 #args[5] <- output matrix
|
|
15 #args[6] <- title
|
|
16 #args[7] <- optional mappability bed file #not implemented because we have to change the coverage function (covWith0) to 1. intersection then diff because mappability can be wrong
|
|
17
|
|
18 options("useFancyQuotes" = FALSE)
|
|
19
|
|
20 library(rtracklayer)
|
|
21 library(lattice)
|
|
22 library(latticeExtra)
|
|
23 library(hexbin)
|
|
24
|
|
25 ## creates the union of coverages i.e. mappable regions
|
|
26 ## todo: add mappability argument
|
|
27 createMappable <- function(coverages){
|
|
28 suppressWarnings(Reduce(union, coverages)) #supress because missing chromosomes spit warinings
|
|
29 }
|
|
30
|
|
31 ## adds mappable regions as 0 coverage to track
|
|
32 covWith0 <- function(cov, mappable){
|
|
33 c0 <- setdiff(mappable, cov)
|
|
34 cus <- if(length(c0) > 0){
|
|
35 elementMetadata(c0)$score <- 0
|
|
36 sort(c(c0, cov))
|
|
37 }else{
|
|
38 sort(cov)
|
|
39 }
|
|
40 rc <- Rle(score(cus), ranges(cus)@width)
|
|
41 rc
|
|
42 }
|
|
43
|
|
44 ## unlisted rlelist is too long. Taking weighted mean as approximation.
|
|
45 correlate <- function(rlA, rlB){
|
|
46 weighted.mean(cor(rlA, rlB), w=unlist(lapply(rlA, length)))
|
|
47 }
|
|
48
|
|
49
|
|
50 ## correlation dist
|
|
51 corrDist <- function(covRles, outnames){
|
|
52 vl <- length(covRles)
|
|
53 v <- 1:vl
|
|
54 o <- matrix(NA,vl,vl)
|
|
55 colnames(o) <- outnames
|
|
56 rownames(o) <- outnames
|
|
57 tri <- lower.tri(o, diag=FALSE)
|
|
58 indic <- expand.grid(v,v)
|
|
59 pairs <- indic[tri,]
|
|
60 corrs <- apply(pairs,1, function(ins){
|
|
61 index1 <- ins[1]
|
|
62 index2 <- ins[2]
|
|
63 cor(covRles[[index1]], covRles[[index2]])
|
|
64 # correlate(covRles[[index1]], covRles[[index2]])
|
|
65 })
|
|
66
|
|
67 o[lower.tri(o, diag=FALSE)] <- corrs
|
|
68 o[upper.tri(o)] <- t(o)[upper.tri(o)]
|
|
69 diag(o) <- 1
|
|
70 o
|
|
71 }
|
|
72
|
|
73
|
|
74
|
|
75 plotCorr <- function(corrMat, title){
|
|
76 cor.d <- dist(corrMat)
|
|
77 cor.row <- as.dendrogram( hclust( cor.d ))
|
|
78 ord <- order.dendrogram(cor.row)
|
|
79 levelplot(
|
|
80 corrMat[ord,ord],
|
|
81 scales=list(x=list(rot=90)),
|
|
82 colorkey = list(space="left"),
|
|
83 col.regions=BTY(100),
|
|
84 xlab = "",
|
|
85 ylab = "pearson correlation coefficient",
|
|
86 legend = list(top=list( fun = dendrogramGrob, args= list(
|
|
87 x=cor.row,
|
|
88 ord = ord,
|
|
89 side = "top"
|
|
90 ))),
|
|
91 main=title
|
|
92 )
|
|
93
|
|
94 }
|
|
95
|
|
96 calcAndPlot <- function(coverages, outnames, mappable, outnamePDF, outnameMat, title){
|
|
97
|
|
98 ca <- lapply(coverages, covWith0, mappable)
|
|
99
|
|
100 cd <- corrDist(ca, outnames)
|
|
101 write.table(cd, outnameMat, quote=FALSE, sep="\t", col.names=FALSE)
|
|
102 pdf(outnamePDF)
|
|
103 print(plotCorr(cd, title))
|
|
104 dev.off()
|
|
105
|
|
106 }
|
|
107
|
|
108 getCoverage <- function(infile, format){
|
|
109 print(paste("reading", infile, format))
|
|
110 values <- import(infile, format=format, asRangedData = FALSE)
|
|
111 if(tolower(format) %in% c("bigwig","wig")){
|
|
112 values
|
|
113 }else{
|
|
114 cov <- coverage(values)
|
|
115 gcov <- as(cov, "GRanges")
|
|
116 gcov[score(gcov) > 0]
|
|
117 }
|
|
118 }
|
|
119
|
|
120
|
|
121 getCoverages <- function(infiles, formats){
|
|
122 apply(data.frame(file=infiles,format=formats),1, function(row){ getCoverage(row['file'], row['format']) })
|
|
123 }
|
|
124
|
|
125 getMappable <- function(mappable){
|
|
126 if(is.na(mappable)){
|
|
127 createMappable(coverages)
|
|
128 }else{
|
|
129 import(mappable, format="bed", asRangedData = FALSE)
|
|
130 }
|
|
131 }
|
|
132
|
|
133
|
|
134 args <- commandArgs(TRUE)
|
|
135
|
|
136 infiles <- unlist(strsplit(args[1], ","))
|
|
137 outnames <- unlist(strsplit(args[2], ","))
|
|
138 formats <- unlist(strsplit(args[3], ","))
|
|
139 outnamePDF <- args[4]
|
|
140 outnameMat <- args[5]
|
|
141 title <- args[6]
|
|
142 mappable <- args[7]
|
|
143
|
|
144 lp <- list(infiles,outnames,formats,outnamePDF,outnameMat,title,mappable)
|
|
145 lpp <- sapply(lp, paste, sep=" ")
|
|
146 print("parsed input parameters")
|
|
147 print(lpp)
|
|
148
|
|
149
|
|
150 coverages <- getCoverages(infiles, formats)
|
|
151 mappability <- getMappable(mappable)
|
|
152
|
|
153 calcAndPlot(coverages, outnames, mappability, outnamePDF, outnameMat,title)
|
|
154
|
|
155
|
|
156
|
|
157
|