Mercurial > repos > idot > coverage_correlation
comparison corr.R @ 2:1eb46757d8bb
Uploaded
author | idot |
---|---|
date | Sun, 18 Aug 2013 09:42:48 -0400 |
parents | |
children | 7bdd29cdfed8 |
comparison
equal
deleted
inserted
replaced
1:f106b52086b0 | 2:1eb46757d8bb |
---|---|
1 #!/usr/bin/env Rscript | |
2 # | |
3 #for SGE | |
4 #$ -S /biosw/debian5-x86_64/R/3.0.0/bin/Rscript | |
5 #$ -q solexa.q | |
6 #$ -P pipeline | |
7 #$ -cwd | |
8 #$ -l vf=30G | |
9 | |
10 #args[1] <- comma separated bigwig input files | |
11 #args[2] <- comma separated names of files | |
12 #args[3] <- comma separated file formats | |
13 #args[4] <- output pdf | |
14 #args[5] <- output matrix | |
15 #args[6] <- title | |
16 #args[7] <- optional mappability bed file #not implemented because we have to change the coverage function (covWith0) to 1. intersection then diff because mappability can be wrong | |
17 | |
18 options("useFancyQuotes" = FALSE) | |
19 | |
20 library(rtracklayer) | |
21 library(lattice) | |
22 library(latticeExtra) | |
23 library(hexbin) | |
24 | |
25 ## creates the union of coverages i.e. mappable regions | |
26 ## todo: add mappability argument | |
27 createMappable <- function(coverages){ | |
28 suppressWarnings(Reduce(union, coverages)) #supress because missing chromosomes spit warinings | |
29 } | |
30 | |
31 ## adds mappable regions as 0 coverage to track | |
32 covWith0 <- function(cov, mappable){ | |
33 c0 <- setdiff(mappable, cov) | |
34 cus <- if(length(c0) > 0){ | |
35 elementMetadata(c0)$score <- 0 | |
36 sort(c(c0, cov)) | |
37 }else{ | |
38 sort(cov) | |
39 } | |
40 rc <- Rle(score(cus), ranges(cus)@width) | |
41 rc | |
42 } | |
43 | |
44 ## unlisted rlelist is too long. Taking weighted mean as approximation. | |
45 correlate <- function(rlA, rlB){ | |
46 weighted.mean(cor(rlA, rlB), w=unlist(lapply(rlA, length))) | |
47 } | |
48 | |
49 | |
50 ## correlation dist | |
51 corrDist <- function(covRles, outnames){ | |
52 vl <- length(covRles) | |
53 v <- 1:vl | |
54 o <- matrix(NA,vl,vl) | |
55 colnames(o) <- outnames | |
56 rownames(o) <- outnames | |
57 tri <- lower.tri(o, diag=FALSE) | |
58 indic <- expand.grid(v,v) | |
59 pairs <- indic[tri,] | |
60 corrs <- apply(pairs,1, function(ins){ | |
61 index1 <- ins[1] | |
62 index2 <- ins[2] | |
63 cor(covRles[[index1]], covRles[[index2]]) | |
64 # correlate(covRles[[index1]], covRles[[index2]]) | |
65 }) | |
66 | |
67 o[lower.tri(o, diag=FALSE)] <- corrs | |
68 o[upper.tri(o)] <- t(o)[upper.tri(o)] | |
69 diag(o) <- 1 | |
70 o | |
71 } | |
72 | |
73 | |
74 | |
75 plotCorr <- function(corrMat, title){ | |
76 cor.d <- dist(corrMat) | |
77 cor.row <- as.dendrogram( hclust( cor.d )) | |
78 ord <- order.dendrogram(cor.row) | |
79 levelplot( | |
80 corrMat[ord,ord], | |
81 scales=list(x=list(rot=90)), | |
82 colorkey = list(space="left"), | |
83 col.regions=BTY(100), | |
84 xlab = "", | |
85 ylab = "pearson correlation coefficient", | |
86 legend = list(top=list( fun = dendrogramGrob, args= list( | |
87 x=cor.row, | |
88 ord = ord, | |
89 side = "top" | |
90 ))), | |
91 main=title | |
92 ) | |
93 | |
94 } | |
95 | |
96 calcAndPlot <- function(coverages, outnames, mappable, outnamePDF, outnameMat, title){ | |
97 | |
98 ca <- lapply(coverages, covWith0, mappable) | |
99 | |
100 cd <- corrDist(ca, outnames) | |
101 write.table(cd, outnameMat, quote=FALSE, sep="\t", col.names=FALSE) | |
102 pdf(outnamePDF) | |
103 print(plotCorr(cd, title)) | |
104 dev.off() | |
105 | |
106 } | |
107 | |
108 getCoverage <- function(infile, format){ | |
109 print(paste("reading", infile, format)) | |
110 values <- import(infile, format=format, asRangedData = FALSE) | |
111 if(tolower(format) %in% c("bigwig","wig")){ | |
112 values | |
113 }else{ | |
114 cov <- coverage(values) | |
115 gcov <- as(cov, "GRanges") | |
116 gcov[score(gcov) > 0] | |
117 } | |
118 } | |
119 | |
120 | |
121 getCoverages <- function(infiles, formats){ | |
122 apply(data.frame(file=infiles,format=formats),1, function(row){ getCoverage(row['file'], row['format']) }) | |
123 } | |
124 | |
125 getMappable <- function(mappable){ | |
126 if(is.na(mappable)){ | |
127 createMappable(coverages) | |
128 }else{ | |
129 import(mappable, format="bed", asRangedData = FALSE) | |
130 } | |
131 } | |
132 | |
133 | |
134 args <- commandArgs(TRUE) | |
135 | |
136 infiles <- unlist(strsplit(args[1], ",")) | |
137 outnames <- unlist(strsplit(args[2], ",")) | |
138 formats <- unlist(strsplit(args[3], ",")) | |
139 outnamePDF <- args[4] | |
140 outnameMat <- args[5] | |
141 title <- args[6] | |
142 mappable <- args[7] | |
143 | |
144 lp <- list(infiles,outnames,formats,outnamePDF,outnameMat,title,mappable) | |
145 lpp <- sapply(lp, paste, sep=" ") | |
146 print("parsed input parameters") | |
147 print(lpp) | |
148 | |
149 | |
150 coverages <- getCoverages(infiles, formats) | |
151 mappability <- getMappable(mappable) | |
152 | |
153 calcAndPlot(coverages, outnames, mappability, outnamePDF, outnameMat,title) | |
154 | |
155 | |
156 | |
157 |