annotate find_qPCR_regions.R @ 5:3cd53127a838 draft default tip

planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
author kyost
date Fri, 04 May 2018 02:24:47 -0400
parents fbfe7b785ea7
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
1 ## Command to run tool:
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
2 # Rscript --vanilla find_qPCR_regions.R o.coverage.bed f9.coverage.bed lib_sizes.txt cor_cutoff cov_cutoff output_file
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
3
5
3cd53127a838 planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents: 3
diff changeset
4 options(warn=-1)
0
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
5
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
6 # Avoid crashing Galaxy with an UTF8 error on German LC settings
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
7 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
8
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
9 args <- commandArgs(TRUE)
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
10
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
11 o.coverage <- args[1]
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
12 f9.coverage <- args[2]
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
13 lib_sizes <- args[3]
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
14 corr_cutoff <- as.numeric(args[4])
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
15 cov_cutoff <- as.numeric(args[5])
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
16 output_file <- args[6]
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
17
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
18 o.coverage.data <- read.delim(o.coverage, header=FALSE)
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
19 f9.coverage.data <- read.delim(f9.coverage, header=FALSE)
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
20 lib.sizes.data <- read.delim(lib_sizes, header=FALSE)
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
21
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
22 m <- ncol(o.coverage.data)
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
23
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
24 #normalize library sizes to 50 mil reads
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
25 lib.sizes.data <- lib.sizes.data/50000000
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
26 #normalize peak reads and spanning reads by library size
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
27 o.coverage.data[,5:m] <- t(apply(o.coverage.data[,5:m], 1, function(x) x/t(lib.sizes.data)))
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
28 f9.coverage.data[,5:m] <- t(apply(f9.coverage.data[,5:m], 1, function(x) x/t(lib.sizes.data)))
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
29
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
30 #returns list of dataframes containing windows for each peak
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
31 split_coverage <- function(a) {
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
32 returnlist <- list()
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
33 temp <- a[1,]
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
34 for (i in 2:nrow(a)) {
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
35 if (temp[1,4] == strsplit(as.character(a[i,4]),"_window")[[1]][1]){
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
36 temp <- rbind(temp, a[i,])
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
37 }else{
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
38 returnlist[[length(returnlist)+1]] <- temp
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
39 temp <- a[i,]
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
40 }
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
41 }
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
42 returnlist[[length(returnlist)+1]] <- temp
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
43 return(returnlist)
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
44 }
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
45
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
46 make_cor_plot <- function(a,b, corr_cut, cov_cut, output_file) { #a=o.coverage b=f9.coverage
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
47
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
48 asplit <- split_coverage(a)
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
49 bsplit <- split_coverage(b)
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
50
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
51 combined_regions <- data.frame()
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
52
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
53 for (i in 1:length(asplit)){
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
54 n <- nrow(asplit[[i]])-1
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
55 m <- ncol(asplit[[i]])
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
56
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
57 a_split <- asplit[[i]]
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
58 b_split <- bsplit[[i]]
3
fbfe7b785ea7 Add test for 0 standard deviation.
kyost
parents: 0
diff changeset
59
0
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
60
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
61 #calculate correlation between total peak reads and spanning reads in each window
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
62 corb <- data.frame(cor(t(a_split[1,5:m])
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
63 , t(b_split[-1,5:m])
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
64 , method = "pearson"))
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
65
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
66 data <- data.frame(t(corb), row.names = NULL, stringsAsFactors = FALSE)
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
67
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
68 data <- cbind(rep(c(1:n))
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
69 , a_split[-1,1:4]
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
70 , data)
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
71 colnames(data)<-c("position","chr", "start", "stop", "name", "correlation")
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
72 rownames(data)<-NULL
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
73 a_split$average <- rowMeans(a_split[,5:m])
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
74 b_split$average <- rowMeans(b_split[,5:m])
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
75 data2 <- data.frame(cbind(c(1:n), a_split[-1,m+1]), stringsAsFactors=FALSE)
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
76 data3 <- data.frame(cbind(c(1:n), b_split[-1,m+1]), stringsAsFactors=FALSE)
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
77 colnames(data2)<-c("position","averageReadDepth")
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
78 colnames(data3)<-c("position","averageReadDepth")
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
79
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
80 #keep windows for which correlation with total peak reads is above threshold and
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
81 #number of spanning reads is above cutoff
5
3cd53127a838 planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents: 3
diff changeset
82 regions <- data[which(data$correlation >= corr_cut & data3$averageReadDepth >= cov_cut),]
0
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
83 regions <- regions[,2:4]
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
84
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
85 #collapse windows to non-overlappin regions
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
86 if (nrow(regions)>0){
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
87 regions$V4 <- paste(as.character(a_split[1,4]),"_region1",sep="")
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
88 collapsed_regions <- regions[1,]
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
89 nregion <- 1
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
90 if (nrow(regions)>=2){
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
91 for (j in 2:nrow(regions)) {
5
3cd53127a838 planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents: 3
diff changeset
92 if (as.numeric(regions[j,2]) < (as.numeric(regions[j-1,3]))) {
0
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
93 collapsed_regions[nrow(collapsed_regions),3] <- regions[j,3]
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
94 }else{
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
95 collapsed_regions <- rbind(collapsed_regions, regions[j,])
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
96 nregion <- nregion + 1
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
97 collapsed_regions[nrow(collapsed_regions),4] <- paste(as.character(a_split[1,4])
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
98 ,"_region"
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
99 , as.character(nregion)
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
100 ,sep="")
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
101 }
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
102 }
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
103 }
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
104 rownames(collapsed_regions) <- NULL
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
105
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
106 combined_regions <- rbind(combined_regions, collapsed_regions)
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
107 }
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
108
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
109
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
110 }
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
111
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
112 write.table(combined_regions
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
113 , output_file
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
114 , sep = "\t"
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
115 , col.names = FALSE
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
116 , row.names = FALSE
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
117 , quote = FALSE)
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
118 print("Successfully found optimal ATAC-qPCR regions.")
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
119 }
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
120
fd3ea97a96bc planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff changeset
121 make_cor_plot(o.coverage.data, f9.coverage.data, corr_cutoff, cov_cutoff, output_file)