Mercurial > repos > kyost > atac_primer_tool
annotate find_qPCR_regions.R @ 3:fbfe7b785ea7 draft
Add test for 0 standard deviation.
author | kyost |
---|---|
date | Wed, 02 May 2018 13:20:01 -0400 |
parents | fd3ea97a96bc |
children | 3cd53127a838 |
rev | line source |
---|---|
0
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
1 ## Command to run tool: |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
2 # Rscript --vanilla find_qPCR_regions.R o.coverage.bed f9.coverage.bed lib_sizes.txt cor_cutoff cov_cutoff output_file |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
3 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
4 # Set up R error handling to go to stderr |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
5 options(show.error.messages=F, error=function(){cat(geterrmessage(),file=stderr());q("no",1,F)}) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
6 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
7 # Avoid crashing Galaxy with an UTF8 error on German LC settings |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
8 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
9 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
10 args <- commandArgs(TRUE) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
11 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
12 o.coverage <- args[1] |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
13 f9.coverage <- args[2] |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
14 lib_sizes <- args[3] |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
15 corr_cutoff <- as.numeric(args[4]) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
16 cov_cutoff <- as.numeric(args[5]) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
17 output_file <- args[6] |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
18 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
19 o.coverage.data <- read.delim(o.coverage, header=FALSE) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
20 f9.coverage.data <- read.delim(f9.coverage, header=FALSE) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
21 lib.sizes.data <- read.delim(lib_sizes, header=FALSE) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
22 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
23 m <- ncol(o.coverage.data) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
24 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
25 #normalize library sizes to 50 mil reads |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
26 lib.sizes.data <- lib.sizes.data/50000000 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
27 #normalize peak reads and spanning reads by library size |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
28 o.coverage.data[,5:m] <- t(apply(o.coverage.data[,5:m], 1, function(x) x/t(lib.sizes.data))) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
29 f9.coverage.data[,5:m] <- t(apply(f9.coverage.data[,5:m], 1, function(x) x/t(lib.sizes.data))) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
30 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
31 #returns list of dataframes containing windows for each peak |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
32 split_coverage <- function(a) { |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
33 returnlist <- list() |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
34 temp <- a[1,] |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
35 for (i in 2:nrow(a)) { |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
36 if (temp[1,4] == strsplit(as.character(a[i,4]),"_window")[[1]][1]){ |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
37 temp <- rbind(temp, a[i,]) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
38 }else{ |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
39 returnlist[[length(returnlist)+1]] <- temp |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
40 temp <- a[i,] |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
41 } |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
42 } |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
43 returnlist[[length(returnlist)+1]] <- temp |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
44 return(returnlist) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
45 } |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
46 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
47 make_cor_plot <- function(a,b, corr_cut, cov_cut, output_file) { #a=o.coverage b=f9.coverage |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
48 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
49 asplit <- split_coverage(a) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
50 bsplit <- split_coverage(b) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
51 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
52 combined_regions <- data.frame() |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
53 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
54 for (i in 1:length(asplit)){ |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
55 n <- nrow(asplit[[i]])-1 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
56 m <- ncol(asplit[[i]]) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
57 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
58 a_split <- asplit[[i]] |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
59 b_split <- bsplit[[i]] |
3 | 60 |
61 if (sd(t(a_split[1,5:m])) == 0 || sd(t(b_split[-1,5:m])) == 0 ){ | |
62 next | |
63 } | |
0
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
64 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
65 #calculate correlation between total peak reads and spanning reads in each window |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
66 corb <- data.frame(cor(t(a_split[1,5:m]) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
67 , t(b_split[-1,5:m]) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
68 , method = "pearson")) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
69 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
70 data <- data.frame(t(corb), row.names = NULL, stringsAsFactors = FALSE) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
71 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
72 data <- cbind(rep(c(1:n)) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
73 , a_split[-1,1:4] |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
74 , data) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
75 colnames(data)<-c("position","chr", "start", "stop", "name", "correlation") |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
76 rownames(data)<-NULL |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
77 a_split$average <- rowMeans(a_split[,5:m]) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
78 b_split$average <- rowMeans(b_split[,5:m]) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
79 data2 <- data.frame(cbind(c(1:n), a_split[-1,m+1]), stringsAsFactors=FALSE) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
80 data3 <- data.frame(cbind(c(1:n), b_split[-1,m+1]), stringsAsFactors=FALSE) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
81 colnames(data2)<-c("position","averageReadDepth") |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
82 colnames(data3)<-c("position","averageReadDepth") |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
83 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
84 #keep windows for which correlation with total peak reads is above threshold and |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
85 #number of spanning reads is above cutoff |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
86 regions <- data[which(data$correlation > corr_cut & data3$averageReadDepth > cov_cut),] |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
87 regions <- regions[,2:4] |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
88 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
89 #collapse windows to non-overlappin regions |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
90 if (nrow(regions)>0){ |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
91 regions$V4 <- paste(as.character(a_split[1,4]),"_region1",sep="") |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
92 collapsed_regions <- regions[1,] |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
93 nregion <- 1 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
94 if (nrow(regions)>=2){ |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
95 for (j in 2:nrow(regions)) { |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
96 if (as.numeric(regions[j,2]) <= (as.numeric(regions[j-1,3]))) { |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
97 collapsed_regions[nrow(collapsed_regions),3] <- regions[j,3] |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
98 }else{ |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
99 collapsed_regions <- rbind(collapsed_regions, regions[j,]) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
100 nregion <- nregion + 1 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
101 collapsed_regions[nrow(collapsed_regions),4] <- paste(as.character(a_split[1,4]) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
102 ,"_region" |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
103 , as.character(nregion) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
104 ,sep="") |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
105 } |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
106 } |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
107 } |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
108 rownames(collapsed_regions) <- NULL |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
109 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
110 combined_regions <- rbind(combined_regions, collapsed_regions) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
111 } |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
112 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
113 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
114 } |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
115 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
116 write.table(combined_regions |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
117 , output_file |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
118 , sep = "\t" |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
119 , col.names = FALSE |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
120 , row.names = FALSE |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
121 , quote = FALSE) |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
122 print("Successfully found optimal ATAC-qPCR regions.") |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
123 } |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
124 |
fd3ea97a96bc
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
kyost
parents:
diff
changeset
|
125 make_cor_plot(o.coverage.data, f9.coverage.data, corr_cutoff, cov_cutoff, output_file) |