Mercurial > repos > kyost > atac_primer_tool
comparison find_qPCR_regions.R @ 0:fd3ea97a96bc draft
planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb
author | kyost |
---|---|
date | Sat, 28 Apr 2018 15:07:26 -0400 |
parents | |
children | fbfe7b785ea7 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:fd3ea97a96bc |
---|---|
1 ## Command to run tool: | |
2 # Rscript --vanilla find_qPCR_regions.R o.coverage.bed f9.coverage.bed lib_sizes.txt cor_cutoff cov_cutoff output_file | |
3 | |
4 # Set up R error handling to go to stderr | |
5 options(show.error.messages=F, error=function(){cat(geterrmessage(),file=stderr());q("no",1,F)}) | |
6 | |
7 # Avoid crashing Galaxy with an UTF8 error on German LC settings | |
8 loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8") | |
9 | |
10 args <- commandArgs(TRUE) | |
11 | |
12 o.coverage <- args[1] | |
13 f9.coverage <- args[2] | |
14 lib_sizes <- args[3] | |
15 corr_cutoff <- as.numeric(args[4]) | |
16 cov_cutoff <- as.numeric(args[5]) | |
17 output_file <- args[6] | |
18 | |
19 o.coverage.data <- read.delim(o.coverage, header=FALSE) | |
20 f9.coverage.data <- read.delim(f9.coverage, header=FALSE) | |
21 lib.sizes.data <- read.delim(lib_sizes, header=FALSE) | |
22 | |
23 m <- ncol(o.coverage.data) | |
24 | |
25 #normalize library sizes to 50 mil reads | |
26 lib.sizes.data <- lib.sizes.data/50000000 | |
27 #normalize peak reads and spanning reads by library size | |
28 o.coverage.data[,5:m] <- t(apply(o.coverage.data[,5:m], 1, function(x) x/t(lib.sizes.data))) | |
29 f9.coverage.data[,5:m] <- t(apply(f9.coverage.data[,5:m], 1, function(x) x/t(lib.sizes.data))) | |
30 | |
31 #returns list of dataframes containing windows for each peak | |
32 split_coverage <- function(a) { | |
33 returnlist <- list() | |
34 temp <- a[1,] | |
35 for (i in 2:nrow(a)) { | |
36 if (temp[1,4] == strsplit(as.character(a[i,4]),"_window")[[1]][1]){ | |
37 temp <- rbind(temp, a[i,]) | |
38 }else{ | |
39 returnlist[[length(returnlist)+1]] <- temp | |
40 temp <- a[i,] | |
41 } | |
42 } | |
43 returnlist[[length(returnlist)+1]] <- temp | |
44 return(returnlist) | |
45 } | |
46 | |
47 make_cor_plot <- function(a,b, corr_cut, cov_cut, output_file) { #a=o.coverage b=f9.coverage | |
48 | |
49 asplit <- split_coverage(a) | |
50 bsplit <- split_coverage(b) | |
51 | |
52 combined_regions <- data.frame() | |
53 | |
54 for (i in 1:length(asplit)){ | |
55 n <- nrow(asplit[[i]])-1 | |
56 m <- ncol(asplit[[i]]) | |
57 | |
58 a_split <- asplit[[i]] | |
59 b_split <- bsplit[[i]] | |
60 | |
61 #calculate correlation between total peak reads and spanning reads in each window | |
62 corb <- data.frame(cor(t(a_split[1,5:m]) | |
63 , t(b_split[-1,5:m]) | |
64 , method = "pearson")) | |
65 | |
66 data <- data.frame(t(corb), row.names = NULL, stringsAsFactors = FALSE) | |
67 | |
68 data <- cbind(rep(c(1:n)) | |
69 , a_split[-1,1:4] | |
70 , data) | |
71 colnames(data)<-c("position","chr", "start", "stop", "name", "correlation") | |
72 rownames(data)<-NULL | |
73 a_split$average <- rowMeans(a_split[,5:m]) | |
74 b_split$average <- rowMeans(b_split[,5:m]) | |
75 data2 <- data.frame(cbind(c(1:n), a_split[-1,m+1]), stringsAsFactors=FALSE) | |
76 data3 <- data.frame(cbind(c(1:n), b_split[-1,m+1]), stringsAsFactors=FALSE) | |
77 colnames(data2)<-c("position","averageReadDepth") | |
78 colnames(data3)<-c("position","averageReadDepth") | |
79 | |
80 #keep windows for which correlation with total peak reads is above threshold and | |
81 #number of spanning reads is above cutoff | |
82 regions <- data[which(data$correlation > corr_cut & data3$averageReadDepth > cov_cut),] | |
83 regions <- regions[,2:4] | |
84 | |
85 #collapse windows to non-overlappin regions | |
86 if (nrow(regions)>0){ | |
87 regions$V4 <- paste(as.character(a_split[1,4]),"_region1",sep="") | |
88 collapsed_regions <- regions[1,] | |
89 nregion <- 1 | |
90 if (nrow(regions)>=2){ | |
91 for (j in 2:nrow(regions)) { | |
92 if (as.numeric(regions[j,2]) <= (as.numeric(regions[j-1,3]))) { | |
93 collapsed_regions[nrow(collapsed_regions),3] <- regions[j,3] | |
94 }else{ | |
95 collapsed_regions <- rbind(collapsed_regions, regions[j,]) | |
96 nregion <- nregion + 1 | |
97 collapsed_regions[nrow(collapsed_regions),4] <- paste(as.character(a_split[1,4]) | |
98 ,"_region" | |
99 , as.character(nregion) | |
100 ,sep="") | |
101 } | |
102 } | |
103 } | |
104 rownames(collapsed_regions) <- NULL | |
105 | |
106 combined_regions <- rbind(combined_regions, collapsed_regions) | |
107 } | |
108 | |
109 | |
110 } | |
111 | |
112 write.table(combined_regions | |
113 , output_file | |
114 , sep = "\t" | |
115 , col.names = FALSE | |
116 , row.names = FALSE | |
117 , quote = FALSE) | |
118 print("Successfully found optimal ATAC-qPCR regions.") | |
119 } | |
120 | |
121 make_cor_plot(o.coverage.data, f9.coverage.data, corr_cutoff, cov_cutoff, output_file) |