atac_primer_tool: find_qPCR_regions.R comparison

comparison find_qPCR_regions.R @ 0:fd3ea97a96bc draft

planemo upload commit 103cb51ec368438642504c3f98b76c363db478bb

author	kyost
date	Sat, 28 Apr 2018 15:07:26 -0400
parents
children	fbfe7b785ea7

comparison

equal deleted inserted replaced

--1:000000000000
+:fd3ea97a96bc
+## Command to run tool:
+# Rscript --vanilla find_qPCR_regions.R o.coverage.bed f9.coverage.bed lib_sizes.txt cor_cutoff cov_cutoff output_file
+# Set up R error handling to go to stderr
+options(show.error.messages=F, error=function(){cat(geterrmessage(),file=stderr());q("no",1,F)})
+# Avoid crashing Galaxy with an UTF8 error on German LC settings
+loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")
+args <- commandArgs(TRUE)
+o.coverage <- args[1]
+f9.coverage <- args[2]
+lib_sizes <- args[3]
+corr_cutoff <- as.numeric(args[4])
+cov_cutoff <- as.numeric(args[5])
+output_file <- args[6]
+o.coverage.data <- read.delim(o.coverage, header=FALSE)
+f9.coverage.data <- read.delim(f9.coverage, header=FALSE)
+lib.sizes.data <- read.delim(lib_sizes, header=FALSE)
+m <- ncol(o.coverage.data)
+#normalize library sizes to 50 mil reads
+lib.sizes.data <- lib.sizes.data/50000000
+#normalize peak reads and spanning reads by library size
+o.coverage.data[,5:m] <- t(apply(o.coverage.data[,5:m], 1, function(x) x/t(lib.sizes.data)))
+f9.coverage.data[,5:m] <- t(apply(f9.coverage.data[,5:m], 1, function(x) x/t(lib.sizes.data)))
+#returns list of dataframes containing windows for each peak
+split_coverage <- function(a) {
+returnlist <- list()
+temp <- a[1,]
+for (i in 2:nrow(a)) {
+if (temp[1,4] == strsplit(as.character(a[i,4]),"_window")[[1]][1]){
+temp <- rbind(temp, a[i,])
+}else{
+returnlist[[length(returnlist)+1]] <- temp
+temp <- a[i,]
+}
+}
+returnlist[[length(returnlist)+1]] <- temp
+return(returnlist)
+}
+make_cor_plot <- function(a,b, corr_cut, cov_cut, output_file) { #a=o.coverage b=f9.coverage
+asplit <- split_coverage(a)
+bsplit <- split_coverage(b)
+combined_regions <- data.frame()
+for (i in 1:length(asplit)){
+n <- nrow(asplit[[i]])-1
+m <- ncol(asplit[[i]])
+	a_split <- asplit[[i]]
+	b_split <- bsplit[[i]]
+	#calculate correlation between total peak reads and spanning reads in each window
+corb <- data.frame(cor(t(a_split[1,5:m])
+, t(b_split[-1,5:m])
+						   , method = "pearson"))
+data <- data.frame(t(corb), row.names = NULL, stringsAsFactors = FALSE)
+data <- cbind(rep(c(1:n))
+, a_split[-1,1:4]
+, data)
+colnames(data)<-c("position","chr", "start", "stop", "name", "correlation")
+rownames(data)<-NULL
+a_split$average <- rowMeans(a_split[,5:m])
+b_split$average <- rowMeans(b_split[,5:m])
+data2 <- data.frame(cbind(c(1:n), a_split[-1,m+1]), stringsAsFactors=FALSE)
+data3 <- data.frame(cbind(c(1:n), b_split[-1,m+1]), stringsAsFactors=FALSE)
+colnames(data2)<-c("position","averageReadDepth")
+colnames(data3)<-c("position","averageReadDepth")
+	#keep windows for which correlation with total peak reads is above threshold and
+	#number of spanning reads is above cutoff
+regions <- data[which(data$correlation > corr_cut & data3$averageReadDepth > cov_cut),]
+regions <- regions[,2:4]
+	#collapse windows to non-overlappin regions
+if (nrow(regions)>0){
+regions$V4 <- paste(as.character(a_split[1,4]),"_region1",sep="")
+collapsed_regions <- regions[1,]
+nregion <- 1
+if (nrow(regions)>=2){
+		for (j in 2:nrow(regions)) {
+			if (as.numeric(regions[j,2]) <= (as.numeric(regions[j-1,3]))) {
+				collapsed_regions[nrow(collapsed_regions),3] <- regions[j,3]
+			}else{
+				collapsed_regions <- rbind(collapsed_regions, regions[j,])
+				nregion <- nregion + 1
+				collapsed_regions[nrow(collapsed_regions),4] <- paste(as.character(a_split[1,4])
+																	,"_region"
+																	, as.character(nregion)
+																	,sep="")
+			}
+		}
+	}
+	rownames(collapsed_regions) <- NULL
+	combined_regions <- rbind(combined_regions, collapsed_regions)
+}
+}
+write.table(combined_regions
+, output_file
+, sep = "\t"
+, col.names = FALSE
+, row.names = FALSE
+, quote = FALSE)
+print("Successfully found optimal ATAC-qPCR regions.")
+}
+make_cor_plot(o.coverage.data, f9.coverage.data, corr_cutoff, cov_cutoff, output_file)

Mercurial > repos > kyost > atac_primer_tool

comparison find_qPCR_regions.R @ 0:fd3ea97a96bc draft