diff Contra/scripts/large_region_cbs.R @ 0:7564f3b1e675

Uploaded
author fcaramia
date Thu, 13 Sep 2012 02:31:43 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Contra/scripts/large_region_cbs.R	Thu Sep 13 02:31:43 2012 -0400
@@ -0,0 +1,148 @@
+# ----------------------------------------------------------------------#
+# Copyright (c) 2011, Richard Lupat & Jason Li.
+#
+# > Source License <
+# This file is part of CONTRA.
+#
+#    CONTRA is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+#
+#    CONTRA is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with CONTRA.  If not, see <http://www.gnu.org/licenses/>.
+#
+# 
+#-----------------------------------------------------------------------#
+# Last Updated : 12 October 2011 16:43PM
+
+library(DNAcopy)
+options <- commandArgs(trailingOnly = T)
+logcov.file = options[1]
+param.small.seg = as.numeric(options[2])
+param.large.seg = as.numeric(options[3])
+param.pval.cutoff = as.numeric(options[4])
+param.pvals.size  = as.numeric(options[5])
+param.call.low	= as.numeric(options[6])
+param.call.high	= as.numeric(options[7])
+bufloc		= options[8]
+
+#param.small.seg = 1
+#param.large.seg = 25
+#param.pval.cutoff = 0.1
+#param.pvals.size  = 0.35
+
+#param.call.low	= -0.3
+#param.call.high= 0.3
+
+
+dat = read.delim(logcov.file, as.is=F, header=T)
+t.id = dat$Targeted.Region.ID
+t.mean = dat$Adjusted.Mean.of.LogRatio
+t.chr = dat$Chr
+
+cna.dat <- CNA(t.mean, t.chr, t.id, data.type="logratio")
+smooth.cna.dat = smooth.CNA(cna.dat)
+
+kmax.range = c(param.large.seg, param.small.seg)
+for (i in kmax.range){
+	out.f = paste(bufloc, "/CBS", i ,".txt", sep="")
+	cna.segment = segment(smooth.cna.dat, kmax = i, verbose = 1)
+	#pdf(paste("segment_", "kmax", i, ".pdf", sep=""))
+	#for (chr.list in unique(t.chr)){
+	#	plotSample(cna.segment, chromlist=chr.list)
+	#}
+	#dev.off()
+
+	#Get Data
+	cna.segment.out <- cna.segment$output
+	cna.segment.start = c()
+	cna.segment.end	  = c()
+	cna.segment.mean  = c()
+	cna.segment.pvals.size = c()
+	cna.segment.calls = c()
+	for (n in 1:nrow(cna.segment.out)){
+		cna.start.id	= cna.segment.out[n,]$loc.start
+		cna.end.id	= cna.segment.out[n,]$loc.end
+		cna.start.coord = dat[dat$Targeted.Region.ID==(cna.start.id),]$OriStCoordinate
+		cna.end.coord	= dat[dat$Targeted.Region.ID==(cna.end.id), ]$OriEndCoordinate
+
+		dat.inrange 		= dat[dat$Targeted.Region.ID<=cna.end.id&dat$Targeted.Region.ID>=cna.start.id,]	
+		cna.segment.logratios 	= dat.inrange$Adjusted.Mean.of.LogRatio
+		cna.segment.pvalues   	= dat.inrange$Adjusted.P.Value
+		segment.pvals.above	= dat.inrange[dat.inrange$Adjusted.P.Value<=param.pval.cutoff,]$Adjusted.P.Value
+
+		segment.pvals.size	= length(segment.pvals.above)/length(cna.segment.pvalues)
+		cna.segment.mean  = c(cna.segment.mean, mean(cna.segment.logratios))
+		cna.segment.start = c(cna.segment.start, cna.start.coord)
+		cna.segment.end	  = c(cna.segment.end , cna.end.coord)
+		cna.segment.pvals.size = c(cna.segment.pvals.size, segment.pvals.size)
+
+		if (segment.pvals.size < param.pvals.size){
+			#No-Call
+			cna.segment.calls = c(cna.segment.calls, "No")
+		} else {
+			m = mean(cna.segment.logratios)
+			if ((m > param.call.low) && (m < param.call.high)){
+				#No-Call
+				cna.segment.calls = c(cna.segment.calls, "No")
+			} else {
+				#Call
+				cna.segment.calls = c(cna.segment.calls, "CNV")
+			}
+		}
+	}	
+
+
+	outdf = data.frame(Chr=cna.segment.out$chrom, Target.Start=cna.segment.out$loc.start, Target.End=cna.segment.out$loc.end, NumberOfTargets=cna.segment.out$num.mark, OriStCoordinate=cna.segment.start, OriEndCoordinate=cna.segment.end, CBS.Mean = cna.segment.out$seg.mean, LogRatios = cna.segment.mean, Above.PValues.Cutoff=cna.segment.pvals.size, Calls = cna.segment.calls)
+
+	write.table(outdf, out.f, sep="\t", quote=F, row.names=F, col.names=T)
+
+}
+
+small.dat.f = paste(bufloc, "/CBS", param.small.seg ,".txt", sep="")
+large.dat.f = paste(bufloc, "/CBS", param.large.seg ,".txt", sep="")
+
+small.dat = read.delim(small.dat.f, as.is=F, header=T)
+large.dat = read.delim(large.dat.f, as.is=F, header=T)
+
+large.nocall    = large.dat[large.dat$Calls=="No",]
+small.call      = small.dat[small.dat$Calls!="No",]
+included.segment= large.dat[large.dat$Calls!="No",]
+
+for (i in 1:nrow(small.call)){
+        small.segment   = small.call[i, ]
+        chr.small       = small.segment$Chr
+        start.small     = small.segment$Target.Start
+        end.small       = small.segment$Target.End
+        match.large.nocall = large.nocall[large.nocall$Chr==chr.small,]
+        
+        match.large.nocall = match.large.nocall[match.large.nocall$Target.End>=start.small,]    
+        match.large.nocall = match.large.nocall[match.large.nocall$Target.Start<=start.small,]   
+        merged.hole        = match.large.nocall[match.large.nocall$Target.End>=start.small,]
+        
+        if (nrow(merged.hole) > 0){
+                included.segment = merge(included.segment, small.segment, all=T)
+        }
+}
+
+out.f = paste(logcov.file, ".LargeDeletion.txt", sep="")
+write.table(included.segment, out.f, sep="\t", quote=F, row.names=F)
+
+
+
+
+
+
+
+
+
+
+
+
+