annotate Contra/scripts/large_region_cbs.R @ 3:94362f37962e

Uploaded
author fcaramia
date Thu, 13 Sep 2012 02:43:53 -0400
parents 7564f3b1e675
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
1 # ----------------------------------------------------------------------#
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
2 # Copyright (c) 2011, Richard Lupat & Jason Li.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
3 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
4 # > Source License <
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
5 # This file is part of CONTRA.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
6 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
7 # CONTRA is free software: you can redistribute it and/or modify
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
8 # it under the terms of the GNU General Public License as published by
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
9 # the Free Software Foundation, either version 3 of the License, or
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
10 # (at your option) any later version.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
11 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
12 # CONTRA is distributed in the hope that it will be useful,
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
15 # GNU General Public License for more details.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
16 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
17 # You should have received a copy of the GNU General Public License
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
18 # along with CONTRA. If not, see <http://www.gnu.org/licenses/>.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
19 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
20 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
21 #-----------------------------------------------------------------------#
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
22 # Last Updated : 12 October 2011 16:43PM
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
23
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
24 library(DNAcopy)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
25 options <- commandArgs(trailingOnly = T)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
26 logcov.file = options[1]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
27 param.small.seg = as.numeric(options[2])
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
28 param.large.seg = as.numeric(options[3])
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
29 param.pval.cutoff = as.numeric(options[4])
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
30 param.pvals.size = as.numeric(options[5])
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
31 param.call.low = as.numeric(options[6])
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
32 param.call.high = as.numeric(options[7])
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
33 bufloc = options[8]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
34
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
35 #param.small.seg = 1
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
36 #param.large.seg = 25
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
37 #param.pval.cutoff = 0.1
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
38 #param.pvals.size = 0.35
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
39
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
40 #param.call.low = -0.3
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
41 #param.call.high= 0.3
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
42
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
43
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
44 dat = read.delim(logcov.file, as.is=F, header=T)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
45 t.id = dat$Targeted.Region.ID
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
46 t.mean = dat$Adjusted.Mean.of.LogRatio
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
47 t.chr = dat$Chr
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
48
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
49 cna.dat <- CNA(t.mean, t.chr, t.id, data.type="logratio")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
50 smooth.cna.dat = smooth.CNA(cna.dat)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
51
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
52 kmax.range = c(param.large.seg, param.small.seg)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
53 for (i in kmax.range){
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
54 out.f = paste(bufloc, "/CBS", i ,".txt", sep="")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
55 cna.segment = segment(smooth.cna.dat, kmax = i, verbose = 1)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
56 #pdf(paste("segment_", "kmax", i, ".pdf", sep=""))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
57 #for (chr.list in unique(t.chr)){
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
58 # plotSample(cna.segment, chromlist=chr.list)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
59 #}
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
60 #dev.off()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
61
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
62 #Get Data
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
63 cna.segment.out <- cna.segment$output
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
64 cna.segment.start = c()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
65 cna.segment.end = c()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
66 cna.segment.mean = c()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
67 cna.segment.pvals.size = c()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
68 cna.segment.calls = c()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
69 for (n in 1:nrow(cna.segment.out)){
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
70 cna.start.id = cna.segment.out[n,]$loc.start
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
71 cna.end.id = cna.segment.out[n,]$loc.end
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
72 cna.start.coord = dat[dat$Targeted.Region.ID==(cna.start.id),]$OriStCoordinate
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
73 cna.end.coord = dat[dat$Targeted.Region.ID==(cna.end.id), ]$OriEndCoordinate
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
74
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
75 dat.inrange = dat[dat$Targeted.Region.ID<=cna.end.id&dat$Targeted.Region.ID>=cna.start.id,]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
76 cna.segment.logratios = dat.inrange$Adjusted.Mean.of.LogRatio
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
77 cna.segment.pvalues = dat.inrange$Adjusted.P.Value
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
78 segment.pvals.above = dat.inrange[dat.inrange$Adjusted.P.Value<=param.pval.cutoff,]$Adjusted.P.Value
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
79
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
80 segment.pvals.size = length(segment.pvals.above)/length(cna.segment.pvalues)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
81 cna.segment.mean = c(cna.segment.mean, mean(cna.segment.logratios))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
82 cna.segment.start = c(cna.segment.start, cna.start.coord)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
83 cna.segment.end = c(cna.segment.end , cna.end.coord)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
84 cna.segment.pvals.size = c(cna.segment.pvals.size, segment.pvals.size)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
85
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
86 if (segment.pvals.size < param.pvals.size){
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
87 #No-Call
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
88 cna.segment.calls = c(cna.segment.calls, "No")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
89 } else {
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
90 m = mean(cna.segment.logratios)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
91 if ((m > param.call.low) && (m < param.call.high)){
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
92 #No-Call
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
93 cna.segment.calls = c(cna.segment.calls, "No")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
94 } else {
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
95 #Call
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
96 cna.segment.calls = c(cna.segment.calls, "CNV")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
97 }
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
98 }
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
99 }
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
100
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
101
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
102 outdf = data.frame(Chr=cna.segment.out$chrom, Target.Start=cna.segment.out$loc.start, Target.End=cna.segment.out$loc.end, NumberOfTargets=cna.segment.out$num.mark, OriStCoordinate=cna.segment.start, OriEndCoordinate=cna.segment.end, CBS.Mean = cna.segment.out$seg.mean, LogRatios = cna.segment.mean, Above.PValues.Cutoff=cna.segment.pvals.size, Calls = cna.segment.calls)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
103
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
104 write.table(outdf, out.f, sep="\t", quote=F, row.names=F, col.names=T)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
105
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
106 }
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
107
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
108 small.dat.f = paste(bufloc, "/CBS", param.small.seg ,".txt", sep="")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
109 large.dat.f = paste(bufloc, "/CBS", param.large.seg ,".txt", sep="")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
110
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
111 small.dat = read.delim(small.dat.f, as.is=F, header=T)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
112 large.dat = read.delim(large.dat.f, as.is=F, header=T)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
113
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
114 large.nocall = large.dat[large.dat$Calls=="No",]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
115 small.call = small.dat[small.dat$Calls!="No",]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
116 included.segment= large.dat[large.dat$Calls!="No",]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
117
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
118 for (i in 1:nrow(small.call)){
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
119 small.segment = small.call[i, ]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
120 chr.small = small.segment$Chr
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
121 start.small = small.segment$Target.Start
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
122 end.small = small.segment$Target.End
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
123 match.large.nocall = large.nocall[large.nocall$Chr==chr.small,]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
124
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
125 match.large.nocall = match.large.nocall[match.large.nocall$Target.End>=start.small,]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
126 match.large.nocall = match.large.nocall[match.large.nocall$Target.Start<=start.small,]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
127 merged.hole = match.large.nocall[match.large.nocall$Target.End>=start.small,]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
128
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
129 if (nrow(merged.hole) > 0){
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
130 included.segment = merge(included.segment, small.segment, all=T)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
131 }
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
132 }
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
133
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
134 out.f = paste(logcov.file, ".LargeDeletion.txt", sep="")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
135 write.table(included.segment, out.f, sep="\t", quote=F, row.names=F)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
136
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
137
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
138
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
139
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
140
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
141
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
142
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
143
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
144
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
145
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
146
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
147
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
148