0
|
1 # ----------------------------------------------------------------------#
|
|
2 # Copyright (c) 2011, Richard Lupat & Jason Li.
|
|
3 #
|
|
4 # > Source License <
|
|
5 # This file is part of CONTRA.
|
|
6 #
|
|
7 # CONTRA is free software: you can redistribute it and/or modify
|
|
8 # it under the terms of the GNU General Public License as published by
|
|
9 # the Free Software Foundation, either version 3 of the License, or
|
|
10 # (at your option) any later version.
|
|
11 #
|
|
12 # CONTRA is distributed in the hope that it will be useful,
|
|
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
15 # GNU General Public License for more details.
|
|
16 #
|
|
17 # You should have received a copy of the GNU General Public License
|
|
18 # along with CONTRA. If not, see <http://www.gnu.org/licenses/>.
|
|
19 #
|
|
20 #
|
|
21 #-----------------------------------------------------------------------#
|
|
22 # Last Updated : 12 October 2011 16:43PM
|
|
23
|
|
24 library(DNAcopy)
|
|
25 options <- commandArgs(trailingOnly = T)
|
|
26 logcov.file = options[1]
|
|
27 param.small.seg = as.numeric(options[2])
|
|
28 param.large.seg = as.numeric(options[3])
|
|
29 param.pval.cutoff = as.numeric(options[4])
|
|
30 param.pvals.size = as.numeric(options[5])
|
|
31 param.call.low = as.numeric(options[6])
|
|
32 param.call.high = as.numeric(options[7])
|
|
33 bufloc = options[8]
|
|
34
|
|
35 #param.small.seg = 1
|
|
36 #param.large.seg = 25
|
|
37 #param.pval.cutoff = 0.1
|
|
38 #param.pvals.size = 0.35
|
|
39
|
|
40 #param.call.low = -0.3
|
|
41 #param.call.high= 0.3
|
|
42
|
|
43
|
|
44 dat = read.delim(logcov.file, as.is=F, header=T)
|
|
45 t.id = dat$Targeted.Region.ID
|
|
46 t.mean = dat$Adjusted.Mean.of.LogRatio
|
|
47 t.chr = dat$Chr
|
|
48
|
|
49 cna.dat <- CNA(t.mean, t.chr, t.id, data.type="logratio")
|
|
50 smooth.cna.dat = smooth.CNA(cna.dat)
|
|
51
|
|
52 kmax.range = c(param.large.seg, param.small.seg)
|
|
53 for (i in kmax.range){
|
|
54 out.f = paste(bufloc, "/CBS", i ,".txt", sep="")
|
|
55 cna.segment = segment(smooth.cna.dat, kmax = i, verbose = 1)
|
|
56 #pdf(paste("segment_", "kmax", i, ".pdf", sep=""))
|
|
57 #for (chr.list in unique(t.chr)){
|
|
58 # plotSample(cna.segment, chromlist=chr.list)
|
|
59 #}
|
|
60 #dev.off()
|
|
61
|
|
62 #Get Data
|
|
63 cna.segment.out <- cna.segment$output
|
|
64 cna.segment.start = c()
|
|
65 cna.segment.end = c()
|
|
66 cna.segment.mean = c()
|
|
67 cna.segment.pvals.size = c()
|
|
68 cna.segment.calls = c()
|
|
69 for (n in 1:nrow(cna.segment.out)){
|
|
70 cna.start.id = cna.segment.out[n,]$loc.start
|
|
71 cna.end.id = cna.segment.out[n,]$loc.end
|
|
72 cna.start.coord = dat[dat$Targeted.Region.ID==(cna.start.id),]$OriStCoordinate
|
|
73 cna.end.coord = dat[dat$Targeted.Region.ID==(cna.end.id), ]$OriEndCoordinate
|
|
74
|
|
75 dat.inrange = dat[dat$Targeted.Region.ID<=cna.end.id&dat$Targeted.Region.ID>=cna.start.id,]
|
|
76 cna.segment.logratios = dat.inrange$Adjusted.Mean.of.LogRatio
|
|
77 cna.segment.pvalues = dat.inrange$Adjusted.P.Value
|
|
78 segment.pvals.above = dat.inrange[dat.inrange$Adjusted.P.Value<=param.pval.cutoff,]$Adjusted.P.Value
|
|
79
|
|
80 segment.pvals.size = length(segment.pvals.above)/length(cna.segment.pvalues)
|
|
81 cna.segment.mean = c(cna.segment.mean, mean(cna.segment.logratios))
|
|
82 cna.segment.start = c(cna.segment.start, cna.start.coord)
|
|
83 cna.segment.end = c(cna.segment.end , cna.end.coord)
|
|
84 cna.segment.pvals.size = c(cna.segment.pvals.size, segment.pvals.size)
|
|
85
|
|
86 if (segment.pvals.size < param.pvals.size){
|
|
87 #No-Call
|
|
88 cna.segment.calls = c(cna.segment.calls, "No")
|
|
89 } else {
|
|
90 m = mean(cna.segment.logratios)
|
|
91 if ((m > param.call.low) && (m < param.call.high)){
|
|
92 #No-Call
|
|
93 cna.segment.calls = c(cna.segment.calls, "No")
|
|
94 } else {
|
|
95 #Call
|
|
96 cna.segment.calls = c(cna.segment.calls, "CNV")
|
|
97 }
|
|
98 }
|
|
99 }
|
|
100
|
|
101
|
|
102 outdf = data.frame(Chr=cna.segment.out$chrom, Target.Start=cna.segment.out$loc.start, Target.End=cna.segment.out$loc.end, NumberOfTargets=cna.segment.out$num.mark, OriStCoordinate=cna.segment.start, OriEndCoordinate=cna.segment.end, CBS.Mean = cna.segment.out$seg.mean, LogRatios = cna.segment.mean, Above.PValues.Cutoff=cna.segment.pvals.size, Calls = cna.segment.calls)
|
|
103
|
|
104 write.table(outdf, out.f, sep="\t", quote=F, row.names=F, col.names=T)
|
|
105
|
|
106 }
|
|
107
|
|
108 small.dat.f = paste(bufloc, "/CBS", param.small.seg ,".txt", sep="")
|
|
109 large.dat.f = paste(bufloc, "/CBS", param.large.seg ,".txt", sep="")
|
|
110
|
|
111 small.dat = read.delim(small.dat.f, as.is=F, header=T)
|
|
112 large.dat = read.delim(large.dat.f, as.is=F, header=T)
|
|
113
|
|
114 large.nocall = large.dat[large.dat$Calls=="No",]
|
|
115 small.call = small.dat[small.dat$Calls!="No",]
|
|
116 included.segment= large.dat[large.dat$Calls!="No",]
|
|
117
|
|
118 for (i in 1:nrow(small.call)){
|
|
119 small.segment = small.call[i, ]
|
|
120 chr.small = small.segment$Chr
|
|
121 start.small = small.segment$Target.Start
|
|
122 end.small = small.segment$Target.End
|
|
123 match.large.nocall = large.nocall[large.nocall$Chr==chr.small,]
|
|
124
|
|
125 match.large.nocall = match.large.nocall[match.large.nocall$Target.End>=start.small,]
|
|
126 match.large.nocall = match.large.nocall[match.large.nocall$Target.Start<=start.small,]
|
|
127 merged.hole = match.large.nocall[match.large.nocall$Target.End>=start.small,]
|
|
128
|
|
129 if (nrow(merged.hole) > 0){
|
|
130 included.segment = merge(included.segment, small.segment, all=T)
|
|
131 }
|
|
132 }
|
|
133
|
|
134 out.f = paste(logcov.file, ".LargeDeletion.txt", sep="")
|
|
135 write.table(included.segment, out.f, sep="\t", quote=F, row.names=F)
|
|
136
|
|
137
|
|
138
|
|
139
|
|
140
|
|
141
|
|
142
|
|
143
|
|
144
|
|
145
|
|
146
|
|
147
|
|
148
|