Mercurial > repos > fcaramia > contra
comparison Contra/scripts/large_region_cbs.R @ 0:7564f3b1e675
Uploaded
author | fcaramia |
---|---|
date | Thu, 13 Sep 2012 02:31:43 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:7564f3b1e675 |
---|---|
1 # ----------------------------------------------------------------------# | |
2 # Copyright (c) 2011, Richard Lupat & Jason Li. | |
3 # | |
4 # > Source License < | |
5 # This file is part of CONTRA. | |
6 # | |
7 # CONTRA is free software: you can redistribute it and/or modify | |
8 # it under the terms of the GNU General Public License as published by | |
9 # the Free Software Foundation, either version 3 of the License, or | |
10 # (at your option) any later version. | |
11 # | |
12 # CONTRA is distributed in the hope that it will be useful, | |
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 # GNU General Public License for more details. | |
16 # | |
17 # You should have received a copy of the GNU General Public License | |
18 # along with CONTRA. If not, see <http://www.gnu.org/licenses/>. | |
19 # | |
20 # | |
21 #-----------------------------------------------------------------------# | |
22 # Last Updated : 12 October 2011 16:43PM | |
23 | |
24 library(DNAcopy) | |
25 options <- commandArgs(trailingOnly = T) | |
26 logcov.file = options[1] | |
27 param.small.seg = as.numeric(options[2]) | |
28 param.large.seg = as.numeric(options[3]) | |
29 param.pval.cutoff = as.numeric(options[4]) | |
30 param.pvals.size = as.numeric(options[5]) | |
31 param.call.low = as.numeric(options[6]) | |
32 param.call.high = as.numeric(options[7]) | |
33 bufloc = options[8] | |
34 | |
35 #param.small.seg = 1 | |
36 #param.large.seg = 25 | |
37 #param.pval.cutoff = 0.1 | |
38 #param.pvals.size = 0.35 | |
39 | |
40 #param.call.low = -0.3 | |
41 #param.call.high= 0.3 | |
42 | |
43 | |
44 dat = read.delim(logcov.file, as.is=F, header=T) | |
45 t.id = dat$Targeted.Region.ID | |
46 t.mean = dat$Adjusted.Mean.of.LogRatio | |
47 t.chr = dat$Chr | |
48 | |
49 cna.dat <- CNA(t.mean, t.chr, t.id, data.type="logratio") | |
50 smooth.cna.dat = smooth.CNA(cna.dat) | |
51 | |
52 kmax.range = c(param.large.seg, param.small.seg) | |
53 for (i in kmax.range){ | |
54 out.f = paste(bufloc, "/CBS", i ,".txt", sep="") | |
55 cna.segment = segment(smooth.cna.dat, kmax = i, verbose = 1) | |
56 #pdf(paste("segment_", "kmax", i, ".pdf", sep="")) | |
57 #for (chr.list in unique(t.chr)){ | |
58 # plotSample(cna.segment, chromlist=chr.list) | |
59 #} | |
60 #dev.off() | |
61 | |
62 #Get Data | |
63 cna.segment.out <- cna.segment$output | |
64 cna.segment.start = c() | |
65 cna.segment.end = c() | |
66 cna.segment.mean = c() | |
67 cna.segment.pvals.size = c() | |
68 cna.segment.calls = c() | |
69 for (n in 1:nrow(cna.segment.out)){ | |
70 cna.start.id = cna.segment.out[n,]$loc.start | |
71 cna.end.id = cna.segment.out[n,]$loc.end | |
72 cna.start.coord = dat[dat$Targeted.Region.ID==(cna.start.id),]$OriStCoordinate | |
73 cna.end.coord = dat[dat$Targeted.Region.ID==(cna.end.id), ]$OriEndCoordinate | |
74 | |
75 dat.inrange = dat[dat$Targeted.Region.ID<=cna.end.id&dat$Targeted.Region.ID>=cna.start.id,] | |
76 cna.segment.logratios = dat.inrange$Adjusted.Mean.of.LogRatio | |
77 cna.segment.pvalues = dat.inrange$Adjusted.P.Value | |
78 segment.pvals.above = dat.inrange[dat.inrange$Adjusted.P.Value<=param.pval.cutoff,]$Adjusted.P.Value | |
79 | |
80 segment.pvals.size = length(segment.pvals.above)/length(cna.segment.pvalues) | |
81 cna.segment.mean = c(cna.segment.mean, mean(cna.segment.logratios)) | |
82 cna.segment.start = c(cna.segment.start, cna.start.coord) | |
83 cna.segment.end = c(cna.segment.end , cna.end.coord) | |
84 cna.segment.pvals.size = c(cna.segment.pvals.size, segment.pvals.size) | |
85 | |
86 if (segment.pvals.size < param.pvals.size){ | |
87 #No-Call | |
88 cna.segment.calls = c(cna.segment.calls, "No") | |
89 } else { | |
90 m = mean(cna.segment.logratios) | |
91 if ((m > param.call.low) && (m < param.call.high)){ | |
92 #No-Call | |
93 cna.segment.calls = c(cna.segment.calls, "No") | |
94 } else { | |
95 #Call | |
96 cna.segment.calls = c(cna.segment.calls, "CNV") | |
97 } | |
98 } | |
99 } | |
100 | |
101 | |
102 outdf = data.frame(Chr=cna.segment.out$chrom, Target.Start=cna.segment.out$loc.start, Target.End=cna.segment.out$loc.end, NumberOfTargets=cna.segment.out$num.mark, OriStCoordinate=cna.segment.start, OriEndCoordinate=cna.segment.end, CBS.Mean = cna.segment.out$seg.mean, LogRatios = cna.segment.mean, Above.PValues.Cutoff=cna.segment.pvals.size, Calls = cna.segment.calls) | |
103 | |
104 write.table(outdf, out.f, sep="\t", quote=F, row.names=F, col.names=T) | |
105 | |
106 } | |
107 | |
108 small.dat.f = paste(bufloc, "/CBS", param.small.seg ,".txt", sep="") | |
109 large.dat.f = paste(bufloc, "/CBS", param.large.seg ,".txt", sep="") | |
110 | |
111 small.dat = read.delim(small.dat.f, as.is=F, header=T) | |
112 large.dat = read.delim(large.dat.f, as.is=F, header=T) | |
113 | |
114 large.nocall = large.dat[large.dat$Calls=="No",] | |
115 small.call = small.dat[small.dat$Calls!="No",] | |
116 included.segment= large.dat[large.dat$Calls!="No",] | |
117 | |
118 for (i in 1:nrow(small.call)){ | |
119 small.segment = small.call[i, ] | |
120 chr.small = small.segment$Chr | |
121 start.small = small.segment$Target.Start | |
122 end.small = small.segment$Target.End | |
123 match.large.nocall = large.nocall[large.nocall$Chr==chr.small,] | |
124 | |
125 match.large.nocall = match.large.nocall[match.large.nocall$Target.End>=start.small,] | |
126 match.large.nocall = match.large.nocall[match.large.nocall$Target.Start<=start.small,] | |
127 merged.hole = match.large.nocall[match.large.nocall$Target.End>=start.small,] | |
128 | |
129 if (nrow(merged.hole) > 0){ | |
130 included.segment = merge(included.segment, small.segment, all=T) | |
131 } | |
132 } | |
133 | |
134 out.f = paste(logcov.file, ".LargeDeletion.txt", sep="") | |
135 write.table(included.segment, out.f, sep="\t", quote=F, row.names=F) | |
136 | |
137 | |
138 | |
139 | |
140 | |
141 | |
142 | |
143 | |
144 | |
145 | |
146 | |
147 | |
148 |