annotate Contra/scripts/cn_analysis.v3.R @ 12:d63052c0d852

Uploaded
author fcaramia
date Sun, 02 Dec 2012 23:10:12 -0500
parents 7564f3b1e675
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
1 # ----------------------------------------------------------------------#
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
2 # Copyright (c) 2011, Richard Lupat & Jason Li.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
3 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
4 # > Source License <
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
5 # This file is part of CONTRA.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
6 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
7 # CONTRA is free software: you can redistribute it and/or modify
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
8 # it under the terms of the GNU General Public License as published by
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
9 # the Free Software Foundation, either version 3 of the License, or
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
10 # (at your option) any later version.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
11 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
12 # CONTRA is distributed in the hope that it will be useful,
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
15 # GNU General Public License for more details.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
16 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
17 # You should have received a copy of the GNU General Public License
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
18 # along with CONTRA. If not, see <http://www.gnu.org/licenses/>.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
19 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
20 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
21 #-----------------------------------------------------------------------#
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
22 # Last Updated : 31 Oct 2011 17:00PM
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
23
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
24
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
25 # Parameters Parsing (from Command Line)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
26 options <- commandArgs(trailingOnly = T)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
27 bins = as.integer(options[1])
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
28 rd.cutoff = as.integer(options[2])
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
29 min.bases = as.integer(options[3])
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
30 outf = options[4]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
31 sample.name = options[5]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
32 plotoption = options[6]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
33 actual.bin = as.numeric(options[7])
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
34 min_normal_rd_for_call = as.numeric(options[8])
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
35 min_tumour_rd_for_call = as.numeric(options[9])
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
36 min_avg_cov_for_call = as.numeric(options[10])
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
37
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
38 if (sample.name == "No-SampleName")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
39 sample.name = ""
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
40
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
41 if (sample.name != "")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
42 sample.name = paste(sample.name, ".", sep="")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
43
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
44 # Setup output name
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
45 out.f = paste(outf, "/table/", sample.name, "CNATable.", rd.cutoff,"rd.", min.bases,"bases.", bins,"bins.txt", sep="")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
46 pdf.out.f = paste(outf, "/plot/", sample.name, "densityplot.", bins, "bins.pdf", sep="")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
47
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
48 # Open and read input files
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
49 # cnAverageFile = paste("bin", bins, ".txt", sep="")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
50 cnAverageFile = paste(outf,"/buf/bin",bins,".txt",sep="")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
51 boundariesFile = paste(outf,"/buf/bin",bins,".boundaries.txt",sep="")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
52 print (cnAverageFile)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
53 cn.average = read.delim(cnAverageFile, as.is=F, header=F)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
54 cn.boundary= read.delim(boundariesFile,as.is=F, header=F)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
55
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
56 # Apply thresholds and data grouping
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
57 cn.average.aboveTs = cn.average[cn.average$V3>min.bases,]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
58 cn.average.list = as.matrix(cn.average.aboveTs$V4)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
59
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
60 # Get the mean and sd for each bins
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
61 cn.average.mean = c()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
62 cn.average.sd = c()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
63 cn.average.log= c()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
64
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
65 # Density Plots for each bins
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
66 if (plotoption == "True"){
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
67 pdf(pdf.out.f)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
68 }
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
69 for (j in 1:actual.bin){
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
70 cn.average.nth = as.matrix(cn.average.aboveTs[cn.average.aboveTs$V15==j,]$V4)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
71 cn.coverage.nth = as.matrix(cn.average.aboveTs[cn.average.aboveTs$V15==j,]$V11)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
72 boundary.end = cn.boundary[cn.boundary$V1==j,]$V2
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
73 boundary.start = cn.boundary[cn.boundary$V1==(j-1),]$V2
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
74 boundary.mid = (boundary.end+boundary.start)/2
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
75 if (plotoption == "True") {
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
76 plot_title = paste("density: bin", bins, sep="")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
77 #plot(density(cn.average.nth),xlim=c(-5,5), title=plot_title)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
78 plot(density(cn.average.nth),xlim=c(-5,5))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
79 }
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
80 cn.average.mean = c(cn.average.mean, mean(cn.average.nth))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
81 # cn.average.sd = c(cn.average.sd, sd(cn.average.nth))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
82 cn.average.sd = c(cn.average.sd, apply(cn.average.nth,2,sd))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
83 #cn.average.log = c(cn.average.log, boundary.mid)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
84 cn.average.log = c(cn.average.log, log(mean(cn.coverage.nth),2))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
85 }
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
86 if (plotoption == "True"){
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
87 dev.off()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
88 }
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
89
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
90 # for point outside of boundaries
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
91 if (bins > 1) {
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
92 boundary.first = cn.boundary[cn.boundary$V1==0,]$V2
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
93 boundary.last = cn.boundary[cn.boundary$V1==bins,]$V2
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
94
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
95 b.mean.y2 = cn.average.mean[2]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
96 b.mean.y1 = cn.average.mean[1]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
97 b.sd.y2 = cn.average.sd[2]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
98 b.sd.y1 = cn.average.sd[1]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
99 b.x2 = cn.average.log[2]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
100 b.x1 = cn.average.log[1]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
101
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
102 boundary.f.mean = (((b.mean.y2- b.mean.y1)/(b.x2-b.x1))*(boundary.first-b.x1))+b.mean.y1
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
103 boundary.f.sd = (((b.sd.y2- b.sd.y1)/(b.x2-b.x1))*(boundary.first-b.x1))+b.sd.y1
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
104
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
105 if (boundary.f.sd < 0){
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
106 boundary.f.sd = 0
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
107 }
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
108
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
109 b.mean.y2 = cn.average.mean[bins]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
110 b.mean.y1 = cn.average.mean[bins-1]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
111 b.sd.y2 = cn.average.sd[bins]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
112 b.sd.y1 = cn.average.sd[bins-1]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
113 b.x2 = cn.average.log[bins]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
114 b.x1 = cn.average.log[bins-1]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
115
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
116 boundary.l.mean = (((b.mean.y2- b.mean.y1)/(b.x2-b.x1))*(boundary.last-b.x1))+b.mean.y1
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
117 boundary.l.sd = (((b.sd.y2- b.sd.y1)/(b.x2-b.x1))*(boundary.last-b.x1))+b.sd.y1
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
118
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
119 #cn.average.log = c(boundary.first, cn.average.log, boundary.last)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
120 #cn.linear.mean = c(boundary.f.mean, cn.average.mean, boundary.l.mean)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
121 #cn.linear.sd = c(boundary.f.sd, cn.average.sd, boundary.l.sd)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
122
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
123 cn.average.log = c(boundary.first, cn.average.log)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
124 cn.linear.mean = c(boundary.f.mean, cn.average.mean)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
125 cn.linear.sd = c(boundary.f.sd, cn.average.sd)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
126
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
127 }
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
128
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
129 # Linear Interpolation
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
130 if (bins > 1 ){
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
131 #print(cn.average.log)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
132 #print(cn.linear.mean)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
133 #print(cn.linear.sd)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
134 mean.fn <- approxfun(cn.average.log, cn.linear.mean, rule=2)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
135 sd.fn <- approxfun(cn.average.log, cn.linear.sd, rule=2)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
136 }
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
137
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
138
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
139 # Put the data's details into matrices
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
140 ids = as.matrix(cn.average.aboveTs$V1)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
141 exons = as.matrix(cn.average.aboveTs$V6)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
142 exons.pos = as.matrix(cn.average.aboveTs$V5)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
143 gs = as.matrix(cn.average.aboveTs$V2)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
144 number.bases = as.matrix(cn.average.aboveTs$V3)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
145 mean = as.matrix(cn.average.aboveTs$V4)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
146 sd = as.matrix(cn.average.aboveTs$V7)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
147 tumour.rd = as.matrix(cn.average.aboveTs$V8)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
148 tumour.rd.ori = as.matrix(cn.average.aboveTs$V10)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
149 normal.rd = as.matrix(cn.average.aboveTs$V9)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
150 normal.rd.ori = as.matrix(cn.average.aboveTs$V11)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
151 median = as.matrix(cn.average.aboveTs$V12)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
152 MinLogRatio = as.matrix(cn.average.aboveTs$V13)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
153 MaxLogRatio = as.matrix(cn.average.aboveTs$V14)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
154 Bin = as.matrix(cn.average.aboveTs$V15)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
155 Chr = as.matrix(cn.average.aboveTs$V16)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
156 OriStCoordinate = as.matrix(cn.average.aboveTs$V17)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
157 OriEndCoordinate= as.matrix(cn.average.aboveTs$V18)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
158
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
159 # Linear Fit
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
160 logratios.mean = mean
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
161 logcov.mean = log2((normal.rd + tumour.rd)/2)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
162 fit.mean = lm(logratios.mean ~ logcov.mean)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
163 fit.x = fit.mean$coefficient[1]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
164 fit.y = fit.mean$coefficient[2]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
165
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
166 adjusted.lr = rep(NA, length(logratios.mean))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
167 for (j in 1:length(logratios.mean)){
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
168 fitted.mean = fit.x + fit.y * logcov.mean[j]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
169 adjusted.lr[j] = logratios.mean[j] - fitted.mean
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
170 }
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
171
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
172 fit.mean2 = lm(adjusted.lr ~ logcov.mean)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
173 fit.mean.a = fit.mean2$coefficient[1]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
174 fit.mean.b = fit.mean2$coefficient[2]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
175
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
176 fit.mean.fn <- function(x, fit.a, fit.b){
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
177 result = fit.a + fit.b * x
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
178 return (result)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
179 }
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
180
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
181 # Adjust SD based on the new adjusted log ratios
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
182 logratios.sd = c()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
183 logcov.bins.mean= c()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
184 for (j in 1:actual.bin){
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
185 lr.bins.mean = as.matrix(adjusted.lr[cn.average.aboveTs$V15==j])
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
186 # logratios.sd = c(logratios.sd, sd(lr.bins.mean))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
187 logratios.sd = c(logratios.sd, apply(lr.bins.mean,2,sd))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
188
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
189 cn.coverage.tumour.nth = as.matrix(cn.average.aboveTs[cn.average.aboveTs$V15==j,]$V8)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
190 cn.coverage.normal.nth = as.matrix(cn.average.aboveTs[cn.average.aboveTs$V15==j,]$V9)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
191 cn.coverage.nth = (cn.coverage.tumour.nth + cn.coverage.normal.nth) /2
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
192 logcov.bins.mean= c(logcov.bins.mean, log2(mean(cn.coverage.nth)))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
193
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
194 }
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
195
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
196 logratios.sd.ori = logratios.sd
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
197 if (length(logratios.sd) > 2) {
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
198 logratios.sd = logratios.sd[-length(logratios.sd)]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
199 }
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
200
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
201 logcov.bins.mean.ori = logcov.bins.mean
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
202 if (length(logcov.bins.mean) > 2){
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
203 logcov.bins.mean= logcov.bins.mean[-length(logcov.bins.mean)]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
204 }
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
205
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
206 fit.sd = lm(log2(logratios.sd) ~ logcov.bins.mean)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
207 fit.sd.a = fit.sd$coefficient[1]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
208 fit.sd.b = fit.sd$coefficient[2]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
209
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
210 fit.sd.fn <- function(x, fit.a, fit.b){
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
211 result = 2 ^ (fit.mean.fn(x, fit.a, fit.b))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
212 return (result)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
213 }
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
214
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
215 # Get the P Values, called the gain/loss
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
216 # with average and sd from each bins
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
217 pVal.list = c()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
218 gain.loss = c()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
219
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
220 for (i in 1:nrow(cn.average.list)){
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
221 #print (i)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
222 #logratio = cn.average.list[i]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
223 #logcov = log(normal.rd.ori[i],2)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
224 logratio = adjusted.lr[i]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
225 logcov = logcov.mean[i]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
226 exon.bin = Bin[i]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
227
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
228 if (length(logratios.sd) > 1){
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
229 #pVal <- pnorm(logratio, fit.mean.fn(logcov, fit.mean.a, fit.mean.b), fit.sd.fn(logcov, fit.sd.a, fit.sd.b))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
230 pVal <- pnorm(logratio, fit.mean.fn(logcov, fit.mean.a, fit.mean.b), sd.fn(logcov))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
231 } else {
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
232 pVal <- pnorm(logratio, 0, logratios.sd[exon.bin])
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
233 }
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
234
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
235 if (pVal > 0.5){
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
236 pVal = 1-pVal
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
237 gain.loss = c(gain.loss, "gain")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
238 } else {
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
239 gain.loss = c(gain.loss, "loss")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
240 }
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
241 pVal.list = c(pVal.list, pVal*2)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
242 }
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
243
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
244 # Get the adjusted P Values
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
245 adjusted.pVal.list = p.adjust(pVal.list, method="BH")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
246
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
247 # Write the output into a tab-delimited text files
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
248 outdf=data.frame(Targeted.Region.ID=ids,Exon.Number=exons,Gene.Sym=gs,Chr, OriStCoordinate, OriEndCoordinate, Mean.of.LogRatio=cn.average.list, Adjusted.Mean.of.LogRatio=adjusted.lr, SD.of.LogRatio=sd, Median.of.LogRatio=median, number.bases, P.Value=pVal.list ,Adjusted.P.Value=adjusted.pVal.list , gain.loss, tumour.rd, normal.rd, tumour.rd.ori, normal.rd.ori, MinLogRatio, MaxLogRatio, BinNumber = Bin)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
249
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
250 #min_normal_rd_for_call=5
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
251 #min_tumour_rd_for_call=0
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
252 #min_avg_cov_for_call=20
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
253 outdf$tumour.rd.ori = outdf$tumour.rd.ori-0.5
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
254 outdf$normal.rd.ori = outdf$normal.rd.ori-0.5
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
255 wh.to.excl = outdf$normal.rd.ori < min_normal_rd_for_call
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
256 wh.to.excl = wh.to.excl | outdf$tumour.rd.ori < min_tumour_rd_for_call
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
257 wh.to.excl = wh.to.excl | (outdf$tumour.rd.ori+outdf$normal.rd.ori)/2 < min_avg_cov_for_call
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
258 outdf$P.Value[wh.to.excl]=NA
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
259 outdf$Adjusted.P.Value[wh.to.excl]=NA
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
260
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
261
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
262 write.table(outdf,out.f,sep="\t",quote=F,row.names=F,col.names=T)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
263
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
264 #Plotting SD
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
265 #a.sd.fn = rep(fit.sd.a, length(logratios.sd.ori))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
266 #b.sd.fn = rep(fit.sd.b, length(logratios.sd.ori))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
267 #sd.after.fit = fit.sd.fn(logcov.bins.mean.ori, fit.sd.a, fit.sd.b)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
268 #sd.out.f = paste(outf, "/plot/", sample.name, "sd.data_fit.", bins, "bins.txt", sep="")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
269 #sd.outdf = data.frame(SD.Before.Fit = logratios.sd.ori, Log.Coverage = logcov.bins.mean.ori, SD.After.Fit = sd.after.fit, a.for.fitting=a.sd.fn, b.for.fitting=b.sd.fn)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
270 #write.table(sd.outdf, sd.out.f,sep="\t", quote=F, row.names=F, col.names=T)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
271
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
272
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
273 #End of the script
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
274 print ("End of cn_analysis.R")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
275 print (i)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
276
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
277
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
278