annotate mosaics/R/mosaicsFit.R @ 10:d78c3c5e8ff8 draft

Uploaded
author dongjun
date Thu, 10 Jan 2013 16:01:28 -0500
parents b6d0c6ceda2c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
1
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
2 # fit MOSAiCS
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
3
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
4 setMethod(
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
5 f="mosaicsFit",
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
6 signature="BinData",
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
7 definition=function( object, analysisType="automatic", bgEst="automatic",
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
8 k=3, meanThres=NA, s=2, d=0.25, truncProb=0.999,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
9 parallel=FALSE, nCore=8 )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
10 {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
11 # Note: users can tune parameters only regarding MOSAiCS model fitting.
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
12 # Note: tuning adaptive griding parameters is not supported yet.
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
13
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
14 # check options: parallel computing (optional)
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
15
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
16 if ( parallel == TRUE ) {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
17 message( "Use 'parallel' package for parallel computing." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
18 if ( length(find.package('parallel',quiet=TRUE)) == 0 ) {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
19 stop( "Please install 'parallel' package!" )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
20 }
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
21 }
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
22
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
23 if ( analysisType == "automatic" ) {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
24 # if "analysisType" is not specified, "analysisType" is determined by dataset
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
25
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
26 if ( length(object@input)==0 ) {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
27 # we don't have input: we should have both of M and GC
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
28
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
29 if ( length(object@mappability)>0 & length(object@gcContent)>0 ) {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
30 #cat( "Info: one-sample analysis.\n" )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
31 analysisType <- "OS"
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
32 } else {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
33 stop( "any of control data, mappability, or GC content does not exist. Cannot proceed!\n" )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
34 }
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
35 } else {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
36 # we have input: TS if we have both M & GC; IO otherwise.
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
37
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
38 if ( length(object@mappability)>0 & length(object@gcContent)>0 ) {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
39 #cat( "Info: two-sample analysis (with mappability & GC content).\n" )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
40 analysisType <- "TS"
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
41 } else {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
42 #cat( "Info: two-sample analysis (Input only).\n" )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
43 analysisType <- "IO"
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
44 }
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
45 }
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
46 } else {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
47 # if "analysisType" is specified, check its validity
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
48
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
49 # error treatment: Input-only analysis is impossible if input data does not exist
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
50
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
51 if ( analysisType=="IO" & length(object@input)==0 )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
52 {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
53 message( "Info: two-sample analysis (Input only)." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
54 stop( "control data does not exist. Cannot proceed!\n" )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
55 }
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
56
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
57 # error treatment: If M or GC does not exist, TS analysis is not available
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
58
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
59 if ( analysisType=="OS" & ( length(object@mappability)==0 | length(object@gcContent)==0 ) )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
60 {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
61 message( "Info: one-sample analysis." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
62 stop( "mappability or GC content does not exist. Cannot proceed!\n" )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
63 }
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
64
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
65 # error treatment: If input data does not exist, TS analysis is not available
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
66
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
67 if ( analysisType=="TS" & length(object@input)==0 )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
68 {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
69 message( "Info: two-sample analysis (with mappability & GC content)." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
70 message( "Info: control data does not exist." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
71 message( "Info: one-sample analysis will be implemented instead." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
72 analysisType <- "OS"
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
73 }
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
74
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
75 # error treatment: If M or GC does not exist, TS analysis is not available
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
76
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
77 if ( analysisType=="TS" & ( length(object@mappability)==0 | length(object@gcContent)==0 ) )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
78 {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
79 message( "Info: two-sample analysis (with mappability & GC content)." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
80 message( "Info: mappability or GC content data does not exist." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
81 message( "Info: two-sample analysis (Input only) will be implemented instead." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
82 analysisType <- "IO"
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
83 }
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
84 }
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
85
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
86 # check validity of "bgEst"
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
87
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
88 if ( bgEst == "automatic" ) {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
89 message( "Info: background estimation method is determined based on data." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
90
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
91 Y_freq <- table( object@tagCount )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
92
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
93 if ( sum(Y_freq[ as.numeric(names(Y_freq))<=2 ]) / sum(Y_freq) > 0.5 ) {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
94 message( "Info: background estimation based on bins with low tag counts." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
95 bgEst <- "matchLow"
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
96 } else {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
97 message( "Info: background estimation based on robust method of moment" )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
98 bgEst <- "rMOM"
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
99 }
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
100 } else if ( bgEst == "matchLow" ) {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
101 message( "Info: background estimation based on bins with low tag counts." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
102 } else if ( bgEst == "rMOM" ) {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
103 message( "Info: background estimation based on robust method of moment." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
104 } else {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
105 stop( "Incorrect specification for 'bgEst'! 'bgEst' should be one of 'matchLow', 'rMOM', or 'automatic'!" )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
106 }
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
107
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
108 # default meanThres for each of "OS" & "TS"
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
109
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
110 if ( is.na(meanThres) )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
111 {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
112 switch( analysisType,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
113 OS = {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
114 meanThres <- 0
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
115 },
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
116 TS = {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
117 meanThres <- 1
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
118 },
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
119 IO = {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
120 meanThres <- NA # meanThres is not used for analysisType=="IO"
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
121 }
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
122 )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
123 }
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
124
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
125 # MOSAiCS model fit
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
126
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
127 switch( analysisType,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
128 OS = {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
129 # one-sample analysis
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
130
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
131 message( "Info: one-sample analysis." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
132 fit <- .mosaicsFit_OS( object, bgEst=bgEst, k=k, meanThres=meanThres,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
133 parallel=parallel, nCore=nCore )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
134 },
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
135 TS = {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
136 # two-sample analysis (with M & GC)
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
137
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
138 message( "Info: two-sample analysis (with mappability & GC content)." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
139 fit <- .mosaicsFit_TS( object, bgEst=bgEst,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
140 k=k, meanThres=meanThres, s=s, d=d,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
141 parallel=parallel, nCore=nCore )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
142 },
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
143 IO = {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
144 # two-sample analysis (Input only)
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
145
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
146 message( "Info: two-sample analysis (Input only)." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
147 fit <- .mosaicsFit_IO( object, bgEst=bgEst,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
148 k=k, d=d, truncProb=truncProb,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
149 parallel=parallel, nCore=nCore )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
150 }
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
151 )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
152
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
153 message( "Info: done!" )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
154
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
155 return(fit)
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
156 }
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
157 )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
158
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
159 # MOSAiCS one-sample analysis
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
160
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
161 .mosaicsFit_OS <- function( binData, bgEst, k=3, meanThres=0,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
162 parallel=FALSE, nCore=8 )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
163 {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
164 message( "Info: use adaptive griding." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
165 message( "Info: fitting background model..." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
166 fitParam <- .adapGridMosaicsZ0_OS(
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
167 Y=binData@tagCount, M=binData@mappability, GC=binData@gcContent,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
168 bgEst=bgEst, min_n_MGC=50, grids_MGC=c(0.01,0.02,0.04,0.10,0.20,0.50),
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
169 parallel=parallel, nCore=nCore )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
170 fitZ0 <- .rlmFit_OS( parEst=fitParam, mean_thres=meanThres,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
171 Y=binData@tagCount, M=binData@mappability, GC=binData@gcContent )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
172 pNfit <- .calcPN( Y=binData@tagCount, k=k, a=fitZ0$a, mu_est=fitZ0$muEst )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
173
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
174 rm( fitParam )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
175 gc()
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
176 message( "Info: done!" )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
177
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
178 Y_bd_all <- .calcYbdAll( fitZ0, k=k )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
179 message( "Info: fitting one-signal-component model..." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
180 fitZ1_1S <- .mosaicsZ1_1S( fitZ0, Y=binData@tagCount,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
181 pNfit=pNfit, Y_bd_all=Y_bd_all, k=k )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
182 message( "Info: fitting two-signal-component model..." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
183 fitZ1_2S <- .mosaicsZ1_2S( fitZ0, Y=binData@tagCount,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
184 pNfit=pNfit, Y_bd_all=Y_bd_all, k=k )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
185
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
186 message( "Info: calculating BIC of fitted models..." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
187 fitBIC_1S <- .calcModelBIC( fitZ1=fitZ1_1S, Y=binData@tagCount,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
188 pNfit=pNfit, k=k, model="1S", type="BIC", npar=9 )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
189 fitBIC_2S <- .calcModelBIC( fitZ1=fitZ1_2S, Y=binData@tagCount,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
190 pNfit=pNfit, k=k, model="2S", type="BIC", npar=12 )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
191
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
192 mosaicsEst <- new( "MosaicsFitEst",
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
193 pi0=fitZ0$pi0, a=fitZ0$a,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
194 betaEst=fitZ0$betaEst, muEst=fitZ0$muEst, pNfit=pNfit,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
195 b=fitZ1_1S$b, c=fitZ1_1S$c,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
196 p1=fitZ1_2S$p1, b1=fitZ1_2S$b1, c1=fitZ1_2S$c1, b2=fitZ1_2S$b2, c2=fitZ1_2S$c2,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
197 analysisType="OS" )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
198
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
199 rm( fitZ0, fitZ1_1S, fitZ1_2S )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
200 gc()
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
201
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
202 mosaicsParam <- new( "MosaicsFitParam", k=k, meanThres=meanThres )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
203
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
204 new( "MosaicsFit",
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
205 mosaicsEst=mosaicsEst, mosaicsParam=mosaicsParam,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
206 chrID=binData@chrID, coord=binData@coord, tagCount=binData@tagCount,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
207 mappability=binData@mappability, gcContent=binData@gcContent,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
208 bic1S=fitBIC_1S, bic2S=fitBIC_2S )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
209 }
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
210
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
211 # MOSAiCS two-sample analysis (with M & GC)
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
212
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
213 .mosaicsFit_TS <- function( binData, bgEst, k=3, meanThres=1, s=2, d=0.25,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
214 parallel=FALSE, nCore=8 )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
215 {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
216 message( "Info: use adaptive griding." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
217 message( "Info: fitting background model..." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
218
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
219 # warning if there are insufficient # of bins with 0, 1, 2 counts in control sample
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
220 # -> input-only analysis is preferred.
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
221
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
222 X_freq <- table(binData@input)
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
223 if( sum(X_freq[ as.numeric(names(X_freq))<=2 ])/sum(X_freq) < 0.5 ) {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
224 message( paste("Info: insufficient # of bins with counts <=", s, "in control sample.") )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
225 message( "Info: Model fit can be unstable. Input-only analysis might be preferred." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
226 }
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
227
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
228 fitParam <- .adapGridMosaicsZ0_TS(
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
229 Y=binData@tagCount, M=binData@mappability, GC=binData@gcContent, X=binData@input,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
230 bgEst=bgEst, min_n_MGC=50, grids_MGC=c(0.01,0.02,0.04,0.10,0.20,0.50), min_n_X=200,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
231 parallel=parallel, nCore=nCore )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
232 fitZ0 <- .rlmFit_TS( parEst=fitParam, mean_thres=meanThres, s=s, d=d,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
233 Y=binData@tagCount, M=binData@mappability, GC=binData@gcContent, X=binData@input )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
234 pNfit <- .calcPN( Y=binData@tagCount, k=k, a=fitZ0$a, mu_est=fitZ0$muEst )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
235
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
236 rm( fitParam )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
237 gc()
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
238 message( "Info: done!" )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
239
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
240 Y_bd_all <- .calcYbdAll( fitZ0, k=k )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
241 message( "Info: fitting one-signal-component model..." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
242 fitZ1_1S <- .mosaicsZ1_1S( fitZ0, Y=binData@tagCount,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
243 pNfit=pNfit, Y_bd_all=Y_bd_all, k=k )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
244 message( "Info: fitting two-signal-component model..." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
245 fitZ1_2S <- .mosaicsZ1_2S( fitZ0, Y=binData@tagCount,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
246 pNfit=pNfit, Y_bd_all=Y_bd_all, k=k )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
247
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
248 message( "Info: calculating BIC of fitted models..." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
249 fitBIC_1S <- .calcModelBIC( fitZ1=fitZ1_1S, Y=binData@tagCount,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
250 pNfit=pNfit, k=k, model="1S", type="BIC", npar=11 )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
251 fitBIC_2S <- .calcModelBIC( fitZ1=fitZ1_2S, Y=binData@tagCount,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
252 pNfit=pNfit, k=k, model="2S", type="BIC", npar=14 )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
253
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
254 mosaicsEst <- new( "MosaicsFitEst",
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
255 pi0=fitZ0$pi0, a=fitZ0$a,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
256 betaEst=fitZ0$betaEst, muEst=fitZ0$muEst, pNfit=pNfit,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
257 b=fitZ1_1S$b, c=fitZ1_1S$c,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
258 p1=fitZ1_2S$p1, b1=fitZ1_2S$b1, c1=fitZ1_2S$c1, b2=fitZ1_2S$b2, c2=fitZ1_2S$c2,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
259 analysisType="TS" )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
260
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
261 rm( fitZ0, fitZ1_1S, fitZ1_2S )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
262 gc()
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
263
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
264 mosaicsParam <- new( "MosaicsFitParam", k=k, meanThres=meanThres, s=s, d=d )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
265
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
266 new( "MosaicsFit",
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
267 mosaicsEst=mosaicsEst, mosaicsParam=mosaicsParam,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
268 chrID=binData@chrID, coord=binData@coord,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
269 tagCount=binData@tagCount, input=binData@input,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
270 mappability=binData@mappability, gcContent=binData@gcContent,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
271 bic1S=fitBIC_1S, bic2S=fitBIC_2S )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
272 }
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
273
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
274 # MOSAiCS two-sample analysis (Input only)
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
275
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
276 .mosaicsFit_IO <- function( binData, bgEst, k=3, d=0.25, truncProb=0.999,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
277 parallel=FALSE, nCore=8 )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
278 {
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
279 message( "Info: use adaptive griding." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
280 message( "Info: fitting background model..." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
281
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
282 inputTrunc <- quantile( binData@input, truncProb )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
283
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
284 #fitParam <- .adapGridMosaicsZ0_IO( Y=binData@tagCount, X=binData@input,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
285 # min_n_X=50 )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
286 fitParam <- .adapGridMosaicsZ0_IO( Y=binData@tagCount, X=binData@input,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
287 bgEst=bgEst, inputTrunc=inputTrunc, min_n_X=50,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
288 parallel=parallel, nCore=nCore )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
289 fitZ0 <- .rlmFit_IO( parEst=fitParam, d=d, Y=binData@tagCount,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
290 X=binData@input, inputTrunc=inputTrunc )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
291 pNfit <- .calcPN( Y=binData@tagCount, k=k, a=fitZ0$a, mu_est=fitZ0$muEst )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
292
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
293 rm( fitParam )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
294 gc()
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
295 message( "Info: done!" )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
296
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
297 Y_bd_all <- .calcYbdAll( fitZ0, k=k )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
298 message( "Info: fitting one-signal-component model..." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
299 fitZ1_1S <- .mosaicsZ1_1S( fitZ0, Y=binData@tagCount,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
300 pNfit=pNfit, Y_bd_all=Y_bd_all, k=k )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
301 message( "Info: fitting two-signal-component model..." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
302 fitZ1_2S <- .mosaicsZ1_2S( fitZ0, Y=binData@tagCount,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
303 pNfit=pNfit, Y_bd_all=Y_bd_all, k=k )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
304
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
305 message( "Info: calculating BIC of fitted models..." )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
306 fitBIC_1S <- .calcModelBIC( fitZ1=fitZ1_1S, Y=binData@tagCount,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
307 pNfit=pNfit, k=k, model="1S", type="BIC", npar=6 )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
308 fitBIC_2S <- .calcModelBIC( fitZ1=fitZ1_2S, Y=binData@tagCount,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
309 pNfit=pNfit, k=k, model="2S", type="BIC", npar=9 )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
310
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
311 mosaicsEst <- new( "MosaicsFitEst",
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
312 pi0=fitZ0$pi0, a=fitZ0$a,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
313 betaEst=fitZ0$betaEst, muEst=fitZ0$muEst, pNfit=pNfit,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
314 b=fitZ1_1S$b, c=fitZ1_1S$c,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
315 p1=fitZ1_2S$p1, b1=fitZ1_2S$b1, c1=fitZ1_2S$c1, b2=fitZ1_2S$b2, c2=fitZ1_2S$c2,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
316 inputTrunc=inputTrunc, analysisType="IO" )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
317
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
318 rm( fitZ0, fitZ1_1S, fitZ1_2S )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
319 gc()
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
320
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
321 mosaicsParam <- new( "MosaicsFitParam", k=k, d=d )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
322
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
323 new( "MosaicsFit",
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
324 mosaicsEst=mosaicsEst, mosaicsParam=mosaicsParam,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
325 chrID=binData@chrID, coord=binData@coord,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
326 tagCount=binData@tagCount, input=binData@input,
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
327 bic1S=fitBIC_1S, bic2S=fitBIC_2S )
b6d0c6ceda2c Uploaded
dongjun
parents:
diff changeset
328 }