annotate mlmm/source_library/mlmm1.r @ 0:6b7107812931 draft

Uploaded
author dereeper
date Thu, 02 Jul 2015 05:42:38 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
6b7107812931 Uploaded
dereeper
parents:
diff changeset
1 ##############################################################################################################################################
6b7107812931 Uploaded
dereeper
parents:
diff changeset
2 ###MLMM - Multi-Locus Mixed Model
6b7107812931 Uploaded
dereeper
parents:
diff changeset
3 ###SET OF FUNCTIONS TO CARRY GWAS CORRECTING FOR POPULATION STRUCTURE WHILE INCLUDING COFACTORS THROUGH A STEPWISE-REGRESSION APPROACH
6b7107812931 Uploaded
dereeper
parents:
diff changeset
4 #######
6b7107812931 Uploaded
dereeper
parents:
diff changeset
5 #
6b7107812931 Uploaded
dereeper
parents:
diff changeset
6 ##note: require EMMA
6b7107812931 Uploaded
dereeper
parents:
diff changeset
7 #library(emma)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
8 #source('emma.r')
6b7107812931 Uploaded
dereeper
parents:
diff changeset
9 #
6b7107812931 Uploaded
dereeper
parents:
diff changeset
10 ##REQUIRED DATA & FORMAT
6b7107812931 Uploaded
dereeper
parents:
diff changeset
11 #
6b7107812931 Uploaded
dereeper
parents:
diff changeset
12 #PHENOTYPE - Y: a vector of length m, with names(Y)=individual names
6b7107812931 Uploaded
dereeper
parents:
diff changeset
13 #GENOTYPE - X: a n by m matrix, where n=number of individuals, m=n umber of SNPs, with rownames(X)=individual names, and colnames(X)=SNP names
6b7107812931 Uploaded
dereeper
parents:
diff changeset
14 #KINSHIP - K: a n by n matrix, with rownames(K)=colnames(K)=individual names
6b7107812931 Uploaded
dereeper
parents:
diff changeset
15 #each of these data being sorted in the same way, according to the individual name
6b7107812931 Uploaded
dereeper
parents:
diff changeset
16 #
6b7107812931 Uploaded
dereeper
parents:
diff changeset
17 ##FOR PLOTING THE GWAS RESULTS
6b7107812931 Uploaded
dereeper
parents:
diff changeset
18 #SNP INFORMATION - snp_info: a data frame having at least 3 columns:
6b7107812931 Uploaded
dereeper
parents:
diff changeset
19 # - 1 named 'SNP', with SNP names (same as colnames(X)),
6b7107812931 Uploaded
dereeper
parents:
diff changeset
20 # - 1 named 'Chr', with the chromosome number to which belong each SNP
6b7107812931 Uploaded
dereeper
parents:
diff changeset
21 # - 1 named 'Pos', with the position of the SNP onto the chromosome it belongs to.
6b7107812931 Uploaded
dereeper
parents:
diff changeset
22 #######
6b7107812931 Uploaded
dereeper
parents:
diff changeset
23 #
6b7107812931 Uploaded
dereeper
parents:
diff changeset
24 ##FUNCTIONS USE
6b7107812931 Uploaded
dereeper
parents:
diff changeset
25 #save this file somewhere on your computer and source it!
6b7107812931 Uploaded
dereeper
parents:
diff changeset
26 #source('path/mlmm.r')
6b7107812931 Uploaded
dereeper
parents:
diff changeset
27 #
6b7107812931 Uploaded
dereeper
parents:
diff changeset
28 ###FORWARD + BACKWARD ANALYSES
6b7107812931 Uploaded
dereeper
parents:
diff changeset
29 #mygwas<-mlmm(Y,X,K,nbchunks,maxsteps)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
30 #X,Y,K as described above
6b7107812931 Uploaded
dereeper
parents:
diff changeset
31 #nbchunks: an integer defining the number of chunks of to run the analysis, allows to decrease the memory usage ==> minimum=2, increase it if you do not have enough memory
6b7107812931 Uploaded
dereeper
parents:
diff changeset
32 #maxsteps: maximum number of steps desired in the forward approach. The forward approach breaks automatically once the pseudo-heritability is close to 0,
6b7107812931 Uploaded
dereeper
parents:
diff changeset
33 # however to avoid doing too many steps in case the pseudo-heritability does not reach a value close to 0, this parameter is also used.
6b7107812931 Uploaded
dereeper
parents:
diff changeset
34 # It's value must be specified as an integer >= 3
6b7107812931 Uploaded
dereeper
parents:
diff changeset
35 #
6b7107812931 Uploaded
dereeper
parents:
diff changeset
36 ###RESULTS
6b7107812931 Uploaded
dereeper
parents:
diff changeset
37 #
6b7107812931 Uploaded
dereeper
parents:
diff changeset
38 ##STEPWISE TABLE
6b7107812931 Uploaded
dereeper
parents:
diff changeset
39 #mygwas$step_table
6b7107812931 Uploaded
dereeper
parents:
diff changeset
40 #
6b7107812931 Uploaded
dereeper
parents:
diff changeset
41 ##PLOTS
6b7107812931 Uploaded
dereeper
parents:
diff changeset
42 #
6b7107812931 Uploaded
dereeper
parents:
diff changeset
43 ##PLOTS FORM THE FORWARD TABLE
6b7107812931 Uploaded
dereeper
parents:
diff changeset
44 #plot_step_table(mygwas,type=c('h2','maxpval','BIC','extBIC'))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
45 #
6b7107812931 Uploaded
dereeper
parents:
diff changeset
46 ##RSS PLOT
6b7107812931 Uploaded
dereeper
parents:
diff changeset
47 #plot_step_RSS(mygwas)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
48 #
6b7107812931 Uploaded
dereeper
parents:
diff changeset
49 ##GWAS MANHATTAN PLOTS
6b7107812931 Uploaded
dereeper
parents:
diff changeset
50 #
6b7107812931 Uploaded
dereeper
parents:
diff changeset
51 #FORWARD STEPS
6b7107812931 Uploaded
dereeper
parents:
diff changeset
52 #plot_fwd_GWAS(mygwas,step,snp_info,pval_filt)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
53 #step=the step to be plotted in the forward approach, where 1 is the EMMAX scan (no cofactor)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
54 #snp_info as described above
6b7107812931 Uploaded
dereeper
parents:
diff changeset
55 #pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot
6b7107812931 Uploaded
dereeper
parents:
diff changeset
56 #
6b7107812931 Uploaded
dereeper
parents:
diff changeset
57 #OPTIMAL MODELS
6b7107812931 Uploaded
dereeper
parents:
diff changeset
58 #Automatic identification of the optimal models within the forwrad-backward models according to the extendedBIC or multiple-bonferonni criteria
6b7107812931 Uploaded
dereeper
parents:
diff changeset
59 #
6b7107812931 Uploaded
dereeper
parents:
diff changeset
60 #plot_opt_GWAS(mygwas,opt=c('extBIC','mbonf'),snp_info,pval_filt)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
61 #snp_info as described above
6b7107812931 Uploaded
dereeper
parents:
diff changeset
62 #pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot
6b7107812931 Uploaded
dereeper
parents:
diff changeset
63 #
6b7107812931 Uploaded
dereeper
parents:
diff changeset
64 ##GWAS MANHATTAN PLOT ZOOMED IN A REGION OF INTEREST
6b7107812931 Uploaded
dereeper
parents:
diff changeset
65 #plot_fwd_region(mygwas,step,snp_info,pval_filt,chrom,pos1,pos2)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
66 #step=the step to be plotted in the forward approach, where 1 is the EMMAX scan (no cofactor)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
67 #snp_info as described above
6b7107812931 Uploaded
dereeper
parents:
diff changeset
68 #pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot
6b7107812931 Uploaded
dereeper
parents:
diff changeset
69 #chrom is an integer specifying the chromosome on which the region of interest is
6b7107812931 Uploaded
dereeper
parents:
diff changeset
70 #pos1, pos2 are integers delimiting the region of interest in the same unit as Pos in snp_info
6b7107812931 Uploaded
dereeper
parents:
diff changeset
71 #
6b7107812931 Uploaded
dereeper
parents:
diff changeset
72 #plot_opt_region(mygwas,opt=c('extBIC','mbonf'),snp_info,pval_filt,chrom,pos1,pos2)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
73 #snp_info as described above
6b7107812931 Uploaded
dereeper
parents:
diff changeset
74 #pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot
6b7107812931 Uploaded
dereeper
parents:
diff changeset
75 #chrom is an integer specifying the chromosome on which the region of interest is
6b7107812931 Uploaded
dereeper
parents:
diff changeset
76 #pos1, pos2 are integers delimiting the region of interest in the same unit as Pos in snp_info
6b7107812931 Uploaded
dereeper
parents:
diff changeset
77 #
6b7107812931 Uploaded
dereeper
parents:
diff changeset
78 ##QQPLOTS of pvalues
6b7107812931 Uploaded
dereeper
parents:
diff changeset
79 #qqplot_fwd_GWAS(mygwas,nsteps)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
80 #nsteps=maximum number of forward steps to be displayed
6b7107812931 Uploaded
dereeper
parents:
diff changeset
81 #
6b7107812931 Uploaded
dereeper
parents:
diff changeset
82 #qqplot_opt_GWAS(mygwas,opt=c('extBIC','mbonf'))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
83 #
6b7107812931 Uploaded
dereeper
parents:
diff changeset
84 ##############################################################################################################################################
6b7107812931 Uploaded
dereeper
parents:
diff changeset
85
6b7107812931 Uploaded
dereeper
parents:
diff changeset
86 mlmm<-function(Y,X,K,nbchunks,maxsteps) {
6b7107812931 Uploaded
dereeper
parents:
diff changeset
87
6b7107812931 Uploaded
dereeper
parents:
diff changeset
88 n<-length(Y)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
89 m<-ncol(X)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
90
6b7107812931 Uploaded
dereeper
parents:
diff changeset
91 stopifnot(ncol(K) == n)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
92 stopifnot(nrow(K) == n)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
93 stopifnot(nrow(X) == n)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
94 stopifnot(nbchunks >= 2)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
95 stopifnot(maxsteps >= 3)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
96
6b7107812931 Uploaded
dereeper
parents:
diff changeset
97 #INTERCEPT
6b7107812931 Uploaded
dereeper
parents:
diff changeset
98
6b7107812931 Uploaded
dereeper
parents:
diff changeset
99 Xo<-rep(1,n)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
100
6b7107812931 Uploaded
dereeper
parents:
diff changeset
101 #K MATRIX NORMALISATION
6b7107812931 Uploaded
dereeper
parents:
diff changeset
102
6b7107812931 Uploaded
dereeper
parents:
diff changeset
103 K_norm<-(n-1)/sum((diag(n)-matrix(1,n,n)/n)*K)*K
6b7107812931 Uploaded
dereeper
parents:
diff changeset
104 rm(K)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
105
6b7107812931 Uploaded
dereeper
parents:
diff changeset
106 #step 0 : NULL MODEL
6b7107812931 Uploaded
dereeper
parents:
diff changeset
107 cof_fwd<-list()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
108 cof_fwd[[1]]<-as.matrix(Xo)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
109 colnames(cof_fwd[[1]])<-'Xo'
6b7107812931 Uploaded
dereeper
parents:
diff changeset
110
6b7107812931 Uploaded
dereeper
parents:
diff changeset
111 mod_fwd<-list()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
112 mod_fwd[[1]]<-emma.REMLE(Y,cof_fwd[[1]],K_norm)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
113
6b7107812931 Uploaded
dereeper
parents:
diff changeset
114 herit_fwd<-list()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
115 herit_fwd[[1]]<-mod_fwd[[1]]$vg/(mod_fwd[[1]]$vg+mod_fwd[[1]]$ve)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
116
6b7107812931 Uploaded
dereeper
parents:
diff changeset
117 RSSf<-list()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
118 RSSf[[1]]<-'NA'
6b7107812931 Uploaded
dereeper
parents:
diff changeset
119
6b7107812931 Uploaded
dereeper
parents:
diff changeset
120 RSS_H0<-list()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
121 RSS_H0[[1]]<-'NA'
6b7107812931 Uploaded
dereeper
parents:
diff changeset
122
6b7107812931 Uploaded
dereeper
parents:
diff changeset
123 df1<-1
6b7107812931 Uploaded
dereeper
parents:
diff changeset
124 df2<-list()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
125 df2[[1]]<-'NA'
6b7107812931 Uploaded
dereeper
parents:
diff changeset
126
6b7107812931 Uploaded
dereeper
parents:
diff changeset
127 Ftest<-list()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
128 Ftest[[1]]<-'NA'
6b7107812931 Uploaded
dereeper
parents:
diff changeset
129
6b7107812931 Uploaded
dereeper
parents:
diff changeset
130 pval<-list()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
131 pval[[1]]<-'NA'
6b7107812931 Uploaded
dereeper
parents:
diff changeset
132
6b7107812931 Uploaded
dereeper
parents:
diff changeset
133 fwd_lm<-list()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
134
6b7107812931 Uploaded
dereeper
parents:
diff changeset
135 cat('null model done! pseudo-h=',round(herit_fwd[[1]],3),'\n')
6b7107812931 Uploaded
dereeper
parents:
diff changeset
136
6b7107812931 Uploaded
dereeper
parents:
diff changeset
137 #step 1 : EMMAX
6b7107812931 Uploaded
dereeper
parents:
diff changeset
138
6b7107812931 Uploaded
dereeper
parents:
diff changeset
139 M<-solve(chol(mod_fwd[[1]]$vg*K_norm+mod_fwd[[1]]$ve*diag(n)))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
140 Y_t<-crossprod(M,Y)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
141 cof_fwd_t<-crossprod(M,cof_fwd[[1]])
6b7107812931 Uploaded
dereeper
parents:
diff changeset
142 fwd_lm[[1]]<-summary(lm(Y_t~0+cof_fwd_t))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
143 Res_H0<-fwd_lm[[1]]$residuals
6b7107812931 Uploaded
dereeper
parents:
diff changeset
144 Q_<-qr.Q(qr(cof_fwd_t))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
145
6b7107812931 Uploaded
dereeper
parents:
diff changeset
146 RSS<-list()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
147 for (j in 1:(nbchunks-1)) {
6b7107812931 Uploaded
dereeper
parents:
diff changeset
148 X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[1]])])[,((j-1)*round(m/nbchunks)+1):(j*round(m/nbchunks))])
6b7107812931 Uploaded
dereeper
parents:
diff changeset
149 RSS[[j]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)})
6b7107812931 Uploaded
dereeper
parents:
diff changeset
150 rm(X_t)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
151 X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[1]])])[,((j)*round(m/nbchunks)+1):(m-(ncol(cof_fwd[[1]])-1))])
6b7107812931 Uploaded
dereeper
parents:
diff changeset
152 RSS[[nbchunks]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)})
6b7107812931 Uploaded
dereeper
parents:
diff changeset
153 rm(X_t,j)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
154
6b7107812931 Uploaded
dereeper
parents:
diff changeset
155 RSSf[[2]]<-unlist(RSS)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
156 RSS_H0[[2]]<-sum(Res_H0^2)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
157 df2[[2]]<-n-df1-ncol(cof_fwd[[1]])
6b7107812931 Uploaded
dereeper
parents:
diff changeset
158 Ftest[[2]]<-(rep(RSS_H0[[2]],length(RSSf[[2]]))/RSSf[[2]]-1)*df2[[2]]/df1
6b7107812931 Uploaded
dereeper
parents:
diff changeset
159 pval[[2]]<-pf(Ftest[[2]],df1,df2[[2]],lower.tail=FALSE)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
160
6b7107812931 Uploaded
dereeper
parents:
diff changeset
161 cof_fwd[[2]]<-cbind(cof_fwd[[1]],X[,colnames(X) %in% names(which(RSSf[[2]]==min(RSSf[[2]]))[1])])
6b7107812931 Uploaded
dereeper
parents:
diff changeset
162 colnames(cof_fwd[[2]])<-c(colnames(cof_fwd[[1]]),names(which(RSSf[[2]]==min(RSSf[[2]]))[1]))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
163 mod_fwd[[2]]<-emma.REMLE(Y,cof_fwd[[2]],K_norm)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
164 herit_fwd[[2]]<-mod_fwd[[2]]$vg/(mod_fwd[[2]]$vg+mod_fwd[[2]]$ve)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
165 rm(M,Y_t,cof_fwd_t,Res_H0,Q_,RSS)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
166
6b7107812931 Uploaded
dereeper
parents:
diff changeset
167 cat('step 1 done! pseudo-h=',round(herit_fwd[[2]],3),'\n')
6b7107812931 Uploaded
dereeper
parents:
diff changeset
168
6b7107812931 Uploaded
dereeper
parents:
diff changeset
169 #FORWARD
6b7107812931 Uploaded
dereeper
parents:
diff changeset
170
6b7107812931 Uploaded
dereeper
parents:
diff changeset
171 for (i in 3:(maxsteps)) {
6b7107812931 Uploaded
dereeper
parents:
diff changeset
172 if (herit_fwd[[i-2]] < 0.01) break else {
6b7107812931 Uploaded
dereeper
parents:
diff changeset
173
6b7107812931 Uploaded
dereeper
parents:
diff changeset
174 M<-solve(chol(mod_fwd[[i-1]]$vg*K_norm+mod_fwd[[i-1]]$ve*diag(n)))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
175 Y_t<-crossprod(M,Y)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
176 cof_fwd_t<-crossprod(M,cof_fwd[[i-1]])
6b7107812931 Uploaded
dereeper
parents:
diff changeset
177 fwd_lm[[i-1]]<-summary(lm(Y_t~0+cof_fwd_t))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
178 Res_H0<-fwd_lm[[i-1]]$residuals
6b7107812931 Uploaded
dereeper
parents:
diff changeset
179 Q_ <- qr.Q(qr(cof_fwd_t))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
180
6b7107812931 Uploaded
dereeper
parents:
diff changeset
181 RSS<-list()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
182 for (j in 1:(nbchunks-1)) {
6b7107812931 Uploaded
dereeper
parents:
diff changeset
183 X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[i-1]])])[,((j-1)*round(m/nbchunks)+1):(j*round(m/nbchunks))])
6b7107812931 Uploaded
dereeper
parents:
diff changeset
184 RSS[[j]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)})
6b7107812931 Uploaded
dereeper
parents:
diff changeset
185 rm(X_t)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
186 X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[i-1]])])[,((j)*round(m/nbchunks)+1):(m-(ncol(cof_fwd[[i-1]])-1))])
6b7107812931 Uploaded
dereeper
parents:
diff changeset
187 RSS[[nbchunks]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)})
6b7107812931 Uploaded
dereeper
parents:
diff changeset
188 rm(X_t,j)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
189
6b7107812931 Uploaded
dereeper
parents:
diff changeset
190 RSSf[[i]]<-unlist(RSS)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
191 RSS_H0[[i]]<-sum(Res_H0^2)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
192 df2[[i]]<-n-df1-ncol(cof_fwd[[i-1]])
6b7107812931 Uploaded
dereeper
parents:
diff changeset
193 Ftest[[i]]<-(rep(RSS_H0[[i]],length(RSSf[[i]]))/RSSf[[i]]-1)*df2[[i]]/df1
6b7107812931 Uploaded
dereeper
parents:
diff changeset
194 pval[[i]]<-pf(Ftest[[i]],df1,df2[[i]],lower.tail=FALSE)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
195
6b7107812931 Uploaded
dereeper
parents:
diff changeset
196 cof_fwd[[i]]<-cbind(cof_fwd[[i-1]],X[,colnames(X) %in% names(which(RSSf[[i]]==min(RSSf[[i]]))[1])])
6b7107812931 Uploaded
dereeper
parents:
diff changeset
197 colnames(cof_fwd[[i]])<-c(colnames(cof_fwd[[i-1]]),names(which(RSSf[[i]]==min(RSSf[[i]]))[1]))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
198 mod_fwd[[i]]<-emma.REMLE(Y,cof_fwd[[i]],K_norm)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
199 herit_fwd[[i]]<-mod_fwd[[i]]$vg/(mod_fwd[[i]]$vg+mod_fwd[[i]]$ve)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
200 rm(M,Y_t,cof_fwd_t,Res_H0,Q_,RSS)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
201 cat('step ',i-1,' done! pseudo-h=',round(herit_fwd[[i]],3),'\n')}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
202 rm(i)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
203
6b7107812931 Uploaded
dereeper
parents:
diff changeset
204 ##gls at last forward step
6b7107812931 Uploaded
dereeper
parents:
diff changeset
205 M<-solve(chol(mod_fwd[[length(mod_fwd)]]$vg*K_norm+mod_fwd[[length(mod_fwd)]]$ve*diag(n)))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
206 Y_t<-crossprod(M,Y)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
207 cof_fwd_t<-crossprod(M,cof_fwd[[length(mod_fwd)]])
6b7107812931 Uploaded
dereeper
parents:
diff changeset
208 fwd_lm[[length(mod_fwd)]]<-summary(lm(Y_t~0+cof_fwd_t))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
209
6b7107812931 Uploaded
dereeper
parents:
diff changeset
210 Res_H0<-fwd_lm[[length(mod_fwd)]]$residuals
6b7107812931 Uploaded
dereeper
parents:
diff changeset
211 Q_ <- qr.Q(qr(cof_fwd_t))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
212
6b7107812931 Uploaded
dereeper
parents:
diff changeset
213 RSS<-list()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
214 for (j in 1:(nbchunks-1)) {
6b7107812931 Uploaded
dereeper
parents:
diff changeset
215 X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[length(mod_fwd)]])])[,((j-1)*round(m/nbchunks)+1):(j*round(m/nbchunks))])
6b7107812931 Uploaded
dereeper
parents:
diff changeset
216 RSS[[j]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)})
6b7107812931 Uploaded
dereeper
parents:
diff changeset
217 rm(X_t)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
218 X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[length(mod_fwd)]])])[,((j)*round(m/nbchunks)+1):(m-(ncol(cof_fwd[[length(mod_fwd)]])-1))])
6b7107812931 Uploaded
dereeper
parents:
diff changeset
219 RSS[[nbchunks]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)})
6b7107812931 Uploaded
dereeper
parents:
diff changeset
220 rm(X_t,j)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
221
6b7107812931 Uploaded
dereeper
parents:
diff changeset
222 RSSf[[length(mod_fwd)+1]]<-unlist(RSS)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
223 RSS_H0[[length(mod_fwd)+1]]<-sum(Res_H0^2)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
224 df2[[length(mod_fwd)+1]]<-n-df1-ncol(cof_fwd[[length(mod_fwd)]])
6b7107812931 Uploaded
dereeper
parents:
diff changeset
225 Ftest[[length(mod_fwd)+1]]<-(rep(RSS_H0[[length(mod_fwd)+1]],length(RSSf[[length(mod_fwd)+1]]))/RSSf[[length(mod_fwd)+1]]-1)*df2[[length(mod_fwd)+1]]/df1
6b7107812931 Uploaded
dereeper
parents:
diff changeset
226 pval[[length(mod_fwd)+1]]<-pf(Ftest[[length(mod_fwd)+1]],df1,df2[[length(mod_fwd)+1]],lower.tail=FALSE)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
227 rm(M,Y_t,cof_fwd_t,Res_H0,Q_,RSS)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
228
6b7107812931 Uploaded
dereeper
parents:
diff changeset
229 ##get max pval at each forward step
6b7107812931 Uploaded
dereeper
parents:
diff changeset
230 max_pval_fwd<-vector(mode="numeric",length=length(fwd_lm))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
231 max_pval_fwd[1]<-0
6b7107812931 Uploaded
dereeper
parents:
diff changeset
232 for (i in 2:length(fwd_lm)) {max_pval_fwd[i]<-max(fwd_lm[[i]]$coef[2:i,4])}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
233 rm(i)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
234
6b7107812931 Uploaded
dereeper
parents:
diff changeset
235 ##get the number of parameters & Loglikelihood from ML at each step
6b7107812931 Uploaded
dereeper
parents:
diff changeset
236 mod_fwd_LL<-list()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
237 mod_fwd_LL[[1]]<-list(nfixed=ncol(cof_fwd[[1]]),LL=emma.MLE(Y,cof_fwd[[1]],K_norm)$ML)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
238 for (i in 2:length(cof_fwd)) {mod_fwd_LL[[i]]<-list(nfixed=ncol(cof_fwd[[i]]),LL=emma.MLE(Y,cof_fwd[[i]],K_norm)$ML)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
239 rm(i)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
240
6b7107812931 Uploaded
dereeper
parents:
diff changeset
241 cat('backward analysis','\n')
6b7107812931 Uploaded
dereeper
parents:
diff changeset
242
6b7107812931 Uploaded
dereeper
parents:
diff changeset
243 ##BACKWARD (1st step == last fwd step)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
244
6b7107812931 Uploaded
dereeper
parents:
diff changeset
245 dropcof_bwd<-list()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
246 cof_bwd<-list()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
247 mod_bwd <- list()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
248 bwd_lm<-list()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
249 herit_bwd<-list()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
250
6b7107812931 Uploaded
dereeper
parents:
diff changeset
251 dropcof_bwd[[1]]<-'NA'
6b7107812931 Uploaded
dereeper
parents:
diff changeset
252 cof_bwd[[1]]<-as.matrix(cof_fwd[[length(mod_fwd)]][,!colnames(cof_fwd[[length(mod_fwd)]]) %in% dropcof_bwd[[1]]])
6b7107812931 Uploaded
dereeper
parents:
diff changeset
253 colnames(cof_bwd[[1]])<-colnames(cof_fwd[[length(mod_fwd)]])[!colnames(cof_fwd[[length(mod_fwd)]]) %in% dropcof_bwd[[1]]]
6b7107812931 Uploaded
dereeper
parents:
diff changeset
254 mod_bwd[[1]]<-emma.REMLE(Y,cof_bwd[[1]],K_norm)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
255 herit_bwd[[1]]<-mod_bwd[[1]]$vg/(mod_bwd[[1]]$vg+mod_bwd[[1]]$ve)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
256 M<-solve(chol(mod_bwd[[1]]$vg*K_norm+mod_bwd[[1]]$ve*diag(n)))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
257 Y_t<-crossprod(M,Y)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
258 cof_bwd_t<-crossprod(M,cof_bwd[[1]])
6b7107812931 Uploaded
dereeper
parents:
diff changeset
259 bwd_lm[[1]]<-summary(lm(Y_t~0+cof_bwd_t))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
260
6b7107812931 Uploaded
dereeper
parents:
diff changeset
261 rm(M,Y_t,cof_bwd_t)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
262
6b7107812931 Uploaded
dereeper
parents:
diff changeset
263 for (i in 2:length(mod_fwd)) {
6b7107812931 Uploaded
dereeper
parents:
diff changeset
264 dropcof_bwd[[i]]<-(colnames(cof_bwd[[i-1]])[2:ncol(cof_bwd[[i-1]])])[which(abs(bwd_lm[[i-1]]$coef[2:nrow(bwd_lm[[i-1]]$coef),3])==min(abs(bwd_lm[[i-1]]$coef[2:nrow(bwd_lm[[i-1]]$coef),3])))]
6b7107812931 Uploaded
dereeper
parents:
diff changeset
265 cof_bwd[[i]]<-as.matrix(cof_bwd[[i-1]][,!colnames(cof_bwd[[i-1]]) %in% dropcof_bwd[[i]]])
6b7107812931 Uploaded
dereeper
parents:
diff changeset
266 colnames(cof_bwd[[i]])<-colnames(cof_bwd[[i-1]])[!colnames(cof_bwd[[i-1]]) %in% dropcof_bwd[[i]]]
6b7107812931 Uploaded
dereeper
parents:
diff changeset
267 mod_bwd[[i]]<-emma.REMLE(Y,cof_bwd[[i]],K_norm)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
268 herit_bwd[[i]]<-mod_bwd[[i]]$vg/(mod_bwd[[i]]$vg+mod_bwd[[i]]$ve)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
269 M<-solve(chol(mod_bwd[[i]]$vg*K_norm+mod_bwd[[i]]$ve*diag(n)))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
270 Y_t<-crossprod(M,Y)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
271 cof_bwd_t<-crossprod(M,cof_bwd[[i]])
6b7107812931 Uploaded
dereeper
parents:
diff changeset
272 bwd_lm[[i]]<-summary(lm(Y_t~0+cof_bwd_t))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
273 rm(M,Y_t,cof_bwd_t)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
274
6b7107812931 Uploaded
dereeper
parents:
diff changeset
275 rm(i)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
276
6b7107812931 Uploaded
dereeper
parents:
diff changeset
277 ##get max pval at each backward step
6b7107812931 Uploaded
dereeper
parents:
diff changeset
278 max_pval_bwd<-vector(mode="numeric",length=length(bwd_lm))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
279 for (i in 1:(length(bwd_lm)-1)) {max_pval_bwd[i]<-max(bwd_lm[[i]]$coef[2:(length(bwd_lm)+1-i),4])}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
280 max_pval_bwd[length(bwd_lm)]<-0
6b7107812931 Uploaded
dereeper
parents:
diff changeset
281
6b7107812931 Uploaded
dereeper
parents:
diff changeset
282 ##get the number of parameters & Loglikelihood from ML at each step
6b7107812931 Uploaded
dereeper
parents:
diff changeset
283 mod_bwd_LL<-list()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
284 mod_bwd_LL[[1]]<-list(nfixed=ncol(cof_bwd[[1]]),LL=emma.MLE(Y,cof_bwd[[1]],K_norm)$ML)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
285 for (i in 2:length(cof_bwd)) {mod_bwd_LL[[i]]<-list(nfixed=ncol(cof_bwd[[i]]),LL=emma.MLE(Y,cof_bwd[[i]],K_norm)$ML)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
286 rm(i)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
287
6b7107812931 Uploaded
dereeper
parents:
diff changeset
288 cat('creating output','\n')
6b7107812931 Uploaded
dereeper
parents:
diff changeset
289
6b7107812931 Uploaded
dereeper
parents:
diff changeset
290 ##Forward Table: Fwd + Bwd Tables
6b7107812931 Uploaded
dereeper
parents:
diff changeset
291 #Compute parameters for model criteria
6b7107812931 Uploaded
dereeper
parents:
diff changeset
292 BIC<-function(x){-2*x$LL+(x$nfixed+1)*log(n)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
293 extBIC<-function(x){BIC(x)+2*lchoose(m,x$nfixed-1)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
294
6b7107812931 Uploaded
dereeper
parents:
diff changeset
295 fwd_table<-data.frame(step=ncol(cof_fwd[[1]])-1,step_=paste('fwd',ncol(cof_fwd[[1]])-1,sep=''),cof='NA',ncof=ncol(cof_fwd[[1]])-1,h2=herit_fwd[[1]]
6b7107812931 Uploaded
dereeper
parents:
diff changeset
296 ,maxpval=max_pval_fwd[1],BIC=BIC(mod_fwd_LL[[1]]),extBIC=extBIC(mod_fwd_LL[[1]]))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
297 for (i in 2:(length(mod_fwd))) {fwd_table<-rbind(fwd_table,
6b7107812931 Uploaded
dereeper
parents:
diff changeset
298 data.frame(step=ncol(cof_fwd[[i]])-1,step_=paste('fwd',ncol(cof_fwd[[i]])-1,sep=''),cof=paste('+',colnames(cof_fwd[[i]])[i],sep=''),ncof=ncol(cof_fwd[[i]])-1,h2=herit_fwd[[i]]
6b7107812931 Uploaded
dereeper
parents:
diff changeset
299 ,maxpval=max_pval_fwd[i],BIC=BIC(mod_fwd_LL[[i]]),extBIC=extBIC(mod_fwd_LL[[i]])))}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
300
6b7107812931 Uploaded
dereeper
parents:
diff changeset
301 rm(i)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
302
6b7107812931 Uploaded
dereeper
parents:
diff changeset
303 bwd_table<-data.frame(step=length(mod_fwd),step_=paste('bwd',0,sep=''),cof=paste('-',dropcof_bwd[[1]],sep=''),ncof=ncol(cof_bwd[[1]])-1,h2=herit_bwd[[1]]
6b7107812931 Uploaded
dereeper
parents:
diff changeset
304 ,maxpval=max_pval_bwd[1],BIC=BIC(mod_bwd_LL[[1]]),extBIC=extBIC(mod_bwd_LL[[1]]))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
305 for (i in 2:(length(mod_bwd))) {bwd_table<-rbind(bwd_table,
6b7107812931 Uploaded
dereeper
parents:
diff changeset
306 data.frame(step=length(mod_fwd)+i-1,step_=paste('bwd',i-1,sep=''),cof=paste('-',dropcof_bwd[[i]],sep=''),ncof=ncol(cof_bwd[[i]])-1,h2=herit_bwd[[i]]
6b7107812931 Uploaded
dereeper
parents:
diff changeset
307 ,maxpval=max_pval_bwd[i],BIC=BIC(mod_bwd_LL[[i]]),extBIC=extBIC(mod_bwd_LL[[i]])))}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
308
6b7107812931 Uploaded
dereeper
parents:
diff changeset
309 rm(i,BIC,extBIC,max_pval_fwd,max_pval_bwd,dropcof_bwd)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
310
6b7107812931 Uploaded
dereeper
parents:
diff changeset
311 fwdbwd_table<-rbind(fwd_table,bwd_table)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
312
6b7107812931 Uploaded
dereeper
parents:
diff changeset
313 #RSS for plot
6b7107812931 Uploaded
dereeper
parents:
diff changeset
314 mod_fwd_RSS<-vector()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
315 mod_fwd_RSS[1]<-sum((Y-cof_fwd[[1]]%*%fwd_lm[[1]]$coef[,1])^2)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
316 for (i in 2:length(mod_fwd)) {mod_fwd_RSS[i]<-sum((Y-cof_fwd[[i]]%*%fwd_lm[[i]]$coef[,1])^2)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
317 mod_bwd_RSS<-vector()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
318 mod_bwd_RSS[1]<-sum((Y-cof_bwd[[1]]%*%bwd_lm[[1]]$coef[,1])^2)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
319 for (i in 2:length(mod_bwd)) {mod_bwd_RSS[i]<-sum((Y-cof_bwd[[i]]%*%bwd_lm[[i]]$coef[,1])^2)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
320 expl_RSS<-c(1-sapply(mod_fwd_RSS,function(x){x/mod_fwd_RSS[1]}),1-sapply(mod_bwd_RSS,function(x){x/mod_bwd_RSS[length(mod_bwd_RSS)]}))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
321 h2_RSS<-c(unlist(herit_fwd),unlist(herit_bwd))*(1-expl_RSS)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
322 unexpl_RSS<-1-expl_RSS-h2_RSS
6b7107812931 Uploaded
dereeper
parents:
diff changeset
323 plot_RSS<-t(apply(cbind(expl_RSS,h2_RSS,unexpl_RSS),1,cumsum))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
324
6b7107812931 Uploaded
dereeper
parents:
diff changeset
325 #GLS pvals at each step
6b7107812931 Uploaded
dereeper
parents:
diff changeset
326 pval_step<-list()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
327 pval_step[[1]]<-list(out=data.frame('SNP'=colnames(X),'pval'=pval[[2]]),cof='')
6b7107812931 Uploaded
dereeper
parents:
diff changeset
328 for (i in 2:(length(mod_fwd))) {pval_step[[i]]<-list(out=rbind(data.frame(SNP=colnames(cof_fwd[[i]])[-1],'pval'=fwd_lm[[i]]$coef[2:i,4]),
6b7107812931 Uploaded
dereeper
parents:
diff changeset
329 data.frame(SNP=colnames(X)[-which(colnames(X) %in% colnames(cof_fwd[[i]]))],'pval'=pval[[i+1]])),cof=colnames(cof_fwd[[i]])[-1])}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
330
6b7107812931 Uploaded
dereeper
parents:
diff changeset
331 #GLS pvals for best models according to extBIC and mbonf
6b7107812931 Uploaded
dereeper
parents:
diff changeset
332
6b7107812931 Uploaded
dereeper
parents:
diff changeset
333 opt_extBIC<-fwdbwd_table[which(fwdbwd_table$extBIC==min(fwdbwd_table$extBIC))[1],]
6b7107812931 Uploaded
dereeper
parents:
diff changeset
334 opt_mbonf<-(fwdbwd_table[which(fwdbwd_table$maxpval<=0.05/m),])[which(fwdbwd_table[which(fwdbwd_table$maxpval<=0.05/m),]$ncof==max(fwdbwd_table[which(fwdbwd_table$maxpval<=0.05/m),]$ncof))[1],]
6b7107812931 Uploaded
dereeper
parents:
diff changeset
335 bestmodel_pvals<-function(model) {if(substr(model$step_,start=0,stop=3)=='fwd') {
6b7107812931 Uploaded
dereeper
parents:
diff changeset
336 pval_step[[as.integer(substring(model$step_,first=4))+1]]} else if (substr(model$step_,start=0,stop=3)=='bwd') {
6b7107812931 Uploaded
dereeper
parents:
diff changeset
337 cof<-cof_bwd[[as.integer(substring(model$step_,first=4))+1]]
6b7107812931 Uploaded
dereeper
parents:
diff changeset
338 mixedmod<-emma.REMLE(Y,cof,K_norm)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
339 M<-solve(chol(mixedmod$vg*K_norm+mixedmod$ve*diag(n)))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
340 Y_t<-crossprod(M,Y)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
341 cof_t<-crossprod(M,cof)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
342 GLS_lm<-summary(lm(Y_t~0+cof_t))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
343 Res_H0<-GLS_lm$residuals
6b7107812931 Uploaded
dereeper
parents:
diff changeset
344 Q_ <- qr.Q(qr(cof_t))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
345 RSS<-list()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
346 for (j in 1:(nbchunks-1)) {
6b7107812931 Uploaded
dereeper
parents:
diff changeset
347 X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof)])[,((j-1)*round(m/nbchunks)+1):(j*round(m/nbchunks))])
6b7107812931 Uploaded
dereeper
parents:
diff changeset
348 RSS[[j]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)})
6b7107812931 Uploaded
dereeper
parents:
diff changeset
349 rm(X_t)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
350 X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof)])[,((j)*round(m/nbchunks)+1):(m-(ncol(cof)-1))])
6b7107812931 Uploaded
dereeper
parents:
diff changeset
351 RSS[[nbchunks]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)})
6b7107812931 Uploaded
dereeper
parents:
diff changeset
352 rm(X_t,j)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
353 RSSf<-unlist(RSS)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
354 RSS_H0<-sum(Res_H0^2)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
355 df2<-n-df1-ncol(cof)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
356 Ftest<-(rep(RSS_H0,length(RSSf))/RSSf-1)*df2/df1
6b7107812931 Uploaded
dereeper
parents:
diff changeset
357 pval<-pf(Ftest,df1,df2,lower.tail=FALSE)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
358 list(out=rbind(data.frame(SNP=colnames(cof)[-1],'pval'=GLS_lm$coef[2:(ncol(cof)),4]),
6b7107812931 Uploaded
dereeper
parents:
diff changeset
359 data.frame('SNP'=colnames(X)[-which(colnames(X) %in% colnames(cof))],'pval'=pval)),cof=colnames(cof)[-1])} else {cat('error \n')}}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
360 opt_extBIC_out<-bestmodel_pvals(opt_extBIC)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
361 opt_mbonf_out<-bestmodel_pvals(opt_mbonf)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
362
6b7107812931 Uploaded
dereeper
parents:
diff changeset
363 list(step_table=fwdbwd_table,pval_step=pval_step,RSSout=plot_RSS,bonf_thresh=-log10(0.05/m),opt_extBIC=opt_extBIC_out,opt_mbonf=opt_mbonf_out)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
364
6b7107812931 Uploaded
dereeper
parents:
diff changeset
365 plot_step_table<-function(x,type){
6b7107812931 Uploaded
dereeper
parents:
diff changeset
366 if (type=='h2') {plot(x$step_table$step,x$step_table$h2,type='b',lty=2,pch=20,col='darkblue',xlab='step',ylab='h2')
6b7107812931 Uploaded
dereeper
parents:
diff changeset
367 abline(v=(nrow(x$step_table)/2-0.5),lty=2)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
368 else if (type=='maxpval'){plot(x$step_table$step,-log10(x$step_table$maxpval),type='b',lty=2,pch=20,col='darkblue',xlab='step',ylab='-log10(max_Pval)')
6b7107812931 Uploaded
dereeper
parents:
diff changeset
369 abline(h=x$bonf_thresh,lty=2)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
370 abline(v=(nrow(x$step_table)/2-0.5),lty=2)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
371 else if (type=='BIC'){plot(x$step_table$step,x$step_table$BIC,type='b',lty=2,pch=20,col='darkblue',xlab='step',ylab='BIC')
6b7107812931 Uploaded
dereeper
parents:
diff changeset
372 abline(v=(nrow(x$step_table)/2-0.5),lty=2)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
373 else if (type=='extBIC'){plot(x$step_table$step,x$step_table$extBIC,type='b',lty=2,pch=20,col='darkblue',xlab='step',ylab='EBIC')
6b7107812931 Uploaded
dereeper
parents:
diff changeset
374 abline(v=(nrow(x$step_table)/2-0.5),lty=2)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
375 else {cat('error! \n argument type must be one of h2, maxpval, BIC, extBIC')}}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
376
6b7107812931 Uploaded
dereeper
parents:
diff changeset
377 plot_step_RSS<-function(x){
6b7107812931 Uploaded
dereeper
parents:
diff changeset
378 op<-par(mar=c(5, 5, 2, 2))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
379 plot(0,0,xlim=c(0,nrow(x$RSSout)-1),ylim=c(0,1),xlab='step',ylab='%var',col=0)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
380 polygon(c(0:(nrow(x$RSSout)-1),(nrow(x$RSSout)-1),0), c(x$RSSout[,3],0,0), col='brown1', border=0)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
381 polygon(c(0:(nrow(x$RSSout)-1),(nrow(x$RSSout)-1),0), c(x$RSSout[,2],0,0), col='forestgreen', border=0)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
382 polygon(c(0:(nrow(x$RSSout)-1),(nrow(x$RSSout)-1),0), c(x$RSSout[,1],0,0), col='dodgerblue4', border=0)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
383 abline(v=(nrow(x$step_table)/2-0.5),lty=2)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
384 par(op)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
385
6b7107812931 Uploaded
dereeper
parents:
diff changeset
386 plot_GWAS<-function(x) {
6b7107812931 Uploaded
dereeper
parents:
diff changeset
387 output_<-x$out[order(x$out$Pos),]
6b7107812931 Uploaded
dereeper
parents:
diff changeset
388 output_ok<-output_[order(output_$Chr),]
6b7107812931 Uploaded
dereeper
parents:
diff changeset
389
6b7107812931 Uploaded
dereeper
parents:
diff changeset
390 maxpos<-c(0,cumsum(as.numeric(aggregate(output_ok$Pos,list(output_ok$Chr),max)$x+max(cumsum(as.numeric(aggregate(output_ok$Pos,list(output_ok$Chr),max)$x)))/200)))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
391 plot_col<-rep(c('gray10','gray60'),ceiling(max(unique(output_ok$Chr))/2))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
392 # plot_col<-c('blue','darkgreen','red','cyan','purple')
6b7107812931 Uploaded
dereeper
parents:
diff changeset
393 size<-aggregate(output_ok$Pos,list(output_ok$Chr),length)$x
6b7107812931 Uploaded
dereeper
parents:
diff changeset
394 a<-rep(maxpos[1],size[1])
6b7107812931 Uploaded
dereeper
parents:
diff changeset
395 b<-rep(plot_col[1],size[1])
6b7107812931 Uploaded
dereeper
parents:
diff changeset
396 for (i in 2:max(unique(output_ok$Chr))){
6b7107812931 Uploaded
dereeper
parents:
diff changeset
397 a<-c(a,rep(maxpos[i],size[i]))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
398 b<-c(b,rep(plot_col[i],size[i]))}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
399
6b7107812931 Uploaded
dereeper
parents:
diff changeset
400 output_ok$xpos<-output_ok$Pos+a
6b7107812931 Uploaded
dereeper
parents:
diff changeset
401 output_ok$col<-b
6b7107812931 Uploaded
dereeper
parents:
diff changeset
402 output_ok$col[output_ok$SNP %in% x$cof]<-'red'
6b7107812931 Uploaded
dereeper
parents:
diff changeset
403
6b7107812931 Uploaded
dereeper
parents:
diff changeset
404 d<-(aggregate(output_ok$xpos,list(output_ok$Chr),min)$x+aggregate(output_ok$xpos,list(output_ok$Chr),max)$x)/2
6b7107812931 Uploaded
dereeper
parents:
diff changeset
405
6b7107812931 Uploaded
dereeper
parents:
diff changeset
406 plot(output_ok$xpos,-log10(output_ok$pval),col=output_ok$col,pch=20,ylab='-log10(pval)',xaxt='n',xlab='chromosome')
6b7107812931 Uploaded
dereeper
parents:
diff changeset
407 axis(1,tick=FALSE,at=d,labels=c(1:max(unique(output_ok$Chr))))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
408 abline(h=x$bonf_thresh,lty=3,col='black')
6b7107812931 Uploaded
dereeper
parents:
diff changeset
409
6b7107812931 Uploaded
dereeper
parents:
diff changeset
410
6b7107812931 Uploaded
dereeper
parents:
diff changeset
411 if (length(output_ok$pval[-log10(output_ok$pval) > x$bonf_thresh]) > 0) {
6b7107812931 Uploaded
dereeper
parents:
diff changeset
412 text(output_ok$xpos[-log10(output_ok$pval) > x$bonf_thresh], -log10(output_ok$pval[-log10(output_ok$pval) > x$bonf_thresh]), output_ok$SNP[-log10(output_ok$pval) > x$bonf_thresh], pos=3, cex=0.7)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
413 legend("topright", lty=3, paste("bonf thresh :", x$bonf_thresh ,sep=" "))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
414 } else {
6b7107812931 Uploaded
dereeper
parents:
diff changeset
415 legend("topright", lty=3, paste("bonf thresh :", x$bonf_thresh ,sep=" "))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
416 }
6b7107812931 Uploaded
dereeper
parents:
diff changeset
417 }
6b7107812931 Uploaded
dereeper
parents:
diff changeset
418
6b7107812931 Uploaded
dereeper
parents:
diff changeset
419 plot_region<-function(x,chrom,pos1,pos2){
6b7107812931 Uploaded
dereeper
parents:
diff changeset
420 region<-subset(x$out,Chr==chrom & Pos>=pos1 & Pos <=pos2)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
421 region$col<- if (chrom %% 2 == 0) {'gray60'} else {'gray10'}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
422 region$col[which(region$SNP %in% x$cof)]<-'red'
6b7107812931 Uploaded
dereeper
parents:
diff changeset
423 plot(region$Pos,-log10(region$pval),type='p',pch=20,main=paste('chromosome',chrom,sep=''),xlab='position (bp)',ylab='-log10(pval)',col=region$col,xlim=c(pos1,pos2))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
424 abline(h=x$bonf_thresh,lty=3,col='black')}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
425
6b7107812931 Uploaded
dereeper
parents:
diff changeset
426
6b7107812931 Uploaded
dereeper
parents:
diff changeset
427 plot_fwd_GWAS<-function(x,step,snp_info,pval_filt) {
6b7107812931 Uploaded
dereeper
parents:
diff changeset
428 stopifnot(step<=length(x$pval_step))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
429 output<-list(out=subset(merge(snp_info,x$pval_step[[step]]$out,by='SNP'),pval<=pval_filt),cof=x$pval_step[[step]]$cof,bonf_thresh=x$bonf_thresh)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
430 plot_GWAS(output)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
431
6b7107812931 Uploaded
dereeper
parents:
diff changeset
432 plot_fwd_region<-function(x,step,snp_info,pval_filt,chrom,pos1,pos2) {
6b7107812931 Uploaded
dereeper
parents:
diff changeset
433 stopifnot(step<=length(x$pval_step))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
434 output<-list(out=subset(merge(snp_info,x$pval_step[[step]]$out,by='SNP'),pval<=pval_filt),cof=x$pval_step[[step]]$cof,bonf_thresh=x$bonf_thresh)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
435 plot_region(output,chrom,pos1,pos2)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
436
6b7107812931 Uploaded
dereeper
parents:
diff changeset
437
6b7107812931 Uploaded
dereeper
parents:
diff changeset
438 plot_opt_GWAS<-function(x,opt,snp_info,pval_filt) {
6b7107812931 Uploaded
dereeper
parents:
diff changeset
439 if (opt=='extBIC') {output<-list(out=subset(merge(snp_info,x$opt_extBIC$out,by='SNP'),pval<=pval_filt),cof=x$opt_extBIC$cof,bonf_thresh=x$bonf_thresh)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
440 plot_GWAS(output)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
441 else if (opt=='mbonf') {output<-list(out=subset(merge(snp_info,x$opt_mbonf$out,by='SNP'),pval<=pval_filt),cof=x$opt_mbonf$cof,bonf_thresh=x$bonf_thresh)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
442 plot_GWAS(output)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
443 else {cat('error! \n opt must be extBIC or mbonf')}}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
444
6b7107812931 Uploaded
dereeper
parents:
diff changeset
445 plot_opt_region<-function(x,opt,snp_info,pval_filt,chrom,pos1,pos2) {
6b7107812931 Uploaded
dereeper
parents:
diff changeset
446 if (opt=='extBIC') {output<-list(out=subset(merge(snp_info,x$opt_extBIC$out,by='SNP'),pval<=pval_filt),cof=x$opt_extBIC$cof,bonf_thresh=x$bonf_thresh)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
447 plot_region(output,chrom,pos1,pos2)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
448 else if (opt=='mbonf') {output<-list(out=subset(merge(snp_info,x$opt_mbonf$out,by='SNP'),pval<=pval_filt),cof=x$opt_mbonf$cof,bonf_thresh=x$bonf_thresh)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
449 plot_region(output,chrom,pos1,pos2)}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
450 else {cat('error! \n opt must be extBIC or mbonf')}}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
451
6b7107812931 Uploaded
dereeper
parents:
diff changeset
452
6b7107812931 Uploaded
dereeper
parents:
diff changeset
453 qqplot_fwd_GWAS<-function(x,nsteps){
6b7107812931 Uploaded
dereeper
parents:
diff changeset
454 stopifnot(nsteps<=length(x$pval_step))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
455 e<--log10(ppoints(nrow(x$pval_step[[1]]$out)))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
456 ostep<-list()
6b7107812931 Uploaded
dereeper
parents:
diff changeset
457 ostep[[1]]<--log10(sort(x$pval_step[[1]]$out$pval))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
458 for (i in 2:nsteps) {ostep[[i]]<--log10(sort(x$pval_step[[i]]$out$pval))}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
459
6b7107812931 Uploaded
dereeper
parents:
diff changeset
460 maxp<-ceiling(max(unlist(ostep)))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
461
6b7107812931 Uploaded
dereeper
parents:
diff changeset
462 plot(e,ostep[[1]],type='b',pch=20,cex=0.8,col=1,xlab=expression(Expected~~-log[10](italic(p))), ylab=expression(Observed~~-log[10](italic(p))),xlim=c(0,max(e)+1),ylim=c(0,maxp))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
463 abline(0,1,col="dark grey")
6b7107812931 Uploaded
dereeper
parents:
diff changeset
464
6b7107812931 Uploaded
dereeper
parents:
diff changeset
465 for (i in 2:nsteps) {
6b7107812931 Uploaded
dereeper
parents:
diff changeset
466 par(new=T)
6b7107812931 Uploaded
dereeper
parents:
diff changeset
467 plot(e,ostep[[i]],type='b',pch=20,cex=0.8,col=i,axes='F',xlab='',ylab='',xlim=c(0,max(e)+1),ylim=c(0,maxp))}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
468 legend(0,maxp,lty=1,pch=20,col=c(1:length(ostep)),paste(c(0:(length(ostep)-1)),'cof',sep=' '))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
469 }
6b7107812931 Uploaded
dereeper
parents:
diff changeset
470
6b7107812931 Uploaded
dereeper
parents:
diff changeset
471 qqplot_opt_GWAS<-function(x,opt){
6b7107812931 Uploaded
dereeper
parents:
diff changeset
472 if (opt=='extBIC') {
6b7107812931 Uploaded
dereeper
parents:
diff changeset
473 e<--log10(ppoints(nrow(x$opt_extBIC$out)))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
474 o<--log10(sort(x$opt_extBIC$out$pval))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
475 maxp<-ceiling(max(o))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
476 plot(e,o,type='b',pch=20,cex=0.8,col=1,xlab=expression(Expected~~-log[10](italic(p))), ylab=expression(Observed~~-log[10](italic(p))),xlim=c(0,max(e)+1),ylim=c(0,maxp),main=paste('optimal model according to extBIC'))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
477 abline(0,1,col="dark grey")}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
478 else if (opt=='mbonf') {
6b7107812931 Uploaded
dereeper
parents:
diff changeset
479 e<--log10(ppoints(nrow(x$opt_mbonf$out)))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
480 o<--log10(sort(x$opt_mbonf$out$pval))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
481 maxp<-ceiling(max(o))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
482 plot(e,o,type='b',pch=20,cex=0.8,col=1,xlab=expression(Expected~~-log[10](italic(p))), ylab=expression(Observed~~-log[10](italic(p))),xlim=c(0,max(e)+1),ylim=c(0,maxp),main=paste('optimal model according to mbonf'))
6b7107812931 Uploaded
dereeper
parents:
diff changeset
483 abline(0,1,col="dark grey")}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
484 else {cat('error! \n opt must be extBIC or mbonf')}}
6b7107812931 Uploaded
dereeper
parents:
diff changeset
485
6b7107812931 Uploaded
dereeper
parents:
diff changeset
486