Mercurial > repos > dereeper > mlmm
comparison source_library/mlmm.r @ 1:380b364980f9 draft default tip
planemo upload commit 475f4d7d8442a0d75e103af326ae5881c4d2a4ac
| author | dereeper | 
|---|---|
| date | Mon, 16 Apr 2018 08:50:05 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| 0:6b7107812931 | 1:380b364980f9 | 
|---|---|
| 1 ############################################################################################################################################## | |
| 2 ###MLMM - Multi-Locus Mixed Model | |
| 3 ###SET OF FUNCTIONS TO CARRY GWAS CORRECTING FOR POPULATION STRUCTURE WHILE INCLUDING COFACTORS THROUGH A STEPWISE-REGRESSION APPROACH | |
| 4 ####### | |
| 5 # | |
| 6 ##note: require EMMA | |
| 7 #library(emma) | |
| 8 #source('emma.r') | |
| 9 # | |
| 10 ##REQUIRED DATA & FORMAT | |
| 11 # | |
| 12 #PHENOTYPE - Y: a vector of length m, with names(Y)=individual names | |
| 13 #GENOTYPE - X: a n by m matrix, where n=number of individuals, m=number of SNPs, with rownames(X)=individual names, and colnames(X)=SNP names | |
| 14 #KINSHIP - K: a n by n matrix, with rownames(K)=colnames(K)=individual names | |
| 15 #each of these data being sorted in the same way, according to the individual name | |
| 16 # | |
| 17 ##FOR PLOTING THE GWAS RESULTS | |
| 18 #SNP INFORMATION - snp_info: a data frame having at least 3 columns: | |
| 19 # - 1 named 'SNP', with SNP names (same as colnames(X)), | |
| 20 # - 1 named 'Chr', with the chromosome number to which belong each SNP | |
| 21 # - 1 named 'Pos', with the position of the SNP onto the chromosome it belongs to. | |
| 22 ####### | |
| 23 # | |
| 24 ##FUNCTIONS USE | |
| 25 #save this file somewhere on your computer and source it! | |
| 26 #source('path/mlmm.r') | |
| 27 # | |
| 28 ###FORWARD + BACKWARD ANALYSES | |
| 29 #mygwas<-mlmm(Y,X,K,nbchunks,maxsteps) | |
| 30 #X,Y,K as described above | |
| 31 #nbchunks: an integer defining the number of chunks of X to run the analysis, allows to decrease the memory usage ==> minimum=2, increase it if you do not have enough memory | |
| 32 #maxsteps: maximum number of steps desired in the forward approach. The forward approach breaks automatically once the pseudo-heritability is close to 0, | |
| 33 # however to avoid doing too many steps in case the pseudo-heritability does not reach a value close to 0, this parameter is also used. | |
| 34 # It's value must be specified as an integer >= 3 | |
| 35 # | |
| 36 ###RESULTS | |
| 37 # | |
| 38 ##STEPWISE TABLE | |
| 39 #mygwas$step_table | |
| 40 # | |
| 41 ##PLOTS | |
| 42 # | |
| 43 ##PLOTS FORM THE FORWARD TABLE | |
| 44 #plot_step_table(mygwas,type=c('h2','maxpval','BIC','extBIC')) | |
| 45 # | |
| 46 ##RSS PLOT | |
| 47 #plot_step_RSS(mygwas) | |
| 48 # | |
| 49 ##GWAS MANHATTAN PLOTS | |
| 50 # | |
| 51 #FORWARD STEPS | |
| 52 #plot_fwd_GWAS(mygwas,step,snp_info,pval_filt) | |
| 53 #step=the step to be plotted in the forward approach, where 1 is the EMMAX scan (no cofactor) | |
| 54 #snp_info as described above | |
| 55 #pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot | |
| 56 # | |
| 57 #OPTIMAL MODELS | |
| 58 #Automatic identification of the optimal models within the forwrad-backward models according to the extendedBIC or multiple-bonferonni criteria | |
| 59 # | |
| 60 #plot_opt_GWAS(mygwas,opt=c('extBIC','mbonf'),snp_info,pval_filt) | |
| 61 #snp_info as described above | |
| 62 #pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot | |
| 63 # | |
| 64 ##GWAS MANHATTAN PLOT ZOOMED IN A REGION OF INTEREST | |
| 65 #plot_fwd_region(mygwas,step,snp_info,pval_filt,chrom,pos1,pos2) | |
| 66 #step=the step to be plotted in the forward approach, where 1 is the EMMAX scan (no cofactor) | |
| 67 #snp_info as described above | |
| 68 #pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot | |
| 69 #chrom is an integer specifying the chromosome on which the region of interest is | |
| 70 #pos1, pos2 are integers delimiting the region of interest in the same unit as Pos in snp_info | |
| 71 # | |
| 72 #plot_opt_region(mygwas,opt=c('extBIC','mbonf'),snp_info,pval_filt,chrom,pos1,pos2) | |
| 73 #snp_info as described above | |
| 74 #pval_filt=a p-value threshold for filtering the output, only p-vals below this threshold will be displayed in the plot | |
| 75 #chrom is an integer specifying the chromosome on which the region of interest is | |
| 76 #pos1, pos2 are integers delimiting the region of interest in the same unit as Pos in snp_info | |
| 77 # | |
| 78 ##QQPLOTS of pvalues | |
| 79 #qqplot_fwd_GWAS(mygwas,nsteps) | |
| 80 #nsteps=maximum number of forward steps to be displayed | |
| 81 # | |
| 82 #qqplot_opt_GWAS(mygwas,opt=c('extBIC','mbonf')) | |
| 83 # | |
| 84 ############################################################################################################################################## | |
| 85 | |
| 86 mlmm<-function(Y,X,K,nbchunks,maxsteps) { | |
| 87 | |
| 88 n<-length(Y) | |
| 89 m<-ncol(X) | |
| 90 | |
| 91 stopifnot(ncol(K) == n) | |
| 92 stopifnot(nrow(K) == n) | |
| 93 stopifnot(nrow(X) == n) | |
| 94 stopifnot(nbchunks >= 2) | |
| 95 stopifnot(maxsteps >= 3) | |
| 96 | |
| 97 #INTERCEPT | |
| 98 | |
| 99 Xo<-rep(1,n) | |
| 100 | |
| 101 #K MATRIX NORMALISATION | |
| 102 | |
| 103 K_norm<-(n-1)/sum((diag(n)-matrix(1,n,n)/n)*K)*K | |
| 104 rm(K) | |
| 105 | |
| 106 #step 0 : NULL MODEL | |
| 107 cof_fwd<-list() | |
| 108 cof_fwd[[1]]<-as.matrix(Xo) | |
| 109 colnames(cof_fwd[[1]])<-'Xo' | |
| 110 | |
| 111 mod_fwd<-list() | |
| 112 mod_fwd[[1]]<-emma.REMLE(Y,cof_fwd[[1]],K_norm) | |
| 113 | |
| 114 herit_fwd<-list() | |
| 115 herit_fwd[[1]]<-mod_fwd[[1]]$vg/(mod_fwd[[1]]$vg+mod_fwd[[1]]$ve) | |
| 116 | |
| 117 RSSf<-list() | |
| 118 RSSf[[1]]<-'NA' | |
| 119 | |
| 120 RSS_H0<-list() | |
| 121 RSS_H0[[1]]<-'NA' | |
| 122 | |
| 123 df1<-1 | |
| 124 df2<-list() | |
| 125 df2[[1]]<-'NA' | |
| 126 | |
| 127 Ftest<-list() | |
| 128 Ftest[[1]]<-'NA' | |
| 129 | |
| 130 pval<-list() | |
| 131 pval[[1]]<-'NA' | |
| 132 | |
| 133 fwd_lm<-list() | |
| 134 | |
| 135 cat('null model done! pseudo-h=',round(herit_fwd[[1]],3),'\n') | |
| 136 | |
| 137 #step 1 : EMMAX | |
| 138 | |
| 139 M<-solve(chol(mod_fwd[[1]]$vg*K_norm+mod_fwd[[1]]$ve*diag(n))) | |
| 140 Y_t<-crossprod(M,Y) | |
| 141 cof_fwd_t<-crossprod(M,cof_fwd[[1]]) | |
| 142 fwd_lm[[1]]<-summary(lm(Y_t~0+cof_fwd_t)) | |
| 143 Res_H0<-fwd_lm[[1]]$residuals | |
| 144 Q_<-qr.Q(qr(cof_fwd_t)) | |
| 145 | |
| 146 RSS<-list() | |
| 147 for (j in 1:(nbchunks-1)) { | |
| 148 X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[1]])])[,((j-1)*round(m/nbchunks)+1):(j*round(m/nbchunks))]) | |
| 149 RSS[[j]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) | |
| 150 rm(X_t)} | |
| 151 X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[1]])])[,((j)*round(m/nbchunks)+1):(m-(ncol(cof_fwd[[1]])-1))]) | |
| 152 RSS[[nbchunks]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) | |
| 153 rm(X_t,j) | |
| 154 | |
| 155 RSSf[[2]]<-unlist(RSS) | |
| 156 RSS_H0[[2]]<-sum(Res_H0^2) | |
| 157 df2[[2]]<-n-df1-ncol(cof_fwd[[1]]) | |
| 158 Ftest[[2]]<-(rep(RSS_H0[[2]],length(RSSf[[2]]))/RSSf[[2]]-1)*df2[[2]]/df1 | |
| 159 pval[[2]]<-pf(Ftest[[2]],df1,df2[[2]],lower.tail=FALSE) | |
| 160 | |
| 161 cof_fwd[[2]]<-cbind(cof_fwd[[1]],X[,colnames(X) %in% names(which(RSSf[[2]]==min(RSSf[[2]]))[1])]) | |
| 162 colnames(cof_fwd[[2]])<-c(colnames(cof_fwd[[1]]),names(which(RSSf[[2]]==min(RSSf[[2]]))[1])) | |
| 163 mod_fwd[[2]]<-emma.REMLE(Y,cof_fwd[[2]],K_norm) | |
| 164 herit_fwd[[2]]<-mod_fwd[[2]]$vg/(mod_fwd[[2]]$vg+mod_fwd[[2]]$ve) | |
| 165 rm(M,Y_t,cof_fwd_t,Res_H0,Q_,RSS) | |
| 166 | |
| 167 cat('step 1 done! pseudo-h=',round(herit_fwd[[2]],3),'\n') | |
| 168 | |
| 169 #FORWARD | |
| 170 | |
| 171 for (i in 3:(maxsteps)) { | |
| 172 if (herit_fwd[[i-2]] < 0.01) break else { | |
| 173 | |
| 174 M<-solve(chol(mod_fwd[[i-1]]$vg*K_norm+mod_fwd[[i-1]]$ve*diag(n))) | |
| 175 Y_t<-crossprod(M,Y) | |
| 176 cof_fwd_t<-crossprod(M,cof_fwd[[i-1]]) | |
| 177 fwd_lm[[i-1]]<-summary(lm(Y_t~0+cof_fwd_t)) | |
| 178 Res_H0<-fwd_lm[[i-1]]$residuals | |
| 179 Q_ <- qr.Q(qr(cof_fwd_t)) | |
| 180 | |
| 181 RSS<-list() | |
| 182 for (j in 1:(nbchunks-1)) { | |
| 183 X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[i-1]])])[,((j-1)*round(m/nbchunks)+1):(j*round(m/nbchunks))]) | |
| 184 RSS[[j]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) | |
| 185 rm(X_t)} | |
| 186 X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[i-1]])])[,((j)*round(m/nbchunks)+1):(m-(ncol(cof_fwd[[i-1]])-1))]) | |
| 187 RSS[[nbchunks]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) | |
| 188 rm(X_t,j) | |
| 189 | |
| 190 RSSf[[i]]<-unlist(RSS) | |
| 191 RSS_H0[[i]]<-sum(Res_H0^2) | |
| 192 df2[[i]]<-n-df1-ncol(cof_fwd[[i-1]]) | |
| 193 Ftest[[i]]<-(rep(RSS_H0[[i]],length(RSSf[[i]]))/RSSf[[i]]-1)*df2[[i]]/df1 | |
| 194 pval[[i]]<-pf(Ftest[[i]],df1,df2[[i]],lower.tail=FALSE) | |
| 195 | |
| 196 cof_fwd[[i]]<-cbind(cof_fwd[[i-1]],X[,colnames(X) %in% names(which(RSSf[[i]]==min(RSSf[[i]]))[1])]) | |
| 197 colnames(cof_fwd[[i]])<-c(colnames(cof_fwd[[i-1]]),names(which(RSSf[[i]]==min(RSSf[[i]]))[1])) | |
| 198 mod_fwd[[i]]<-emma.REMLE(Y,cof_fwd[[i]],K_norm) | |
| 199 herit_fwd[[i]]<-mod_fwd[[i]]$vg/(mod_fwd[[i]]$vg+mod_fwd[[i]]$ve) | |
| 200 rm(M,Y_t,cof_fwd_t,Res_H0,Q_,RSS)} | |
| 201 cat('step ',i-1,' done! pseudo-h=',round(herit_fwd[[i]],3),'\n')} | |
| 202 rm(i) | |
| 203 | |
| 204 ##gls at last forward step | |
| 205 M<-solve(chol(mod_fwd[[length(mod_fwd)]]$vg*K_norm+mod_fwd[[length(mod_fwd)]]$ve*diag(n))) | |
| 206 Y_t<-crossprod(M,Y) | |
| 207 cof_fwd_t<-crossprod(M,cof_fwd[[length(mod_fwd)]]) | |
| 208 fwd_lm[[length(mod_fwd)]]<-summary(lm(Y_t~0+cof_fwd_t)) | |
| 209 | |
| 210 Res_H0<-fwd_lm[[length(mod_fwd)]]$residuals | |
| 211 Q_ <- qr.Q(qr(cof_fwd_t)) | |
| 212 | |
| 213 RSS<-list() | |
| 214 for (j in 1:(nbchunks-1)) { | |
| 215 X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[length(mod_fwd)]])])[,((j-1)*round(m/nbchunks)+1):(j*round(m/nbchunks))]) | |
| 216 RSS[[j]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) | |
| 217 rm(X_t)} | |
| 218 X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof_fwd[[length(mod_fwd)]])])[,((j)*round(m/nbchunks)+1):(m-(ncol(cof_fwd[[length(mod_fwd)]])-1))]) | |
| 219 RSS[[nbchunks]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) | |
| 220 rm(X_t,j) | |
| 221 | |
| 222 RSSf[[length(mod_fwd)+1]]<-unlist(RSS) | |
| 223 RSS_H0[[length(mod_fwd)+1]]<-sum(Res_H0^2) | |
| 224 df2[[length(mod_fwd)+1]]<-n-df1-ncol(cof_fwd[[length(mod_fwd)]]) | |
| 225 Ftest[[length(mod_fwd)+1]]<-(rep(RSS_H0[[length(mod_fwd)+1]],length(RSSf[[length(mod_fwd)+1]]))/RSSf[[length(mod_fwd)+1]]-1)*df2[[length(mod_fwd)+1]]/df1 | |
| 226 pval[[length(mod_fwd)+1]]<-pf(Ftest[[length(mod_fwd)+1]],df1,df2[[length(mod_fwd)+1]],lower.tail=FALSE) | |
| 227 rm(M,Y_t,cof_fwd_t,Res_H0,Q_,RSS) | |
| 228 | |
| 229 ##get max pval at each forward step | |
| 230 max_pval_fwd<-vector(mode="numeric",length=length(fwd_lm)) | |
| 231 max_pval_fwd[1]<-0 | |
| 232 for (i in 2:length(fwd_lm)) {max_pval_fwd[i]<-max(fwd_lm[[i]]$coef[2:i,4])} | |
| 233 rm(i) | |
| 234 | |
| 235 ##get the number of parameters & Loglikelihood from ML at each step | |
| 236 mod_fwd_LL<-list() | |
| 237 mod_fwd_LL[[1]]<-list(nfixed=ncol(cof_fwd[[1]]),LL=emma.MLE(Y,cof_fwd[[1]],K_norm)$ML) | |
| 238 for (i in 2:length(cof_fwd)) {mod_fwd_LL[[i]]<-list(nfixed=ncol(cof_fwd[[i]]),LL=emma.MLE(Y,cof_fwd[[i]],K_norm)$ML)} | |
| 239 rm(i) | |
| 240 | |
| 241 cat('backward analysis','\n') | |
| 242 | |
| 243 ##BACKWARD (1st step == last fwd step) | |
| 244 | |
| 245 dropcof_bwd<-list() | |
| 246 cof_bwd<-list() | |
| 247 mod_bwd <- list() | |
| 248 bwd_lm<-list() | |
| 249 herit_bwd<-list() | |
| 250 | |
| 251 dropcof_bwd[[1]]<-'NA' | |
| 252 cof_bwd[[1]]<-as.matrix(cof_fwd[[length(mod_fwd)]][,!colnames(cof_fwd[[length(mod_fwd)]]) %in% dropcof_bwd[[1]]]) | |
| 253 colnames(cof_bwd[[1]])<-colnames(cof_fwd[[length(mod_fwd)]])[!colnames(cof_fwd[[length(mod_fwd)]]) %in% dropcof_bwd[[1]]] | |
| 254 mod_bwd[[1]]<-emma.REMLE(Y,cof_bwd[[1]],K_norm) | |
| 255 herit_bwd[[1]]<-mod_bwd[[1]]$vg/(mod_bwd[[1]]$vg+mod_bwd[[1]]$ve) | |
| 256 M<-solve(chol(mod_bwd[[1]]$vg*K_norm+mod_bwd[[1]]$ve*diag(n))) | |
| 257 Y_t<-crossprod(M,Y) | |
| 258 cof_bwd_t<-crossprod(M,cof_bwd[[1]]) | |
| 259 bwd_lm[[1]]<-summary(lm(Y_t~0+cof_bwd_t)) | |
| 260 | |
| 261 rm(M,Y_t,cof_bwd_t) | |
| 262 | |
| 263 for (i in 2:length(mod_fwd)) { | |
| 264 dropcof_bwd[[i]]<-(colnames(cof_bwd[[i-1]])[2:ncol(cof_bwd[[i-1]])])[which(abs(bwd_lm[[i-1]]$coef[2:nrow(bwd_lm[[i-1]]$coef),3])==min(abs(bwd_lm[[i-1]]$coef[2:nrow(bwd_lm[[i-1]]$coef),3])))] | |
| 265 cof_bwd[[i]]<-as.matrix(cof_bwd[[i-1]][,!colnames(cof_bwd[[i-1]]) %in% dropcof_bwd[[i]]]) | |
| 266 colnames(cof_bwd[[i]])<-colnames(cof_bwd[[i-1]])[!colnames(cof_bwd[[i-1]]) %in% dropcof_bwd[[i]]] | |
| 267 mod_bwd[[i]]<-emma.REMLE(Y,cof_bwd[[i]],K_norm) | |
| 268 herit_bwd[[i]]<-mod_bwd[[i]]$vg/(mod_bwd[[i]]$vg+mod_bwd[[i]]$ve) | |
| 269 M<-solve(chol(mod_bwd[[i]]$vg*K_norm+mod_bwd[[i]]$ve*diag(n))) | |
| 270 Y_t<-crossprod(M,Y) | |
| 271 cof_bwd_t<-crossprod(M,cof_bwd[[i]]) | |
| 272 bwd_lm[[i]]<-summary(lm(Y_t~0+cof_bwd_t)) | |
| 273 rm(M,Y_t,cof_bwd_t)} | |
| 274 | |
| 275 rm(i) | |
| 276 | |
| 277 ##get max pval at each backward step | |
| 278 max_pval_bwd<-vector(mode="numeric",length=length(bwd_lm)) | |
| 279 for (i in 1:(length(bwd_lm)-1)) {max_pval_bwd[i]<-max(bwd_lm[[i]]$coef[2:(length(bwd_lm)+1-i),4])} | |
| 280 max_pval_bwd[length(bwd_lm)]<-0 | |
| 281 | |
| 282 ##get the number of parameters & Loglikelihood from ML at each step | |
| 283 mod_bwd_LL<-list() | |
| 284 mod_bwd_LL[[1]]<-list(nfixed=ncol(cof_bwd[[1]]),LL=emma.MLE(Y,cof_bwd[[1]],K_norm)$ML) | |
| 285 for (i in 2:length(cof_bwd)) {mod_bwd_LL[[i]]<-list(nfixed=ncol(cof_bwd[[i]]),LL=emma.MLE(Y,cof_bwd[[i]],K_norm)$ML)} | |
| 286 rm(i) | |
| 287 | |
| 288 cat('creating output','\n') | |
| 289 | |
| 290 ##Forward Table: Fwd + Bwd Tables | |
| 291 #Compute parameters for model criteria | |
| 292 BIC<-function(x){-2*x$LL+(x$nfixed+1)*log(n)} | |
| 293 extBIC<-function(x){BIC(x)+2*lchoose(m,x$nfixed-1)} | |
| 294 | |
| 295 fwd_table<-data.frame(step=ncol(cof_fwd[[1]])-1,step_=paste('fwd',ncol(cof_fwd[[1]])-1,sep=''),cof='NA',ncof=ncol(cof_fwd[[1]])-1,h2=herit_fwd[[1]] | |
| 296 ,maxpval=max_pval_fwd[1],BIC=BIC(mod_fwd_LL[[1]]),extBIC=extBIC(mod_fwd_LL[[1]])) | |
| 297 for (i in 2:(length(mod_fwd))) {fwd_table<-rbind(fwd_table, | |
| 298 data.frame(step=ncol(cof_fwd[[i]])-1,step_=paste('fwd',ncol(cof_fwd[[i]])-1,sep=''),cof=paste('+',colnames(cof_fwd[[i]])[i],sep=''),ncof=ncol(cof_fwd[[i]])-1,h2=herit_fwd[[i]] | |
| 299 ,maxpval=max_pval_fwd[i],BIC=BIC(mod_fwd_LL[[i]]),extBIC=extBIC(mod_fwd_LL[[i]])))} | |
| 300 | |
| 301 rm(i) | |
| 302 | |
| 303 bwd_table<-data.frame(step=length(mod_fwd),step_=paste('bwd',0,sep=''),cof=paste('-',dropcof_bwd[[1]],sep=''),ncof=ncol(cof_bwd[[1]])-1,h2=herit_bwd[[1]] | |
| 304 ,maxpval=max_pval_bwd[1],BIC=BIC(mod_bwd_LL[[1]]),extBIC=extBIC(mod_bwd_LL[[1]])) | |
| 305 for (i in 2:(length(mod_bwd))) {bwd_table<-rbind(bwd_table, | |
| 306 data.frame(step=length(mod_fwd)+i-1,step_=paste('bwd',i-1,sep=''),cof=paste('-',dropcof_bwd[[i]],sep=''),ncof=ncol(cof_bwd[[i]])-1,h2=herit_bwd[[i]] | |
| 307 ,maxpval=max_pval_bwd[i],BIC=BIC(mod_bwd_LL[[i]]),extBIC=extBIC(mod_bwd_LL[[i]])))} | |
| 308 | |
| 309 rm(i,BIC,extBIC,max_pval_fwd,max_pval_bwd,dropcof_bwd) | |
| 310 | |
| 311 fwdbwd_table<-rbind(fwd_table,bwd_table) | |
| 312 | |
| 313 #RSS for plot | |
| 314 mod_fwd_RSS<-vector() | |
| 315 mod_fwd_RSS[1]<-sum((Y-cof_fwd[[1]]%*%fwd_lm[[1]]$coef[,1])^2) | |
| 316 for (i in 2:length(mod_fwd)) {mod_fwd_RSS[i]<-sum((Y-cof_fwd[[i]]%*%fwd_lm[[i]]$coef[,1])^2)} | |
| 317 mod_bwd_RSS<-vector() | |
| 318 mod_bwd_RSS[1]<-sum((Y-cof_bwd[[1]]%*%bwd_lm[[1]]$coef[,1])^2) | |
| 319 for (i in 2:length(mod_bwd)) {mod_bwd_RSS[i]<-sum((Y-cof_bwd[[i]]%*%bwd_lm[[i]]$coef[,1])^2)} | |
| 320 expl_RSS<-c(1-sapply(mod_fwd_RSS,function(x){x/mod_fwd_RSS[1]}),1-sapply(mod_bwd_RSS,function(x){x/mod_bwd_RSS[length(mod_bwd_RSS)]})) | |
| 321 h2_RSS<-c(unlist(herit_fwd),unlist(herit_bwd))*(1-expl_RSS) | |
| 322 unexpl_RSS<-1-expl_RSS-h2_RSS | |
| 323 plot_RSS<-t(apply(cbind(expl_RSS,h2_RSS,unexpl_RSS),1,cumsum)) | |
| 324 | |
| 325 #GLS pvals at each step | |
| 326 pval_step<-list() | |
| 327 pval_step[[1]]<-list(out=data.frame('SNP'=colnames(X),'pval'=pval[[2]]),cof='') | |
| 328 for (i in 2:(length(mod_fwd))) {pval_step[[i]]<-list(out=rbind(data.frame(SNP=colnames(cof_fwd[[i]])[-1],'pval'=fwd_lm[[i]]$coef[2:i,4]), | |
| 329 data.frame(SNP=colnames(X)[-which(colnames(X) %in% colnames(cof_fwd[[i]]))],'pval'=pval[[i+1]])),cof=colnames(cof_fwd[[i]])[-1])} | |
| 330 | |
| 331 #GLS pvals for best models according to extBIC and mbonf | |
| 332 | |
| 333 opt_extBIC<-fwdbwd_table[which(fwdbwd_table$extBIC==min(fwdbwd_table$extBIC))[1],] | |
| 334 opt_mbonf<-(fwdbwd_table[which(fwdbwd_table$maxpval<=0.05/m),])[which(fwdbwd_table[which(fwdbwd_table$maxpval<=0.05/m),]$ncof==max(fwdbwd_table[which(fwdbwd_table$maxpval<=0.05/m),]$ncof))[1],] | |
| 335 bestmodel_pvals<-function(model) {if(substr(model$step_,start=0,stop=3)=='fwd') { | |
| 336 pval_step[[as.integer(substring(model$step_,first=4))+1]]} else if (substr(model$step_,start=0,stop=3)=='bwd') { | |
| 337 cof<-cof_bwd[[as.integer(substring(model$step_,first=4))+1]] | |
| 338 mixedmod<-emma.REMLE(Y,cof,K_norm) | |
| 339 M<-solve(chol(mixedmod$vg*K_norm+mixedmod$ve*diag(n))) | |
| 340 Y_t<-crossprod(M,Y) | |
| 341 cof_t<-crossprod(M,cof) | |
| 342 GLS_lm<-summary(lm(Y_t~0+cof_t)) | |
| 343 Res_H0<-GLS_lm$residuals | |
| 344 Q_ <- qr.Q(qr(cof_t)) | |
| 345 RSS<-list() | |
| 346 for (j in 1:(nbchunks-1)) { | |
| 347 X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof)])[,((j-1)*round(m/nbchunks)+1):(j*round(m/nbchunks))]) | |
| 348 RSS[[j]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) | |
| 349 rm(X_t)} | |
| 350 X_t<-crossprod(M %*% (diag(n)-tcrossprod(Q_,Q_)),(X[,!colnames(X) %in% colnames(cof)])[,((j)*round(m/nbchunks)+1):(m-(ncol(cof)-1))]) | |
| 351 RSS[[nbchunks]]<-apply(X_t,2,function(x){sum(lsfit(x,Res_H0,intercept = FALSE)$residuals^2)}) | |
| 352 rm(X_t,j) | |
| 353 RSSf<-unlist(RSS) | |
| 354 RSS_H0<-sum(Res_H0^2) | |
| 355 df2<-n-df1-ncol(cof) | |
| 356 Ftest<-(rep(RSS_H0,length(RSSf))/RSSf-1)*df2/df1 | |
| 357 pval<-pf(Ftest,df1,df2,lower.tail=FALSE) | |
| 358 list(out=rbind(data.frame(SNP=colnames(cof)[-1],'pval'=GLS_lm$coef[2:(ncol(cof)),4]), | |
| 359 data.frame('SNP'=colnames(X)[-which(colnames(X) %in% colnames(cof))],'pval'=pval)),cof=colnames(cof)[-1])} else {cat('error \n')}} | |
| 360 opt_extBIC_out<-bestmodel_pvals(opt_extBIC) | |
| 361 opt_mbonf_out<-bestmodel_pvals(opt_mbonf) | |
| 362 | |
| 363 list(step_table=fwdbwd_table,pval_step=pval_step,RSSout=plot_RSS,bonf_thresh=-log10(0.05/m),opt_extBIC=opt_extBIC_out,opt_mbonf=opt_mbonf_out)} | |
| 364 | |
| 365 plot_step_table<-function(x,type){ | |
| 366 if (type=='h2') {plot(x$step_table$step,x$step_table$h2,type='b',lty=2,pch=20,col='darkblue',xlab='step',ylab='h2') | |
| 367 abline(v=(nrow(x$step_table)/2-0.5),lty=2)} | |
| 368 else if (type=='maxpval'){plot(x$step_table$step,-log10(x$step_table$maxpval),type='b',lty=2,pch=20,col='darkblue',xlab='step',ylab='-log10(max_Pval)') | |
| 369 abline(h=x$bonf_thresh,lty=2) | |
| 370 abline(v=(nrow(x$step_table)/2-0.5),lty=2)} | |
| 371 else if (type=='BIC'){plot(x$step_table$step,x$step_table$BIC,type='b',lty=2,pch=20,col='darkblue',xlab='step',ylab='BIC') | |
| 372 abline(v=(nrow(x$step_table)/2-0.5),lty=2)} | |
| 373 else if (type=='extBIC'){plot(x$step_table$step,x$step_table$extBIC,type='b',lty=2,pch=20,col='darkblue',xlab='step',ylab='EBIC') | |
| 374 abline(v=(nrow(x$step_table)/2-0.5),lty=2)} | |
| 375 else {cat('error! \n argument type must be one of h2, maxpval, BIC, extBIC')}} | |
| 376 | |
| 377 plot_step_RSS<-function(x){ | |
| 378 op<-par(mar=c(5, 5, 2, 2)) | |
| 379 plot(0,0,xlim=c(0,nrow(x$RSSout)-1),ylim=c(0,1),xlab='step',ylab='%var',col=0) | |
| 380 polygon(c(0:(nrow(x$RSSout)-1),(nrow(x$RSSout)-1),0), c(x$RSSout[,3],0,0), col='brown1', border=0) | |
| 381 polygon(c(0:(nrow(x$RSSout)-1),(nrow(x$RSSout)-1),0), c(x$RSSout[,2],0,0), col='forestgreen', border=0) | |
| 382 polygon(c(0:(nrow(x$RSSout)-1),(nrow(x$RSSout)-1),0), c(x$RSSout[,1],0,0), col='dodgerblue4', border=0) | |
| 383 abline(v=(nrow(x$step_table)/2-0.5),lty=2) | |
| 384 par(op)} | |
| 385 | |
| 386 plot_GWAS<-function(x) { | |
| 387 output_<-x$out[order(x$out$Pos),] | |
| 388 output_ok<-output_[order(output_$Chr),] | |
| 389 | |
| 390 maxpos<-c(0,cumsum(as.numeric(aggregate(output_ok$Pos,list(output_ok$Chr),max)$x+max(cumsum(as.numeric(aggregate(output_ok$Pos,list(output_ok$Chr),max)$x)))/200))) | |
| 391 plot_col<-rep(c('gray10','gray60'),ceiling(max(unique(output_ok$Chr))/2)) | |
| 392 # plot_col<-c('blue','darkgreen','red','cyan','purple') | |
| 393 size<-aggregate(output_ok$Pos,list(output_ok$Chr),length)$x | |
| 394 a<-rep(maxpos[1],size[1]) | |
| 395 b<-rep(plot_col[1],size[1]) | |
| 396 for (i in 2:max(unique(output_ok$Chr))){ | |
| 397 a<-c(a,rep(maxpos[i],size[i])) | |
| 398 b<-c(b,rep(plot_col[i],size[i]))} | |
| 399 | |
| 400 output_ok$xpos<-output_ok$Pos+a | |
| 401 output_ok$col<-b | |
| 402 output_ok$col[output_ok$SNP %in% x$cof]<-'red' | |
| 403 | |
| 404 d<-(aggregate(output_ok$xpos,list(output_ok$Chr),min)$x+aggregate(output_ok$xpos,list(output_ok$Chr),max)$x)/2 | |
| 405 | |
| 406 plot(output_ok$xpos,-log10(output_ok$pval),col=output_ok$col,pch=20,ylab='-log10(pval)',xaxt='n',xlab='chromosome') | |
| 407 axis(1,tick=FALSE,at=d,labels=c(1:max(unique(output_ok$Chr)))) | |
| 408 abline(h=x$bonf_thresh,lty=3,col='black')} | |
| 409 | |
| 410 plot_region<-function(x,chrom,pos1,pos2){ | |
| 411 region<-subset(x$out,Chr==chrom & Pos>=pos1 & Pos <=pos2) | |
| 412 region$col<- if (chrom %% 2 == 0) {'gray60'} else {'gray10'} | |
| 413 region$col[which(region$SNP %in% x$cof)]<-'red' | |
| 414 plot(region$Pos,-log10(region$pval),type='p',pch=20,main=paste('chromosome',chrom,sep=''),xlab='position (bp)',ylab='-log10(pval)',col=region$col,xlim=c(pos1,pos2)) | |
| 415 abline(h=x$bonf_thresh,lty=3,col='black')} | |
| 416 | |
| 417 | |
| 418 plot_fwd_GWAS<-function(x,step,snp_info,pval_filt) { | |
| 419 stopifnot(step<=length(x$pval_step)) | |
| 420 output<-list(out=subset(merge(snp_info,x$pval_step[[step]]$out,by='SNP'),pval<=pval_filt),cof=x$pval_step[[step]]$cof,bonf_thresh=x$bonf_thresh) | |
| 421 plot_GWAS(output)} | |
| 422 | |
| 423 plot_fwd_region<-function(x,step,snp_info,pval_filt,chrom,pos1,pos2) { | |
| 424 stopifnot(step<=length(x$pval_step)) | |
| 425 output<-list(out=subset(merge(snp_info,x$pval_step[[step]]$out,by='SNP'),pval<=pval_filt),cof=x$pval_step[[step]]$cof,bonf_thresh=x$bonf_thresh) | |
| 426 plot_region(output,chrom,pos1,pos2)} | |
| 427 | |
| 428 | |
| 429 plot_opt_GWAS<-function(x,opt,snp_info,pval_filt) { | |
| 430 if (opt=='extBIC') {output<-list(out=subset(merge(snp_info,x$opt_extBIC$out,by='SNP'),pval<=pval_filt),cof=x$opt_extBIC$cof,bonf_thresh=x$bonf_thresh) | |
| 431 plot_GWAS(output)} | |
| 432 else if (opt=='mbonf') {output<-list(out=subset(merge(snp_info,x$opt_mbonf$out,by='SNP'),pval<=pval_filt),cof=x$opt_mbonf$cof,bonf_thresh=x$bonf_thresh) | |
| 433 plot_GWAS(output)} | |
| 434 else {cat('error! \n opt must be extBIC or mbonf')}} | |
| 435 | |
| 436 plot_opt_region<-function(x,opt,snp_info,pval_filt,chrom,pos1,pos2) { | |
| 437 if (opt=='extBIC') {output<-list(out=subset(merge(snp_info,x$opt_extBIC$out,by='SNP'),pval<=pval_filt),cof=x$opt_extBIC$cof,bonf_thresh=x$bonf_thresh) | |
| 438 plot_region(output,chrom,pos1,pos2)} | |
| 439 else if (opt=='mbonf') {output<-list(out=subset(merge(snp_info,x$opt_mbonf$out,by='SNP'),pval<=pval_filt),cof=x$opt_mbonf$cof,bonf_thresh=x$bonf_thresh) | |
| 440 plot_region(output,chrom,pos1,pos2)} | |
| 441 else {cat('error! \n opt must be extBIC or mbonf')}} | |
| 442 | |
| 443 | |
| 444 qqplot_fwd_GWAS<-function(x,nsteps){ | |
| 445 stopifnot(nsteps<=length(x$pval_step)) | |
| 446 e<--log10(ppoints(nrow(x$pval_step[[1]]$out))) | |
| 447 ostep<-list() | |
| 448 ostep[[1]]<--log10(sort(x$pval_step[[1]]$out$pval)) | |
| 449 for (i in 2:nsteps) {ostep[[i]]<--log10(sort(x$pval_step[[i]]$out$pval))} | |
| 450 | |
| 451 maxp<-ceiling(max(unlist(ostep))) | |
| 452 | |
| 453 plot(e,ostep[[1]],type='b',pch=20,cex=0.8,col=1,xlab=expression(Expected~~-log[10](italic(p))), ylab=expression(Observed~~-log[10](italic(p))),xlim=c(0,max(e)+1),ylim=c(0,maxp)) | |
| 454 abline(0,1,col="dark grey") | |
| 455 | |
| 456 for (i in 2:nsteps) { | |
| 457 par(new=T) | |
| 458 plot(e,ostep[[i]],type='b',pch=20,cex=0.8,col=i,axes='F',xlab='',ylab='',xlim=c(0,max(e)+1),ylim=c(0,maxp))} | |
| 459 legend(0,maxp,lty=1,pch=20,col=c(1:length(ostep)),paste(c(0:(length(ostep)-1)),'cof',sep=' ')) | |
| 460 } | |
| 461 | |
| 462 qqplot_opt_GWAS<-function(x,opt){ | |
| 463 if (opt=='extBIC') { | |
| 464 e<--log10(ppoints(nrow(x$opt_extBIC$out))) | |
| 465 o<--log10(sort(x$opt_extBIC$out$pval)) | |
| 466 maxp<-ceiling(max(o)) | |
| 467 plot(e,o,type='b',pch=20,cex=0.8,col=1,xlab=expression(Expected~~-log[10](italic(p))), ylab=expression(Observed~~-log[10](italic(p))),xlim=c(0,max(e)+1),ylim=c(0,maxp),main=paste('optimal model according to extBIC')) | |
| 468 abline(0,1,col="dark grey")} | |
| 469 else if (opt=='mbonf') { | |
| 470 e<--log10(ppoints(nrow(x$opt_mbonf$out))) | |
| 471 o<--log10(sort(x$opt_mbonf$out$pval)) | |
| 472 maxp<-ceiling(max(o)) | |
| 473 plot(e,o,type='b',pch=20,cex=0.8,col=1,xlab=expression(Expected~~-log[10](italic(p))), ylab=expression(Observed~~-log[10](italic(p))),xlim=c(0,max(e)+1),ylim=c(0,maxp),main=paste('optimal model according to mbonf')) | |
| 474 abline(0,1,col="dark grey")} | |
| 475 else {cat('error! \n opt must be extBIC or mbonf')}} | |
| 476 | |
| 477 | 
