0
|
1 # Matrix eQTL by Andrey A. Shabalin
|
|
2 # http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/
|
|
3
|
|
4 # http://cran.r-project.org/web/packages/policies.html
|
|
5
|
|
6 library(methods)
|
|
7
|
|
8 modelLINEAR = 117348L;
|
|
9 modelANOVA = 47074L;
|
|
10 modelLINEAR_CROSS = 1113461L;
|
|
11
|
|
12 .seq = function(a,b){if(a<=b){a:b}else{integer(0)}};
|
|
13
|
|
14 SlicedData <- setRefClass( "SlicedData",
|
|
15 fields = list(
|
|
16 dataEnv = "environment",
|
|
17 nSlices1 = "numeric",
|
|
18 rowNameSlices = "list",
|
|
19 columnNames = "character",
|
|
20 fileDelimiter = "character",
|
|
21 fileSkipColumns = "numeric",
|
|
22 fileSkipRows = "numeric",
|
|
23 fileSliceSize = "numeric",
|
|
24 fileOmitCharacters = "character"
|
|
25 ),
|
|
26 methods = list(
|
|
27 initialize = function( mat = NULL ) {
|
|
28 dataEnv <<- new.env(hash = TRUE, size = 29L);
|
|
29 nSlices1 <<- 0L;
|
|
30 if(!is.null(mat)) {
|
|
31 CreateFromMatrix(mat);
|
|
32 }
|
|
33 fileSliceSize <<- 1000;
|
|
34 fileDelimiter <<- "\t";
|
|
35 fileSkipColumns <<- 1L;
|
|
36 fileSkipRows <<- 1L;
|
|
37 fileOmitCharacters <<- "NA"
|
|
38 return(invisible(.self));
|
|
39 },
|
|
40 CreateFromMatrix = function( mat ) {
|
|
41 stopifnot( class(mat) == "matrix" );
|
|
42 setSliceRaw( 1L ,mat );
|
|
43 rns = rownames( mat, do.NULL = FALSE);
|
|
44 #if( is.null(rns) ) {
|
|
45 # rns = paste( "Row_",(1:nrow(mat)), sep="" );
|
|
46 #}
|
|
47 rowNameSlices <<- list(rns);
|
|
48 cns = colnames( mat, do.NULL = FALSE );
|
|
49 #if( is.null(cns) ){
|
|
50 # cns = paste( "Col_",(1:ncol(mat)), sep="" );
|
|
51 #}
|
|
52 columnNames <<- cns;
|
|
53 return(invisible(.self));
|
|
54 },
|
|
55 getSlice = function(sl) {
|
|
56 value = get(paste(sl), dataEnv);
|
|
57 if( is.raw(value) ) {
|
|
58 storage.mode(value) = "double";
|
|
59 value[value == 255] = NA;
|
|
60 }
|
|
61 return( value )
|
|
62 },
|
|
63 getSliceRaw = function(sl) {
|
|
64 return( get(paste(sl), dataEnv) )
|
|
65 },
|
|
66 setSliceRaw = function(sl, value) {
|
|
67 assign( paste(sl), value, dataEnv )
|
|
68 if( nSlices1 < sl ) {
|
|
69 nSlices1 <<- sl;
|
|
70 }
|
|
71 },
|
|
72 setSlice = function(sl, value) {
|
|
73 if( length(value) > 0 ) {
|
|
74 if( all(as.integer(value) == value, na.rm = TRUE) ) {
|
|
75 if( (min(value, na.rm = TRUE) >= 0 ) &&
|
|
76 (max(value, na.rm = TRUE) < 255) )
|
|
77 {
|
|
78 nv = value;
|
|
79 suppressWarnings({storage.mode(nv) = "raw"});
|
|
80 nv[ is.na(value)] = as.raw(255);
|
|
81 value = nv;
|
|
82 } else {
|
|
83 storage.mode(value) = "integer";
|
|
84 }
|
|
85 }
|
|
86 }
|
|
87 setSliceRaw(sl, value);
|
|
88 },
|
|
89 nSlices = function() {
|
|
90 return( nSlices1 );
|
|
91 },
|
|
92 LoadFile = function(filename, skipRows = NULL, skipColumns = NULL, sliceSize = NULL, omitCharacters = NULL, delimiter = NULL, rowNamesColumn = 1) {
|
|
93 if( !is.null(skipRows) ) {
|
|
94 fileSkipRows <<- skipRows;
|
|
95 }
|
|
96 if( !is.null(skipColumns) ) {
|
|
97 fileSkipColumns <<- skipColumns;
|
|
98 }
|
|
99 if( !is.null(omitCharacters) ) {
|
|
100 fileOmitCharacters <<- omitCharacters;
|
|
101 }
|
|
102 if( !is.null(sliceSize) ) {
|
|
103 fileSliceSize <<- sliceSize;
|
|
104 }
|
|
105 if( !is.null(delimiter) ) {
|
|
106 fileDelimiter <<- delimiter;
|
|
107 }
|
|
108 stopifnot( (fileSkipColumns == 0) || (rowNamesColumn <= fileSkipColumns) )
|
|
109 stopifnot( (fileSkipColumns == 0) || (rowNamesColumn >= 1) )
|
|
110
|
|
111 fid = file(description = filename, open = "rt", blocking = FALSE, raw = FALSE)
|
|
112 # clean object if file is open
|
|
113 Clear();
|
|
114 lines = readLines(con = fid, n = max(fileSkipRows,1L), ok = TRUE, warn = TRUE)
|
|
115 line1 = tail(lines,1);
|
|
116 splt = strsplit(line1, split = fileDelimiter, fixed = TRUE);
|
|
117 if( fileSkipRows > 0L ) {
|
|
118 columnNames <<- splt[[1]]; # [ -(1:fileSkipColumns) ];
|
|
119 } else {
|
|
120 seek(fid, 0)
|
|
121 }
|
|
122
|
|
123 rm( lines, line1, splt );
|
|
124
|
|
125 rowNameSlices <<- vector("list", 15);
|
|
126
|
|
127 curSliceId = 0L;
|
|
128 repeat
|
|
129 {
|
|
130 # preallocate names and data
|
|
131 if(length(rowNameSlices) < curSliceId) {
|
|
132 rowNameSlices[[2L*curSliceId]] <<- NULL;
|
|
133 }
|
|
134 curSliceId = curSliceId + 1L;
|
|
135
|
|
136 # read sliceSize rows
|
|
137 rowtag = vector("character",fileSliceSize);
|
|
138 rowvals = vector("list",fileSliceSize);
|
|
139 for(i in 1:fileSliceSize) {
|
|
140 temp = "";
|
|
141 if( fileSkipColumns > 0L ) {
|
|
142 temp = scan(file = fid, what = character(), n = fileSkipColumns, quiet = TRUE,sep = fileDelimiter);
|
|
143 }
|
|
144 rowtag[i] = temp[rowNamesColumn];#paste(temp,collapse=" ");
|
|
145 rowvals[[i]] = scan(file = fid, what = double(), nlines = 1, quiet = TRUE, sep = fileDelimiter, na.strings = fileOmitCharacters);
|
|
146 if( length(rowvals[[i]]) == 0L ) {
|
|
147 if(i==1L) {
|
|
148 rowtag = matrix(0, 0, 0);
|
|
149 rowvals = character(0);
|
|
150 } else {
|
|
151 rowtag = rowtag[ 1:(i-1) ];
|
|
152 rowvals = rowvals[ 1:(i-1) ];
|
|
153 }
|
|
154 break;
|
|
155 }
|
|
156 }
|
|
157 if( length(rowtag) == 0L ) {
|
|
158 curSliceId = curSliceId - 1L;
|
|
159 break;
|
|
160 }
|
|
161 rowNameSlices[[curSliceId]] <<- rowtag;
|
|
162 data = c(rowvals, recursive = TRUE);
|
|
163 dim(data) = c(length(rowvals[[1]]), length(rowvals));
|
|
164 data = t(data);
|
|
165 setSlice(curSliceId, data);
|
|
166 if( length(rowtag) < fileSliceSize ) {
|
|
167 break;
|
|
168 }
|
|
169 numtxt = formatC(curSliceId*fileSliceSize, big.mark=",", format = "f", digits = 0)
|
|
170 cat( "Rows read: ", numtxt, "\n");
|
|
171 flush.console()
|
|
172 }
|
|
173 close(fid)
|
|
174 if( fileSkipRows == 0 ) {
|
|
175 columnNames <<- paste("Col_", (1:nCols()), sep="");
|
|
176 } else {
|
|
177 columnNames <<- tail(columnNames, ncol(getSliceRaw(1)));
|
|
178 }
|
|
179 if( fileSkipColumns == 0 ) {
|
|
180 cnt = 0L;
|
|
181 for( sl in 1:nSlices() ) {
|
|
182 nr = length(getSliceRaw(sl));
|
|
183 rowNameSlices[[sl]] <<- paste("Row_",cnt + (1:nr),sep="");
|
|
184 cnt = cnt + nr;
|
|
185 }
|
|
186 }
|
|
187 rowNameSlices <<- rowNameSlices[1:curSliceId];
|
|
188 cat("Rows read: ", nRows(), " done.\n");
|
|
189 return(invisible(.self));
|
|
190 },
|
|
191 SaveFile = function(filename) {
|
|
192 if( nSlices() == 0 ) {
|
|
193 cat("No data to save");
|
|
194 return();
|
|
195 }
|
|
196 fid = file(filename,"wt");
|
|
197 for( sl in 1:nSlices() ) {
|
|
198 z = getSlice(sl);
|
|
199 rownames(z) = rowNameSlices[[sl]];
|
|
200 colnames(z) = columnNames;
|
|
201 write.table(z, file = fid, sep = "\t",
|
|
202 col.names = (if(sl == 1){NA}else{FALSE}));
|
|
203 }
|
|
204 close(fid);
|
|
205 },
|
|
206 nRows = function() {
|
|
207 s = 0L;
|
|
208 for(sl in .seq(1,nSlices())) {
|
|
209 s = s + nrow(getSliceRaw(sl));
|
|
210 }
|
|
211 return( s )
|
|
212 },
|
|
213 nCols = function() {
|
|
214 if( nSlices() == 0L ) {
|
|
215 return(0L);
|
|
216 } else {
|
|
217 return( ncol(getSliceRaw(1L)) )
|
|
218 }
|
|
219 },
|
|
220 Clear = function() {
|
|
221 for( sl in .seq(1,nSlices()) ) {
|
|
222 rm(list = paste(sl), envir = dataEnv)
|
|
223 }
|
|
224 nSlices1 <<- 0L;
|
|
225 rowNameSlices <<- list();
|
|
226 columnNames <<- character();
|
|
227 return(invisible(.self));
|
|
228 },
|
|
229 IsCombined = function() {
|
|
230 return( nSlices() <= 1L );
|
|
231 },
|
|
232 GetAllRowNames = function() {
|
|
233 return( c(rowNameSlices, recursive=TRUE) );
|
|
234 },
|
|
235 SetNanRowMean = function() {
|
|
236 if( (nCols() == 0L) ) {
|
|
237 return(invisible(.self));
|
|
238 }
|
|
239 for( sl in .seq(1,nSlices()) ) {
|
|
240 slice = getSlice(sl);
|
|
241 if( any(is.na(slice)) ) {
|
|
242 rowmean = rowMeans(slice, na.rm = TRUE);
|
|
243 rowmean[is.na(rowmean)] = 0L;
|
|
244 for( j in which(!complete.cases(slice)) ) {
|
|
245 where1 = is.na(slice[j, ]);
|
|
246 slice[j, where1] = rowmean[j];
|
|
247 }
|
|
248 setSlice(sl, slice);
|
|
249 }
|
|
250 }
|
|
251 return(invisible(.self));
|
|
252 },
|
|
253 RowStandardizeCentered = function() {
|
|
254 for(sl in .seq(1,nSlices()) ) {
|
|
255 slice = getSlice(sl);
|
|
256 div = sqrt( rowSums(slice^2) );
|
|
257 div[ div == 0 ] = 1;
|
|
258 setSlice(sl, slice/div);
|
|
259 }
|
|
260 return(invisible(.self));
|
|
261 },
|
|
262 CombineInOneSlice = function() {
|
|
263 if( nSlices() <= 1L ) {
|
|
264 return(invisible(.self));
|
|
265 }
|
|
266 nc = nCols();
|
|
267 nr = nRows();
|
|
268 datatypes = c("raw","integer","double");
|
|
269 datafuns = c(as.raw, as.integer, as.double);
|
|
270 datatype = character(nSlices());
|
|
271 for(sl in 1:nSlices()) {
|
|
272 datatype[sl] = typeof(getSliceRaw(sl));
|
|
273 }
|
|
274 mch = max(match(datatype,datatypes,nomatch = length(datatypes)));
|
|
275 datafun = datafuns[[mch]];
|
|
276 newData = matrix(datafun(0), nrow = nr, ncol = nc);
|
|
277 offset = 0;
|
|
278 for(sl in 1:nSlices()) {
|
|
279 if(mch==1) {
|
|
280 slice = getSliceRaw(sl);
|
|
281 } else {
|
|
282 slice = getSlice(sl);
|
|
283 }
|
|
284 newData[ offset + (1:nrow(slice)),] = datafun(slice);
|
|
285 setSlice(sl, numeric());
|
|
286 offset = offset + nrow(slice);
|
|
287 }
|
|
288
|
|
289 nSlices1 <<- 1L;
|
|
290 setSliceRaw(1L, newData);
|
|
291 rm(newData);
|
|
292
|
|
293 newrowNameSlices = GetAllRowNames();
|
|
294 rowNameSlices <<- list(newrowNameSlices)
|
|
295 return(invisible(.self));
|
|
296 },
|
|
297 ResliceCombined = function(sliceSize = -1) {
|
|
298 if( sliceSize > 0L ) {
|
|
299 fileSliceSize <<- sliceSize;
|
|
300 }
|
|
301 if( fileSliceSize <= 0 ) {
|
|
302 fileSliceSize <<- 1000;
|
|
303 }
|
|
304 if( IsCombined() ) {
|
|
305 nRows1 = nRows();
|
|
306 if(nRows1 == 0L) {
|
|
307 return(invisible(.self));
|
|
308 }
|
|
309 newNSlices = floor( (nRows1 + fileSliceSize - 1)/fileSliceSize );
|
|
310 oldData = getSliceRaw(1L);
|
|
311 #oldNames = rowNameSlices[[1]];
|
|
312 newNameslices = vector("list",newNSlices)
|
|
313 for( sl in 1:newNSlices ) {
|
|
314 range = (1+(sl-1)*fileSliceSize) : (min(nRows1,sl*fileSliceSize));
|
|
315 newpart = oldData[range, ,drop = FALSE];
|
|
316 if( is.raw(oldData) ) {
|
|
317 setSliceRaw( sl, newpart);
|
|
318 } else {
|
|
319 setSlice( sl, newpart);
|
|
320 }
|
|
321 newNameslices[[sl]] = rowNameSlices[[1]][range];
|
|
322 }
|
|
323 rowNameSlices <<- newNameslices ;
|
|
324 } else {
|
|
325 stop("Reslice of a sliced matrix is not supported yet. Use CombineInOneSlice first.");
|
|
326 }
|
|
327 return(invisible(.self));
|
|
328 },
|
|
329 Clone = function() {
|
|
330 clone = SlicedData$new();
|
|
331 for(sl in .seq(1,nSlices()) ) {
|
|
332 clone$setSliceRaw(sl,getSliceRaw(sl));
|
|
333 }
|
|
334 clone$rowNameSlices = rowNameSlices;
|
|
335 clone$columnNames = columnNames;
|
|
336 clone$fileDelimiter = fileDelimiter;
|
|
337 clone$fileSkipColumns = fileSkipColumns;
|
|
338 clone$fileSkipRows = fileSkipRows;
|
|
339 clone$fileSliceSize = fileSliceSize;
|
|
340 clone$fileOmitCharacters = fileOmitCharacters;
|
|
341 return( clone );
|
|
342 },
|
|
343 RowMatrixMultiply = function(multiplier) {
|
|
344 for(sl in .seq(1,nSlices()) ) {
|
|
345 setSlice(sl, getSlice(sl) %*% multiplier);
|
|
346 }
|
|
347 return(invisible(.self));
|
|
348 },
|
|
349 ColumnSubsample = function(subset) {
|
|
350 for(sl in .seq(1,nSlices()) ) {
|
|
351 setSliceRaw(sl, getSliceRaw(sl)[ ,subset, drop = FALSE]);
|
|
352 }
|
|
353 columnNames <<- columnNames[subset];
|
|
354 return(invisible(.self));
|
|
355 },
|
|
356 RowReorderSimple = function(ordr) {
|
|
357 # had to use an inefficient and dirty method
|
|
358 # due to horrible memory management in R
|
|
359 if( (typeof(ordr) == "logical") && all(ordr) ) {
|
|
360 return(invisible(.self));
|
|
361 }
|
|
362 if( (length(ordr) == nRows()) && all(ordr == (1:length(ordr))) ) {
|
|
363 return(invisible(.self));
|
|
364 }
|
|
365 CombineInOneSlice();
|
|
366 gc();
|
|
367 setSliceRaw( 1L, getSliceRaw(1L)[ordr, ] );
|
|
368 rowNameSlices[[1]] <<- rowNameSlices[[1]][ordr];
|
|
369 gc();
|
|
370 ResliceCombined();
|
|
371 gc();
|
|
372 return(invisible(.self));
|
|
373 },
|
|
374 RowReorder = function(ordr) {
|
|
375 # transform logical into indices
|
|
376 if( typeof(ordr) == "logical" ) {
|
|
377 if( length(ordr) == nRows() ) {
|
|
378 ordr = which(ordr);
|
|
379 } else {
|
|
380 stop("Parameter \"ordr\" has wrong length")
|
|
381 }
|
|
382 }
|
|
383 ## first, check that anything has to be done at all
|
|
384 if( (length(ordr) == nRows()) && all(ordr == (1:length(ordr))) ) {
|
|
385 return(invisible(.self));
|
|
386 }
|
|
387 ## check bounds
|
|
388 #if( (min(ordr) < 1) || (max(ordr) > nRows()) ) {
|
|
389 # stop("Parameter \"ordr\" is out of bounds");
|
|
390 #}
|
|
391 ## slice the data into individual rows
|
|
392 all_rows = vector("list", nSlices())
|
|
393 for( i in 1:nSlices() ) {
|
|
394 slice = getSliceRaw(i)
|
|
395 all_rows[[i]] = split(slice, 1:nrow(slice))
|
|
396 setSliceRaw(i,numeric())
|
|
397 }
|
|
398 gc();
|
|
399 all_rows = unlist(all_rows, recursive=FALSE, use.names = FALSE);
|
|
400 ## Reorder the rows
|
|
401 all_rows = all_rows[ordr];
|
|
402 ## get row names
|
|
403 all_names = GetAllRowNames();
|
|
404 ## erase the set
|
|
405 rowNameSlices <<- list();
|
|
406 ## sort names
|
|
407 all_names = all_names[ordr];
|
|
408 ##
|
|
409 ## Make slices back
|
|
410 nrows = length(all_rows);
|
|
411 nSlices1 <<- as.integer((nrows+fileSliceSize-1)/fileSliceSize);
|
|
412 ##cat(nrows, " ", nSlices1);
|
|
413 rowNameSlices1 = vector("list", nSlices1);
|
|
414 for( i in 1:nSlices1 ) {
|
|
415 fr = 1 + fileSliceSize*(i-1);
|
|
416 to = min( fileSliceSize*i, nrows);
|
|
417
|
|
418 subset = all_rows[fr:to];
|
|
419 types = unlist(lapply(subset,typeof));
|
|
420 israw = (types == "raw")
|
|
421 if(!all(israw == israw[1])) {
|
|
422 # some raw and some are not
|
|
423 subset = lapply(subset, function(x){if(is.raw(x)){x=as.integer(x);x[x==255] = NA;return(x)}else{return(x)}});
|
|
424 }
|
|
425 subset = unlist(subset);
|
|
426 dim(subset) = c( length(all_rows[[fr]]) , to - fr + 1)
|
|
427 #subset = matrix(subset, ncol = (to-fr+1));
|
|
428 if(is.raw(subset)) {
|
|
429 setSliceRaw(i, t(subset));
|
|
430 } else {
|
|
431 setSlice(i, t(subset));
|
|
432 }
|
|
433 rowNameSlices1[[i]] = all_names[fr:to];
|
|
434 all_rows[fr:to] = 0;
|
|
435 all_names[fr:to] = 0;
|
|
436 }
|
|
437 rowNameSlices <<- rowNameSlices1;
|
|
438 gc();
|
|
439 return(invisible(.self));
|
|
440 },
|
|
441 RowRemoveZeroEps = function(){
|
|
442 for(sl in .seq(1,nSlices()) ) {
|
|
443 slice = getSlice(sl);
|
|
444 amean = rowMeans(abs(slice));
|
|
445 remove = (amean < .Machine$double.eps*nCols());
|
|
446 if(any(remove)) {
|
|
447 rowNameSlices[[sl]] <<- rowNameSlices[[sl]][!remove];
|
|
448 setSlice(sl, slice[!remove, , drop = FALSE]);
|
|
449 }
|
|
450 }
|
|
451 return(invisible(.self));
|
|
452 },
|
|
453 FindRow = function(rowname) {
|
|
454 for(sl in .seq(1,nSlices()) ) {
|
|
455 mch = match(rowname,rowNameSlices[[sl]], nomatch = 0);
|
|
456 if( mch > 0 )
|
|
457 {
|
|
458 row = getSlice(sl)[mch[1], , drop=FALSE];
|
|
459 rownames(row) = rowname;
|
|
460 colnames(row) = columnNames;
|
|
461 return( list(slice = sl, item = mch, row = row) );
|
|
462 }
|
|
463 }
|
|
464 return( NULL );
|
|
465 },
|
|
466 show = function() {
|
|
467 cat("SlicedData object. For more information type: ?SlicedData\n");
|
|
468 cat("Number of columns:", nCols(), "\n");
|
|
469 cat("Number of rows:", nRows(), "\n");
|
|
470 cat("Data is stored in", nSlices(), "slices\n");
|
|
471 if(nCols()>0) {
|
|
472 z = getSlice(1L);
|
|
473 if(nrow(z)>0) {
|
|
474 z = z[1:min(nrow(z),10L), 1:min(ncol(z),10L), drop = FALSE];
|
|
475 rownames(z) = rowNameSlices[[1]][1:nrow(z)];
|
|
476 colnames(z) = columnNames[1:ncol(z)];
|
|
477 cat("Top left corner of the first slice (up to 10x10):\n");
|
|
478 methods:::show(z)
|
|
479 }
|
|
480 }
|
|
481 }
|
|
482 ))
|
|
483
|
|
484 setGeneric("nrow")
|
|
485 setMethod("nrow", "SlicedData", function(x) {
|
|
486 return( x$nRows() );
|
|
487 })
|
|
488 setGeneric("NROW")
|
|
489 setMethod("NROW", "SlicedData", function(x) {
|
|
490 return( x$nRows() );
|
|
491 })
|
|
492 setGeneric("ncol")
|
|
493 setMethod("ncol", "SlicedData", function(x) {
|
|
494 return( x$nCols() );
|
|
495 })
|
|
496 setGeneric("NCOL")
|
|
497 setMethod("NCOL", "SlicedData", function(x) {
|
|
498 return( x$nCols() );
|
|
499 })
|
|
500 setGeneric("dim")
|
|
501 setMethod("dim", "SlicedData", function(x) {
|
|
502 return( c(x$nRows(),x$nCols()) );
|
|
503 })
|
|
504 setGeneric("colnames")
|
|
505 setMethod("colnames", "SlicedData", function(x) {
|
|
506 return( x$columnNames );
|
|
507 })
|
|
508 setGeneric("rownames")
|
|
509 setMethod("rownames", "SlicedData", function(x) {
|
|
510 return( x$GetAllRowNames() );
|
|
511 })
|
|
512 setMethod("[[", "SlicedData", function(x,i) {
|
|
513 return( x$getSlice(i) );
|
|
514 })
|
|
515 setGeneric("length")
|
|
516 setMethod("length", "SlicedData", function(x) {
|
|
517 return( x$nSlices() );
|
|
518 })
|
|
519 setMethod("[[<-", "SlicedData", function(x,i,value) {
|
|
520 x$setSlice(i, value);
|
|
521 return(x);
|
|
522 })
|
|
523 summary.SlicedData = function(object, ...) {
|
|
524 z = c(nCols = object$nCols(), nRows = object$nRows(), nSlices = object$nSlices());
|
|
525 return(z);
|
|
526 }
|
|
527
|
|
528 ##### setGeneric("summary") #####
|
|
529 #setMethod("summary", "SlicedData", function(object, ...) {
|
|
530 # z = c(nCols = object$nCols(), nRows = object$nRows(), nSlices = object$nSlices());
|
|
531 # return(z);
|
|
532 # })
|
|
533 #setGeneric("show", standardGeneric("show"))
|
|
534 # setMethod("show", "SlicedData", function(object) {
|
|
535 # cat("SlicedData object. For more information type: ?SlicedData\n");
|
|
536 # cat("Number of columns:", object$nCols(), "\n");
|
|
537 # cat("Number of rows:", object$nRows(), "\n");
|
|
538 # cat("Data is stored in", object$nSlices(), "slices\n");
|
|
539 # if(object$nSlices()>0) {
|
|
540 # z = object$getSlice(1);
|
|
541 # if(nrow(z)>0) {
|
|
542 # z = z[1:min(nrow(z),10), 1:min(ncol(z),10), drop = FALSE];
|
|
543 # rownames(z) = object$rowNameSlices[[1]][1:nrow(z)];
|
|
544 # colnames(z) = object$columnNames[1:ncol(z)];
|
|
545 # cat("Top left corner of the first slice (up to 10x10):\n");
|
|
546 # show(z)
|
|
547 # }
|
|
548 # }
|
|
549 # })
|
|
550
|
|
551 setGeneric("as.matrix")
|
|
552 setMethod("as.matrix", "SlicedData", function(x) {
|
|
553 if(x$nSlices() == 0) {
|
|
554 return( matrix(0,0,0) );
|
|
555 }
|
|
556 if(x$nSlices() > 1) {
|
|
557 copy = x$Clone();
|
|
558 copy$CombineInOneSlice();
|
|
559 } else {
|
|
560 copy = x;
|
|
561 }
|
|
562 mat = copy$getSlice(1L);
|
|
563 rownames(mat) = rownames(copy);
|
|
564 colnames(mat) = colnames(copy);
|
|
565 return( mat );
|
|
566 })
|
|
567 setGeneric("colnames<-")
|
|
568 setMethod("colnames<-", "SlicedData", function(x,value) {
|
|
569 stopifnot( class(value) == "character" );
|
|
570 stopifnot( length(value) == x$nCols() );
|
|
571 x$columnNames = value;
|
|
572 return(x);
|
|
573 })
|
|
574 setGeneric("rownames<-")
|
|
575 setMethod("rownames<-", "SlicedData", function(x,value) {
|
|
576 stopifnot( class(value) == "character" );
|
|
577 stopifnot( length(value) == x$nRows() );
|
|
578 start = 1;
|
|
579 newNameSlices = vector("list", x$nSlices());
|
|
580 for( i in .seq(1,x$nSlices()) ) {
|
|
581 nr = nrow(x$getSliceRaw(i));
|
|
582 newNameSlices[[i]] = value[ start:(start+nr-1) ];
|
|
583 start = start + nr;
|
|
584 }
|
|
585 x$rowNameSlices = newNameSlices;
|
|
586 return(x);
|
|
587 })
|
|
588 setGeneric("rowSums")
|
|
589 setMethod("rowSums", "SlicedData", function(x, na.rm = FALSE, dims = 1L) {
|
|
590 if(x$nSlices() == 0) {
|
|
591 return( numeric() );
|
|
592 }
|
|
593 stopifnot( dims == 1 );
|
|
594 thesum = vector("list", x$nSlices());
|
|
595 for( i in 1:x$nSlices() ) {
|
|
596 thesum[[i]] = rowSums(x$getSlice(i), na.rm)
|
|
597 }
|
|
598 return(unlist(thesum, recursive = FALSE, use.names = FALSE));
|
|
599 })
|
|
600 setGeneric("rowMeans")
|
|
601 setMethod("rowMeans", "SlicedData", function(x, na.rm = FALSE, dims = 1L) {
|
|
602 if(x$nSlices() == 0) {
|
|
603 return( numeric() );
|
|
604 }
|
|
605 stopifnot( dims == 1 );
|
|
606 thesum = vector("list", x$nSlices());
|
|
607 for( i in 1:x$nSlices() ) {
|
|
608 thesum[[i]] = rowMeans(x$getSlice(i), na.rm)
|
|
609 }
|
|
610 return(unlist(thesum, recursive = FALSE, use.names = FALSE));
|
|
611 })
|
|
612 setGeneric("colSums")
|
|
613 setMethod("colSums", "SlicedData", function(x, na.rm = FALSE, dims = 1L) {
|
|
614 if(x$nCols() == 0) {
|
|
615 return( numeric() );
|
|
616 }
|
|
617 stopifnot( dims == 1 );
|
|
618 thesum = 0;
|
|
619 for( i in .seq(1,x$nSlices()) ) {
|
|
620 thesum = thesum + colSums(x$getSlice(i), na.rm)
|
|
621 }
|
|
622 return(thesum);
|
|
623 })
|
|
624 setGeneric("colMeans")
|
|
625 setMethod("colMeans", "SlicedData", function(x, na.rm = FALSE, dims = 1L) {
|
|
626 if(x$nCols() == 0) {
|
|
627 return( numeric() );
|
|
628 }
|
|
629 stopifnot( dims == 1 );
|
|
630 thesum = 0;
|
|
631 thecounts = x$nRows();
|
|
632 for( i in .seq(1,x$nSlices()) ) {
|
|
633 slice = x$getSlice(i);
|
|
634 thesum = thesum + colSums(slice, na.rm)
|
|
635 if( na.rm ) {
|
|
636 thecounts = thecounts - colSums(is.na(slice))
|
|
637 }
|
|
638 }
|
|
639 return(thesum/thecounts);
|
|
640 })
|
|
641
|
|
642 .listBuilder <- setRefClass(".listBuilder",
|
|
643 fields = list(
|
|
644 dataEnv = "environment",
|
|
645 n = "numeric"
|
|
646 ),
|
|
647 methods = list(
|
|
648 initialize = function() {
|
|
649 dataEnv <<- new.env(hash = TRUE);
|
|
650 n <<- 0;
|
|
651 # cumlength <<- 0;
|
|
652 return(.self);
|
|
653 },
|
|
654 add = function(x) {
|
|
655 if(length(x) > 0) {
|
|
656 n <<- n + 1;
|
|
657 # cumlength <<- cumlength + length(x);
|
|
658 assign(paste(n), x, dataEnv );
|
|
659 }
|
|
660 return(.self);
|
|
661 },
|
|
662 set = function(i,x) {
|
|
663 if(length(x) > 0) {
|
|
664 if(i>n)
|
|
665 n <<- i;
|
|
666 assign(paste(i), x, dataEnv );
|
|
667 }
|
|
668 return(.self);
|
|
669 },
|
|
670 get = function(i) {
|
|
671 return(base::get(paste(i),dataEnv));
|
|
672 },
|
|
673 list = function() {
|
|
674 if(n==0) return(list());
|
|
675 result = vector('list',n);
|
|
676 for( i in 1:n) {
|
|
677 result[[i]] = .self$get(i);
|
|
678 }
|
|
679 return(result);
|
|
680 },
|
|
681 unlist = function() {
|
|
682 return(base::unlist(.self$list(), recursive=FALSE, use.names = FALSE));
|
|
683 },
|
|
684 show = function() {
|
|
685 cat(".listBuilder object.\nIternal object in MatrixEQTL package.\n");
|
|
686 cat("Number of elements:", object$n, "\n");
|
|
687 }
|
|
688 ))
|
|
689
|
|
690 .histogrammer <- setRefClass(".histogrammer",
|
|
691 fields = list(
|
|
692 pvbins1 = "numeric",
|
|
693 statbins1 = "numeric",
|
|
694 hist.count = "numeric"
|
|
695 ),
|
|
696 methods = list(
|
|
697 initialize = function (pvbins, statbins) {
|
|
698 if(length(pvbins)) {
|
|
699 ord = order(statbins);
|
|
700 pvbins1 <<- pvbins[ord];
|
|
701 statbins1 <<- statbins[ord];
|
|
702 hist.count <<- double(length(pvbins)-1);
|
|
703 }
|
|
704 return(.self);
|
|
705 },
|
|
706 update = function(stats.for.hist) {
|
|
707 h = hist(stats.for.hist, breaks = statbins1, include.lowest = TRUE, right = TRUE, plot = FALSE)$counts;
|
|
708 hist.count <<- hist.count + h;
|
|
709 },
|
|
710 getResults = function() {
|
|
711 if(!is.unsorted(pvbins1)) {
|
|
712 return(list(hist.bins = pvbins1 , hist.counts = hist.count ));
|
|
713 } else {
|
|
714 return(list(hist.bins = rev(pvbins1), hist.counts = rev(hist.count)));
|
|
715 }
|
|
716 }
|
|
717 ))
|
|
718
|
|
719
|
|
720 .minpvalue <- setRefClass(".minpvalue",
|
|
721 fields = list(
|
|
722 sdata = ".listBuilder",
|
|
723 gdata = ".listBuilder"
|
|
724 ),
|
|
725 methods = list(
|
|
726 initialize = function(snps, gene) {
|
|
727 sdata <<- .listBuilder$new();
|
|
728 for( ss in 1:snps$nSlices() ) {
|
|
729 sdata$set( ss, double(nrow(snps$getSliceRaw(ss))));
|
|
730 }
|
|
731 gdata <<- .listBuilder$new();
|
|
732 for( gg in 1:gene$nSlices() ) {
|
|
733 gdata$set( gg, double(nrow(gene$getSliceRaw(gg))));
|
|
734 }
|
|
735 return(.self);
|
|
736 },
|
|
737 update = function(ss, gg, astat) {
|
|
738 gmax = gdata$get(gg)
|
|
739 z1 = max.col(astat,ties.method='first');
|
|
740 z11 = astat[1:nrow(astat) + nrow(astat) * (z1 - 1)];
|
|
741 gmax = pmax(gmax, z11);
|
|
742 gdata$set(gg, gmax);
|
|
743
|
|
744 smax = sdata$get(ss)
|
|
745 z22 = apply(astat,2,max);
|
|
746 smax = pmax(smax, z22);
|
|
747 sdata$set(ss, smax);
|
|
748 return(.self);
|
|
749 },
|
|
750 updatecis = function(ss, gg, select.cis, astat) {
|
|
751 if(length(astat)>0)
|
|
752 {
|
|
753 byrows = aggregate(x=astat, by=list(row=select.cis[,1]), FUN=max);
|
|
754 bycols = aggregate(x=astat, by=list(col=select.cis[,2]), FUN=max);
|
|
755
|
|
756 gmax = gdata$get(gg);
|
|
757 gmax[byrows$row] = pmax(gmax[byrows$row], byrows$x)
|
|
758 gdata$set(gg, gmax);
|
|
759
|
|
760 smax = sdata$get(ss)
|
|
761 smax[bycols$col] = pmax(smax[bycols$col], bycols$x)
|
|
762 sdata$set(ss, smax);
|
|
763 }
|
|
764 return(.self);
|
|
765 },
|
|
766 getResults = function(snps, gene, pvfun) {
|
|
767 min.pv.snps = pvfun(sdata$unlist());
|
|
768 names(min.pv.snps) = rownames(snps);
|
|
769 min.pv.gene = pvfun(gdata$unlist());
|
|
770 names(min.pv.gene) = rownames(gene);
|
|
771 return(list(min.pv.snps = min.pv.snps, min.pv.gene = min.pv.gene));
|
|
772 }
|
|
773 ))
|
|
774
|
|
775 .OutputSaver_FRD <- setRefClass(".OutputSaver_FRD",
|
|
776 fields = list(
|
|
777 sdata = ".listBuilder",
|
|
778 gdata = ".listBuilder",
|
|
779 cdata = ".listBuilder",
|
|
780 bdata = ".listBuilder",
|
|
781 fid = "list",
|
|
782 testfun1 = "list",
|
|
783 pvfun1 = "list"
|
|
784 ),
|
|
785 methods = list(
|
|
786 initialize = function () {
|
|
787 sdata <<- .listBuilder$new();
|
|
788 gdata <<- .listBuilder$new();
|
|
789 cdata <<- .listBuilder$new();
|
|
790 bdata <<- .listBuilder$new();
|
|
791 fid <<- list(0);
|
|
792 testfun1 <<- list(0);
|
|
793 pvfun1 <<- list(0);
|
|
794 return(.self);
|
|
795 },
|
|
796 start = function(filename, statistic_name, unused1, unused2, testfun, pvfun) {
|
|
797 testfun1 <<- list(testfun);
|
|
798 pvfun1 <<- list(pvfun);
|
|
799 if(length(filename) > 0) {
|
|
800 if(class(filename) == "character") {
|
|
801 fid <<- list(file(description = filename, open = "wt", blocking = FALSE, raw = FALSE), TRUE);
|
|
802 } else {
|
|
803 fid <<- list(filename, FALSE)
|
|
804 }
|
|
805 writeLines( paste("SNP\tgene\t",statistic_name,"\tp-value\tFDR", sep = ""), fid[[1]]);
|
|
806 } else {
|
|
807 fid <<- list();
|
|
808 }
|
|
809 },
|
|
810 update = function(spos, gpos, sta, beta = NULL) {
|
|
811 if(length(sta)>0) {
|
|
812 sdata$add(spos);
|
|
813 gdata$add(gpos);
|
|
814 cdata$add(sta );
|
|
815 if(!is.null(beta ))
|
|
816 bdata$add(beta );
|
|
817 }
|
|
818 return(.self);
|
|
819 },
|
|
820 getResults = function( gene, snps, FDR_total_count) {
|
|
821 pvalues = NULL;
|
|
822 if(cdata$n > 0) {
|
|
823 tests = testfun1[[1]](cdata$unlist());
|
|
824 cdata <<- .listBuilder$new();
|
|
825
|
|
826 pvalues = pvfun1[[1]](tests);
|
|
827 ord = order(pvalues);
|
|
828
|
|
829 tests = tests[ord];
|
|
830 pvalues = pvalues[ord];
|
|
831
|
|
832 FDR = pvalues * FDR_total_count / (1:length(pvalues));
|
|
833 FDR[length(FDR)] = min(FDR[length(FDR)], 1);
|
|
834 FDR = rev(cummin(rev(FDR)));
|
|
835
|
|
836 snps_names = snps$GetAllRowNames()[sdata$unlist()[ord]];
|
|
837 sdata <<- .listBuilder$new();
|
|
838 gene_names = gene$GetAllRowNames()[gdata$unlist()[ord]];
|
|
839 gdata <<- .listBuilder$new();
|
|
840
|
|
841 beta = NULL;
|
|
842 if(bdata$n > 0)
|
|
843 beta = bdata$unlist()[ord];
|
|
844
|
|
845 if(length(fid)>0) {
|
|
846 step = 1000; ########### 100000
|
|
847 for( part in 1:ceiling(length(FDR)/step) ) {
|
|
848 fr = (part-1)*step + 1;
|
|
849 to = min(part*step, length(FDR));
|
|
850 dump = data.frame(snps_names[fr:to],
|
|
851 gene_names[fr:to],
|
|
852 if(is.null(beta)) tests[fr:to] else list(beta[fr:to],tests[fr:to]),
|
|
853 pvalues[fr:to],
|
|
854 FDR[fr:to],
|
|
855 row.names = NULL,
|
|
856 check.rows = FALSE,
|
|
857 check.names = FALSE,
|
|
858 stringsAsFactors = FALSE);
|
|
859 write.table(dump, file = fid[[1]], quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE);
|
|
860 }
|
|
861 }
|
|
862 } else {
|
|
863 cat("No significant associations were found.\n", file = if(length(fid)>0){fid[[1]]}else{""});
|
|
864 }
|
|
865 if(length(fid)>0) {
|
|
866 if(fid[[2]]) {
|
|
867 close(fid[[1]]);
|
|
868 }
|
|
869 fid <<- list();
|
|
870 }
|
|
871
|
|
872 if(!is.null(pvalues)) {
|
|
873 eqtls = list( snps = snps_names,
|
|
874 gene = gene_names,
|
|
875 statistic = tests,
|
|
876 pvalue = pvalues,
|
|
877 FDR = FDR);
|
|
878 if(!is.null(beta))
|
|
879 eqtls$beta = beta;
|
|
880 } else {
|
|
881 eqtls = list( snps = character(),
|
|
882 gene = character(),
|
|
883 beta = numeric(),
|
|
884 statistic = numeric(),
|
|
885 pvalue = numeric(),
|
|
886 FDR = numeric());
|
|
887 }
|
|
888 return(list(eqtls = data.frame(eqtls)));
|
|
889 }
|
|
890 )
|
|
891 )
|
|
892
|
|
893
|
|
894 .OutputSaver_direct <- setRefClass(".OutputSaver_direct",
|
|
895 fields = list(
|
|
896 gene_names = "character",
|
|
897 snps_names = "character",
|
|
898 fid = "list",
|
|
899 testfun1 = "list",
|
|
900 pvfun1 = "list"
|
|
901 ),
|
|
902 methods = list(
|
|
903 initialize = function() {
|
|
904 gene_names <<- character(0);
|
|
905 snps_names <<- character(0);
|
|
906 fid <<- list(0);
|
|
907 testfun1 <<- list(0);
|
|
908 pvfun1 <<- list(0);
|
|
909 return(.self);
|
|
910 },
|
|
911 start = function(filename, statistic_name, snps, gene, testfun, pvfun) {
|
|
912 # I hope the program stops if it fails to open the file
|
|
913 if(class(filename) == "character") {
|
|
914 fid <<- list(file(description = filename, open = "wt", blocking = FALSE, raw = FALSE), TRUE);
|
|
915 } else {
|
|
916 fid <<- list(filename, FALSE)
|
|
917 }
|
|
918 writeLines(paste("SNP\tgene\t", statistic_name, "\tp-value", sep = ""), fid[[1]]);
|
|
919 gene_names <<- gene$GetAllRowNames();
|
|
920 snps_names <<- snps$GetAllRowNames();
|
|
921 testfun1 <<- list(testfun);
|
|
922 pvfun1 <<- list(pvfun);
|
|
923 },
|
|
924 update = function(spos, gpos, sta, beta = NULL) {
|
|
925 if( length(sta) == 0 )
|
|
926 return();
|
|
927 sta = testfun1[[1]](sta);
|
|
928 lst = list(snps = snps_names[spos], gene = gene_names[gpos], beta = beta, statistic = sta, pvalue = pvfun1[[1]](sta));
|
|
929 lst$beta = lst$beta;
|
|
930
|
|
931 dump2 = data.frame(lst, row.names = NULL, check.rows = FALSE, check.names = FALSE, stringsAsFactors = FALSE);
|
|
932 write.table(dump2, file = fid[[1]], quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE);
|
|
933 },
|
|
934 getResults = function(...) {
|
|
935 if(length(fid)>0) {
|
|
936 if(fid[[2]]) {
|
|
937 close(fid[[1]]);
|
|
938 }
|
|
939 fid <<- list();
|
|
940 }
|
|
941 gene_names <<- character(0);
|
|
942 snps_names <<- character(0);
|
|
943 return(list());
|
|
944 }
|
|
945 )
|
|
946 )
|
|
947
|
|
948 .my.pmin = function(x, val) {
|
|
949 # minimum "pmin" function that can handle empty array
|
|
950 if(length(x) == 0) {
|
|
951 return(x)
|
|
952 } else {
|
|
953 return(pmin.int(x,val));
|
|
954 }
|
|
955 }
|
|
956
|
|
957 .my.pmax = function(x, val) {
|
|
958 # minimum "pmin" function that can handle empty array
|
|
959 if(length(x) == 0) {
|
|
960 return(x)
|
|
961 } else {
|
|
962 return(pmax.int(x,val));
|
|
963 }
|
|
964 }
|
|
965
|
|
966 .pv.nz = function(x){return( .my.pmax(x,.Machine$double.xmin) )}
|
|
967
|
|
968 .SetNanRowMean = function(x) {
|
|
969 if( any(is.na(x)) ) {
|
|
970 rowmean = rowMeans(x, na.rm = TRUE);
|
|
971 rowmean[ is.na(rowmean) ] = 0;
|
|
972 for( j in which(!complete.cases(x)) ) {
|
|
973 where1 = is.na( x[j, ] );
|
|
974 x[j,where1] = rowmean[j];
|
|
975 }
|
|
976 }
|
|
977 return(x);
|
|
978 }
|
|
979
|
|
980 # .SNP_process_split_for_ANOVA = function(x) {
|
|
981 # # split into 2 dummy variables
|
|
982 #
|
|
983 # uniq = unique(c(x));
|
|
984 # uniq = uniq[!is.na(uniq)];
|
|
985 #
|
|
986 # if( length(uniq) > 3 ) {
|
|
987 # stop("More than three genotype categories is not handled by ANOVA");
|
|
988 # } else if ( length(uniq) < 3 ) {
|
|
989 # uniq = c(uniq, min(uniq)-(1:(3-length(uniq))));
|
|
990 # }
|
|
991 #
|
|
992 # freq = matrix(0, nrow(x), length(uniq));
|
|
993 # for(i in 1:length(uniq)) {
|
|
994 # freq[ ,i] = rowSums(x==uniq[i], na.rm = TRUE);
|
|
995 # }
|
|
996 #
|
|
997 # md = apply(freq, 1, which.max);
|
|
998 # freq[ cbind(1:nrow(x),md) ] = -1;
|
|
999 #
|
|
1000 # md = apply(freq, 1, which.max); # min(freq[cbind(1:nrow(slice),md)] - rowSums(select,na.rm = TRUE ))
|
|
1001 # new_slice1 = (x == uniq[md]);
|
|
1002 # new_slice1[is.na(new_slice1)] = 0;
|
|
1003 # freq[ cbind(1:nrow(x),md) ] = -1;
|
|
1004 #
|
|
1005 # md = apply(freq,1,which.max);
|
|
1006 # new_slice2 = (x == uniq[md]);
|
|
1007 # new_slice2[ is.na(new_slice2) ] = 0;
|
|
1008 # rez = vector("list", 2);
|
|
1009 # rez[[1]] = new_slice1;
|
|
1010 # rez[[2]] = new_slice2;
|
|
1011 # return( rez );
|
|
1012 # }
|
|
1013
|
|
1014 .SNP_process_split_for_ANOVA = function(x,n.groups) {
|
|
1015 # split into 2 dummy variables (or more)
|
|
1016
|
|
1017 # # Get the number of ANOVA groups
|
|
1018 # n.groups = options('MatrixEQTL.ANOVA.categories')[[1]];
|
|
1019 # if( is.null(n.groups))
|
|
1020 # n.groups = 3;
|
|
1021
|
|
1022 # Unique values in x (make sure it has length of n.groups);
|
|
1023 uniq = unique(as.vector(x));
|
|
1024 uniq = uniq[!is.na(uniq)];
|
|
1025 if( length(uniq) > n.groups ) {
|
|
1026 stop("More than declared number of genotype categories is detected by ANOVA");
|
|
1027 } else if ( length(uniq) < n.groups ) {
|
|
1028 uniq = c(uniq, rep(min(uniq)-1, n.groups-length(uniq)));
|
|
1029 }
|
|
1030
|
|
1031 # Table of frequencies for each variable (row)
|
|
1032 freq = matrix(0, nrow(x), n.groups);
|
|
1033 for(i in 1:n.groups) {
|
|
1034 freq[ ,i] = rowSums(x==uniq[i], na.rm = TRUE);
|
|
1035 }
|
|
1036 # remove NA's from x for convenience
|
|
1037 x[is.na(x)] = min(uniq)-2;
|
|
1038
|
|
1039 # Output list of matrices
|
|
1040 rez = vector('list',n.groups-1);
|
|
1041
|
|
1042 # Skip the most frequent value
|
|
1043 md = apply(freq, 1, which.max); # most frequent value for each variable
|
|
1044 freq[ cbind(1:nrow(x),md) ] = -1;
|
|
1045
|
|
1046 # The rest form dumm
|
|
1047 for(j in 1:(n.groups-1)){
|
|
1048 md = apply(freq, 1, which.max);
|
|
1049 freq[ cbind(1:nrow(x),md) ] = -1;
|
|
1050 rez[[j]] = (x == uniq[md]);
|
|
1051 }
|
|
1052 return( rez );
|
|
1053 }
|
|
1054
|
|
1055 Matrix_eQTL_engine = function(
|
|
1056 snps,
|
|
1057 gene,
|
|
1058 cvrt = SlicedData$new(),
|
|
1059 output_file_name,
|
|
1060 pvOutputThreshold = 1e-5,
|
|
1061 useModel = modelLINEAR,
|
|
1062 errorCovariance = numeric(),
|
|
1063 verbose = TRUE,
|
|
1064 pvalue.hist = FALSE,
|
|
1065 min.pv.by.genesnp = FALSE,
|
|
1066 noFDRsaveMemory = FALSE) {
|
|
1067 rez = Matrix_eQTL_main(
|
|
1068 snps = snps,
|
|
1069 gene = gene,
|
|
1070 cvrt = cvrt,
|
|
1071 output_file_name = output_file_name,
|
|
1072 pvOutputThreshold = pvOutputThreshold,
|
|
1073 useModel = useModel,
|
|
1074 errorCovariance = errorCovariance,
|
|
1075 verbose = verbose,
|
|
1076 pvalue.hist = pvalue.hist,
|
|
1077 min.pv.by.genesnp = min.pv.by.genesnp,
|
|
1078 noFDRsaveMemory = noFDRsaveMemory);
|
|
1079 return( rez );
|
|
1080 }
|
|
1081
|
|
1082 Matrix_eQTL_main = function(
|
|
1083 snps,
|
|
1084 gene,
|
|
1085 cvrt = SlicedData$new(),
|
|
1086 output_file_name = "",
|
|
1087 pvOutputThreshold = 1e-5,
|
|
1088 useModel = modelLINEAR,
|
|
1089 errorCovariance = numeric(),
|
|
1090 verbose = TRUE,
|
|
1091 output_file_name.cis = "",
|
|
1092 pvOutputThreshold.cis = 0,
|
|
1093 snpspos = NULL,
|
|
1094 genepos = NULL,
|
|
1095 cisDist = 1e6,
|
|
1096 pvalue.hist = FALSE,
|
|
1097 min.pv.by.genesnp = FALSE,
|
|
1098 noFDRsaveMemory = FALSE) {
|
|
1099 ################################# Basic variable checks #################################
|
|
1100 {
|
|
1101 # status("Performing basic checks of the input variables");
|
|
1102 stopifnot( "SlicedData" %in% class(gene) );
|
|
1103 stopifnot( "SlicedData" %in% class(snps) );
|
|
1104 stopifnot( "SlicedData" %in% class(cvrt) );
|
|
1105
|
|
1106 # Check dimensions
|
|
1107 if( min(snps$nRows(),snps$nCols()) == 0 )
|
|
1108 stop("Empty genotype dataset");
|
|
1109 if( min(gene$nRows(),gene$nCols()) == 0 )
|
|
1110 stop("Empty expression dataset");
|
|
1111 if( snps$nCols() != gene$nCols() )
|
|
1112 stop("Different number of samples in the genotype and gene expression files");
|
|
1113 if( cvrt$nRows()>0 ) {
|
|
1114 if( snps$nCols() != cvrt$nCols() )
|
|
1115 stop("Wrong number of samples in the matrix of covariates");
|
|
1116 }
|
|
1117
|
|
1118 stopifnot( class(pvOutputThreshold) == "numeric" );
|
|
1119 stopifnot( length(pvOutputThreshold) == 1 );
|
|
1120 stopifnot( pvOutputThreshold >= 0 );
|
|
1121 stopifnot( pvOutputThreshold <= 1 );
|
|
1122
|
|
1123 stopifnot( class(noFDRsaveMemory) == "logical" );
|
|
1124 stopifnot( length(noFDRsaveMemory) == 1 );
|
|
1125
|
|
1126 if( pvOutputThreshold > 0 ) {
|
|
1127 stopifnot( !((length(output_file_name) == 0) && noFDRsaveMemory) )
|
|
1128 stopifnot( length(output_file_name) <= 1 );
|
|
1129 if( length(output_file_name) == 1 ) {
|
|
1130 stopifnot( class(output_file_name) %in% c("character","connection") );
|
|
1131 }
|
|
1132 }
|
|
1133
|
|
1134 stopifnot( class(pvOutputThreshold.cis) == "numeric" );
|
|
1135 stopifnot( length(pvOutputThreshold.cis) == 1 );
|
|
1136 stopifnot( pvOutputThreshold.cis >= 0 );
|
|
1137 stopifnot( pvOutputThreshold.cis <= 1 );
|
|
1138 stopifnot( !((pvOutputThreshold > 0) & (pvOutputThreshold.cis > 0) & (pvOutputThreshold > pvOutputThreshold.cis)) );
|
|
1139 stopifnot( (pvOutputThreshold > 0) | (pvOutputThreshold.cis > 0) );
|
|
1140
|
|
1141 stopifnot( class(useModel) == class(modelLINEAR) );
|
|
1142 stopifnot( length(useModel) == 1 );
|
|
1143 stopifnot( useModel %in% c(modelLINEAR, modelANOVA, modelLINEAR_CROSS) );
|
|
1144 if( useModel %in% c(modelLINEAR, modelLINEAR_CROSS) ) {
|
|
1145 if( snps$nCols() <= cvrt$nRows() + 1 + 1) {
|
|
1146 stop('The number of covariates exceeds the number of samples.\nLinear regression can not be fit.')
|
|
1147 }
|
|
1148 }
|
|
1149 if( useModel == modelLINEAR_CROSS ) {
|
|
1150 if( cvrt$nRows() == 0 ) {
|
|
1151 stop( "Model \"modelLINEAR_CROSS\" requires at least one covariate" );
|
|
1152 }
|
|
1153 }
|
|
1154 if( useModel == modelANOVA ) {
|
|
1155 n.anova.groups = getOption('MatrixEQTL.ANOVA.categories', 3);
|
|
1156 stopifnot( n.anova.groups == floor(n.anova.groups) );
|
|
1157 stopifnot( n.anova.groups >= 3 );
|
|
1158 # stopifnot( n.anova.groups < snps$nCols() - cvrt$nRows() - 2 );
|
|
1159 if( snps$nCols() <= cvrt$nRows() + n.anova.groups) {
|
|
1160 stop('The number of covariates exceeds the number of samples.\nLinear regression (ANOVA) can not be fit.')
|
|
1161 }
|
|
1162 }
|
|
1163
|
|
1164 stopifnot( class(verbose) == "logical" );
|
|
1165 stopifnot( length(verbose) == 1 );
|
|
1166
|
|
1167 stopifnot( class(min.pv.by.genesnp) == "logical" );
|
|
1168 stopifnot( length(min.pv.by.genesnp) == 1 );
|
|
1169
|
|
1170 if( pvOutputThreshold.cis > 0 ) {
|
|
1171 stopifnot( !((length(output_file_name.cis) == 0) && noFDRsaveMemory) )
|
|
1172 stopifnot( length(output_file_name.cis) <= 1 );
|
|
1173 if( length(output_file_name.cis) == 1 ) {
|
|
1174 stopifnot( class(output_file_name.cis) %in% c("character","connection") );
|
|
1175 }
|
|
1176
|
|
1177 # stopifnot( class(output_file_name.cis) == "character" );
|
|
1178 # stopifnot( length(output_file_name.cis) == 1 );
|
|
1179 stopifnot( class(snpspos) == "data.frame" );
|
|
1180 stopifnot( ncol(snpspos) == 3 );
|
|
1181 stopifnot( nrow(snpspos) > 0 );
|
|
1182 stopifnot( class(snpspos[1,3]) %in% c("integer", "numeric") )
|
|
1183 stopifnot( !any(is.na(snpspos[,3])) )
|
|
1184 stopifnot( class(genepos) == "data.frame" );
|
|
1185 stopifnot( ncol(genepos) == 4 );
|
|
1186 stopifnot( nrow(genepos) > 0 );
|
|
1187 stopifnot( class(genepos[1,3]) %in% c("integer", "numeric") )
|
|
1188 stopifnot( class(genepos[1,4]) %in% c("integer", "numeric") )
|
|
1189 stopifnot( !any(is.na(genepos[,3])) )
|
|
1190 stopifnot( !any(is.na(genepos[,4])) )
|
|
1191 stopifnot( nzchar(output_file_name.cis) )
|
|
1192 }
|
|
1193
|
|
1194 if( pvOutputThreshold > 0 ) {
|
|
1195 stopifnot( nzchar(output_file_name) )
|
|
1196 }
|
|
1197
|
|
1198 stopifnot( class(errorCovariance) %in% c("numeric", "matrix") );
|
|
1199 errorCovariance = as.matrix(errorCovariance);
|
|
1200 if(length(errorCovariance)>0) {
|
|
1201 if( nrow(errorCovariance) != ncol(errorCovariance) ) {
|
|
1202 stop("The covariance matrix is not square");
|
|
1203 }
|
|
1204 if( nrow(errorCovariance) != snps$nCols() ) {
|
|
1205 stop("The covariance matrix size does not match the number of samples");
|
|
1206 }
|
|
1207 if( !all(errorCovariance == t(errorCovariance)) ) {
|
|
1208 stop("The covariance matrix is not symmetric");
|
|
1209 }
|
|
1210 }
|
|
1211 }
|
|
1212 ################################# Initial setup #########################################
|
|
1213 {
|
|
1214 gene.std = .listBuilder$new();
|
|
1215 snps.std = .listBuilder$new();
|
|
1216
|
|
1217 dont.clone.gene = getOption('MatrixEQTL.dont.preserve.gene.object', FALSE)
|
|
1218 if(is.null(dont.clone.gene))
|
|
1219 dont.clone.gene = FALSE;
|
|
1220
|
|
1221 if( !dont.clone.gene )
|
|
1222 gene = gene$Clone();
|
|
1223 # snps = snps$Clone(); # snps is read only
|
|
1224 cvrt = cvrt$Clone();
|
|
1225
|
|
1226 params = list(
|
|
1227 output_file_name = output_file_name,
|
|
1228 pvOutputThreshold = pvOutputThreshold,
|
|
1229 useModel = useModel,
|
|
1230 errorCovariance = errorCovariance ,
|
|
1231 verbose = verbose,
|
|
1232 output_file_name.cis = output_file_name.cis,
|
|
1233 pvOutputThreshold.cis = pvOutputThreshold.cis,
|
|
1234 cisDist = cisDist ,
|
|
1235 pvalue.hist = pvalue.hist,
|
|
1236 min.pv.by.genesnp = min.pv.by.genesnp);
|
|
1237
|
|
1238 if( verbose ) {
|
|
1239 lastTime = 0;
|
|
1240 status <- function(text) {
|
|
1241 # gc();
|
|
1242 newTime = proc.time()[3];
|
|
1243 if(lastTime != 0) {
|
|
1244 cat("Task finished in ", newTime-lastTime, " seconds\n");
|
|
1245 }
|
|
1246 cat(text,"\n");
|
|
1247 lastTime <<- newTime;
|
|
1248 unused = flush.console();
|
|
1249 }
|
|
1250 } else {
|
|
1251 status = function(text){}
|
|
1252 }
|
|
1253 start.time = proc.time()[3];
|
|
1254 }
|
|
1255 ################################# Error covariance matrix processing ####################
|
|
1256 {
|
|
1257 if( length(errorCovariance) > 0 ) {
|
|
1258 status("Processing the error covariance matrix");
|
|
1259 eig = eigen(errorCovariance, symmetric = TRUE)
|
|
1260 d = eig$values;
|
|
1261 v = eig$vectors;
|
|
1262 # errorCovariance == v %*% diag(d) %*% t(v)
|
|
1263 # errorCovariance^0.5 == v*sqrt(d)*v" (no single quotes anymore)
|
|
1264 # errorCovariance^(-0.5) == v*diag(1./sqrt(diag(d)))*v"
|
|
1265 if( any(d<=0) ) {
|
|
1266 stop("The covariance matrix is not positive definite");
|
|
1267 }
|
|
1268 correctionMatrix = v %*% diag(1./sqrt(d)) %*% t(v);
|
|
1269 rm( eig, v, d, errorCovariance )
|
|
1270 } else {
|
|
1271 rm( errorCovariance );
|
|
1272 correctionMatrix = numeric();
|
|
1273 }
|
|
1274 }
|
|
1275 ################################# Matching gene and SNPs locations ######################
|
|
1276 if( pvOutputThreshold.cis > 0 ) {
|
|
1277 status("Matching data files and location files")
|
|
1278
|
|
1279 # names in the input data
|
|
1280 gene_names = gene$GetAllRowNames();
|
|
1281 snps_names = snps$GetAllRowNames();
|
|
1282
|
|
1283 # gene range, set: left<right
|
|
1284 if(any(genepos[,3] > genepos[,4])) {
|
|
1285 temp3 = genepos[,3];
|
|
1286 temp4 = genepos[,4];
|
|
1287 genepos[,3] = pmin(temp3,temp4);
|
|
1288 genepos[,4] = pmax(temp3,temp4);
|
|
1289 rm(temp3, temp4);
|
|
1290 }
|
|
1291
|
|
1292 # match with the location data
|
|
1293 genematch = match( gene_names, genepos[ ,1], nomatch = 0L);
|
|
1294 usedgene = matrix(FALSE, nrow(genepos), 1); # genes in "genepos" that are matching "gene_names"
|
|
1295 usedgene[ genematch ] = TRUE;
|
|
1296 if( !any(genematch) ) {
|
|
1297 stop("Gene names do not match those in the gene location file.");
|
|
1298 }
|
|
1299 cat( sum(genematch>0), "of", length(gene_names), " genes matched\n");
|
|
1300
|
|
1301
|
|
1302 snpsmatch = match( snps_names, snpspos[ ,1], nomatch = 0L);
|
|
1303 usedsnps = matrix(FALSE, nrow(snpspos),1);
|
|
1304 usedsnps[ snpsmatch ] = TRUE;
|
|
1305 if( !any(snpsmatch) ) {
|
|
1306 stop("SNP names do not match those in the SNP location file.");
|
|
1307 }
|
|
1308 cat( sum(snpsmatch>0), "of", length(snps_names), " SNPs matched\n");
|
|
1309
|
|
1310 # list used chr names
|
|
1311 chrNames = unique(c( as.character(unique(snpspos[usedsnps,2])),
|
|
1312 as.character(unique(genepos[usedgene,2])) ))
|
|
1313 chrNames = chrNames[ sort.list( suppressWarnings(as.integer(chrNames)),
|
|
1314 method = "radix", na.last = TRUE ) ];
|
|
1315 # match chr names
|
|
1316 genechr = match(genepos[,2],chrNames);
|
|
1317 snpschr = match(snpspos[,2],chrNames);
|
|
1318
|
|
1319 # max length of a chromosome
|
|
1320 chrMax = max( snpspos[usedsnps, 3], genepos[usedgene, 4], na.rm = TRUE) + cisDist;
|
|
1321
|
|
1322 # Single number location for all rows in "genepos" and "snpspos"
|
|
1323 genepos2 = as.matrix(genepos[ ,3:4, drop = FALSE] + (genechr-1)*chrMax);
|
|
1324 snpspos2 = as.matrix(snpspos[ ,3 , drop = FALSE] + (snpschr-1)*chrMax);
|
|
1325
|
|
1326 # the final location arrays;
|
|
1327 snps_pos = matrix(0,length(snps_names),1);
|
|
1328 snps_pos[snpsmatch>0, ] = snpspos2[snpsmatch, , drop = FALSE];
|
|
1329 snps_pos[rowSums(is.na(snps_pos))>0, ] = 0;
|
|
1330 snps_pos[snps_pos==0] = (length(chrNames)+1) * (chrMax+cisDist);
|
|
1331 rm(snps_names, snpsmatch, usedsnps, snpschr, snpspos2)
|
|
1332
|
|
1333 gene_pos = matrix(0,length(gene_names),2);
|
|
1334 gene_pos[genematch>0, ] = genepos2[genematch, , drop = FALSE];
|
|
1335 gene_pos[rowSums(is.na(gene_pos))>0, ] = 0;
|
|
1336 gene_pos[gene_pos==0] = (length(chrNames)+2) * (chrMax+cisDist);
|
|
1337 rm(gene_names, genematch, usedgene, genechr, genepos2)
|
|
1338 rm(chrNames, chrMax);
|
|
1339
|
|
1340 if( is.unsorted(snps_pos) ) {
|
|
1341 status("Reordering SNPs\n");
|
|
1342 ordr = sort.list(snps_pos);
|
|
1343 snps$RowReorder(ordr);
|
|
1344 snps_pos = snps_pos[ordr, , drop = FALSE];
|
|
1345 rm(ordr);
|
|
1346 }
|
|
1347 if( is.unsorted(rowSums(gene_pos)) ) {
|
|
1348 status("Reordering genes\n");
|
|
1349 ordr = sort.list(rowSums(gene_pos));
|
|
1350 gene$RowReorder(ordr);
|
|
1351 gene_pos = gene_pos[ordr, , drop = FALSE];
|
|
1352 rm(ordr);
|
|
1353 }
|
|
1354
|
|
1355 # Slice it back.
|
|
1356 geneloc = vector("list", gene$nSlices())
|
|
1357 gene_offset = 0;
|
|
1358 for(gc in 1:gene$nSlices()) {
|
|
1359 nr = length(gene$rowNameSlices[[gc]]);
|
|
1360 geneloc[[gc]] = gene_pos[gene_offset + (1:nr), , drop = FALSE];
|
|
1361 gene_offset = gene_offset + nr;
|
|
1362 }
|
|
1363 rm(gc, gene_offset, gene_pos);
|
|
1364
|
|
1365 snpsloc = vector("list", snps$nSlices())
|
|
1366 snps_offset = 0;
|
|
1367 for(sc in 1:snps$nSlices()) {
|
|
1368 nr = length(snps$rowNameSlices[[sc]]);
|
|
1369 snpsloc[[sc]] = snps_pos[snps_offset + (1:nr), , drop = FALSE];
|
|
1370 snps_offset = snps_offset + nr;
|
|
1371 }
|
|
1372 rm(nr, sc, snps_offset, snps_pos);
|
|
1373 }
|
|
1374 ################################# Covariates processing #################################
|
|
1375 {
|
|
1376 status("Processing covariates");
|
|
1377 if( useModel == modelLINEAR_CROSS ) {
|
|
1378 last.covariate = as.vector(tail( cvrt$getSlice(cvrt$nSlices()), n = 1));
|
|
1379 }
|
|
1380 if( cvrt$nRows()>0 ) {
|
|
1381 cvrt$SetNanRowMean();
|
|
1382 cvrt$CombineInOneSlice();
|
|
1383 cvrt = rbind(matrix(1,1,snps$nCols()),cvrt$getSlice(1));
|
|
1384 } else {
|
|
1385 cvrt = matrix(1,1,snps$nCols());
|
|
1386 }
|
|
1387 # Correct for the error covariance structure
|
|
1388 if( length(correctionMatrix)>0 ) {
|
|
1389 cvrt = cvrt %*% correctionMatrix;
|
|
1390 }
|
|
1391 # Orthonormalize covariates
|
|
1392 # status("Orthonormalizing covariates");
|
|
1393 q = qr(t(cvrt));
|
|
1394 if( min(abs(diag(qr.R(q)))) < .Machine$double.eps * snps$nCols() ) {
|
|
1395 stop("Colinear or zero covariates detected");
|
|
1396 }
|
|
1397 cvrt = t( qr.Q(q) );
|
|
1398 rm( q );
|
|
1399 }
|
|
1400 ################################# Gene expression processing ############################
|
|
1401 {
|
|
1402 status("Processing gene expression data (imputation, residualization, etc.)");
|
|
1403 # Impute gene expression
|
|
1404 gene$SetNanRowMean();
|
|
1405 # Correct for the error covariance structure
|
|
1406 if( length(correctionMatrix)>0 ) {
|
|
1407 gene$RowMatrixMultiply(correctionMatrix);
|
|
1408 }
|
|
1409 # Orthogonolize expression w.r.t. covariates
|
|
1410 # status("Orthogonolizing expression w.r.t. covariates");
|
|
1411 gene_offsets = double(gene$nSlices()+1);
|
|
1412 for( sl in 1:gene$nSlices() ) {
|
|
1413 slice = gene$getSlice(sl);
|
|
1414 gene_offsets[sl+1] = gene_offsets[sl] + nrow(slice);
|
|
1415 rowsq1 = rowSums(slice^2);
|
|
1416 slice = slice - tcrossprod(slice,cvrt) %*% cvrt;
|
|
1417 rowsq2 = rowSums(slice^2);
|
|
1418 # kill rows colinear with the covariates
|
|
1419 delete.rows = (rowsq2 <= rowsq1 * .Machine$double.eps );
|
|
1420 slice[delete.rows] = 0;
|
|
1421 div = sqrt( rowSums(slice^2) );
|
|
1422 div[ div == 0 ] = 1;
|
|
1423 gene.std$set(sl, div);
|
|
1424 gene$setSlice(sl, slice / div);
|
|
1425 }
|
|
1426 rm(rowsq1, rowsq2, delete.rows, div);
|
|
1427 rm( sl, slice );
|
|
1428 #gene$RowRemoveZeroEps();
|
|
1429 }
|
|
1430 ################################# snps_process, testfun, pvfun, threshfun, afun ########
|
|
1431 {
|
|
1432 # snps_process - preprocess SNPs slice
|
|
1433 #
|
|
1434 # afun --- abs for signed stats, identity for non-negative
|
|
1435 # threshfun --- internal stat threshold for given p-value
|
|
1436 # testfun --- t or F statistic from the internal one
|
|
1437 # pvfun --- p-value from the t or F statistic
|
|
1438
|
|
1439 nSamples = snps$nCols();
|
|
1440 nGenes = gene$nRows();
|
|
1441 nSnps = snps$nRows();
|
|
1442 nCov = nrow(cvrt);
|
|
1443 # nVarTested = length(snps_list); # set in case(useModel)
|
|
1444 # dfNull = nSamples - nCov;
|
|
1445 # d.f. of the full model
|
|
1446 betafun = NULL;
|
|
1447
|
|
1448 if( useModel == modelLINEAR ) {
|
|
1449 snps_process = function(x) {
|
|
1450 return( list(.SetNanRowMean(x)) );
|
|
1451 };
|
|
1452 nVarTested = 1;
|
|
1453 dfFull = nSamples - nCov - nVarTested;
|
|
1454 statistic.fun = function(mat_list) {
|
|
1455 return( mat_list[[1]] );
|
|
1456 }
|
|
1457 afun = function(x) {return(abs(x))};
|
|
1458 threshfun = function(pv) {
|
|
1459 thr = qt(pv/2, dfFull, lower.tail = FALSE);
|
|
1460 thr = thr^2;
|
|
1461 thr = sqrt( thr / (dfFull + thr) );
|
|
1462 thr[pv >= 1] = 0;
|
|
1463 thr[pv <= 0] = 1;
|
|
1464 return( thr );
|
|
1465 }
|
|
1466 testfun = function(x) { return( x * sqrt( dfFull / (1 - .my.pmin(x^2,1)))); }
|
|
1467 pvfun = function(x) { return( .pv.nz(pt(-abs(x),dfFull)*2)); }
|
|
1468 thresh.cis = threshfun(pvOutputThreshold.cis);
|
|
1469 thresh = threshfun(pvOutputThreshold);
|
|
1470 betafun = function(stat, ss, gg, select) {
|
|
1471 return(stat * gene.std$get(gg)[select[,1]] / snps.std$get(ss)[select[,2]]);
|
|
1472 }
|
|
1473 } else
|
|
1474 if( useModel == modelANOVA ) {
|
|
1475 snps_process = function(x).SNP_process_split_for_ANOVA(x,n.anova.groups);
|
|
1476 nVarTested = n.anova.groups - 1;
|
|
1477 dfFull = nSamples - nCov - nVarTested;
|
|
1478 # statistic.fun = function(mat_list) {
|
|
1479 # return( mat_list[[1]]^2 + mat_list[[2]]^2 );
|
|
1480 # }
|
|
1481 statistic.fun = function(mat_list) {
|
|
1482 x = mat_list[[1]]^2;
|
|
1483 for( j in 2:length(mat_list) )
|
|
1484 x = x + mat_list[[j]]^2;
|
|
1485 return( x );
|
|
1486 }
|
|
1487 afun = identity;
|
|
1488 threshfun = function(pv) {
|
|
1489 thr = qf(pv, nVarTested, dfFull, lower.tail = FALSE);
|
|
1490 thr = thr / (dfFull/nVarTested + thr);
|
|
1491 thr[pv >= 1] = 0;
|
|
1492 thr[pv <= 0] = 1;
|
|
1493 return( thr );
|
|
1494 }
|
|
1495 testfun = function(x) { return( x / (1 - .my.pmin(x,1)) * (dfFull/nVarTested) ); }
|
|
1496 pvfun = function(x) { return( .pv.nz(pf(x, nVarTested, dfFull, lower.tail = FALSE)) ); }
|
|
1497 thresh.cis = threshfun(pvOutputThreshold.cis);
|
|
1498 thresh = threshfun(pvOutputThreshold);
|
|
1499 } else
|
|
1500 if( useModel == modelLINEAR_CROSS ) {
|
|
1501 last.covariate = as.vector( last.covariate );
|
|
1502 snps_process = .SNP_process_split_for_LINEAR_CROSS = function(x) {
|
|
1503 out = vector("list", 2);
|
|
1504 out[[1]] = .SetNanRowMean(x);
|
|
1505 out[[2]] = t( t(out[[1]]) * last.covariate );
|
|
1506 return( out );
|
|
1507 };
|
|
1508 nVarTested = 1;
|
|
1509 dfFull = nSamples - nCov - nVarTested - 1;
|
|
1510 statistic.fun = function(mat_list) {
|
|
1511 return( mat_list[[2]] / sqrt(1 - mat_list[[1]]^2) );
|
|
1512 }
|
|
1513 afun = function(x) {return(abs(x))};
|
|
1514 threshfun = function(pv) {
|
|
1515 thr = qt(pv/2, dfFull, lower.tail = FALSE);
|
|
1516 thr = thr^2;
|
|
1517 thr = sqrt( thr / (dfFull + thr) );
|
|
1518 thr[pv >= 1] = 0;
|
|
1519 thr[pv <= 0] = 1;
|
|
1520 return( thr );
|
|
1521 }
|
|
1522 testfun = function(x) { return( x * sqrt( dfFull / (1 - .my.pmin(x^2,1)))); }
|
|
1523 pvfun = function(x) { return( .pv.nz(pt(-abs(x),dfFull)*2 )); }
|
|
1524 thresh.cis = threshfun(pvOutputThreshold.cis);
|
|
1525 thresh = threshfun(pvOutputThreshold);
|
|
1526 betafun = function(stat, ss, gg, select) {
|
|
1527 return(stat * gene.std$get(gg)[select[,1]] / snps.std$get(ss)[select[,2]]);
|
|
1528 }
|
|
1529 }
|
|
1530 params$dfFull = dfFull;
|
|
1531 }
|
|
1532 ################################# Saver class(es) creation ##############################
|
|
1533 {
|
|
1534 status("Creating output file(s)");
|
|
1535 if(noFDRsaveMemory) {
|
|
1536 if( pvOutputThreshold > 0 ) {
|
|
1537 saver.tra = .OutputSaver_direct$new();
|
|
1538 }
|
|
1539 if( pvOutputThreshold.cis > 0 ) {
|
|
1540 saver.cis = .OutputSaver_direct$new();
|
|
1541 }
|
|
1542 } else {
|
|
1543 if( pvOutputThreshold > 0 ) {
|
|
1544 saver.tra = .OutputSaver_FRD$new();
|
|
1545 }
|
|
1546 if( pvOutputThreshold.cis > 0 ) {
|
|
1547 saver.cis = .OutputSaver_FRD$new();
|
|
1548 }
|
|
1549 }
|
|
1550 if( pvOutputThreshold > 0 )
|
|
1551 if( pvOutputThreshold * gene$nRows() * snps$nRows() > 1000000 )
|
|
1552 if(!noFDRsaveMemory)
|
|
1553 cat('Warning: pvOutputThreshold may be too large.\nExpected number of findings > ',
|
|
1554 pvOutputThreshold * gene$nRows() * snps$nRows(),'\n');
|
|
1555 if( (useModel == modelLINEAR) || (useModel == modelLINEAR_CROSS) ) {
|
|
1556 statistic_name = "t-stat";
|
|
1557 } else if( useModel == modelANOVA ) {
|
|
1558 statistic_name = "F-test";
|
|
1559 }
|
|
1560 if(!is.null(betafun))
|
|
1561 statistic_name = paste('beta\t',statistic_name, sep='');
|
|
1562 if( pvOutputThreshold > 0 )
|
|
1563 saver.tra$start(output_file_name, statistic_name, snps, gene, testfun, pvfun);
|
|
1564 if( pvOutputThreshold.cis > 0 )
|
|
1565 saver.cis$start(output_file_name.cis, statistic_name, snps, gene, testfun, pvfun);
|
|
1566 rm( statistic_name );
|
|
1567 }
|
|
1568 ################################# Some useful functions #################################
|
|
1569 {
|
|
1570 orthonormalize.snps = function(cursnps, ss) {
|
|
1571 for(p in 1:length(cursnps)) {
|
|
1572 if(length(correctionMatrix)>0) {
|
|
1573 cursnps[[p]] = cursnps[[p]] %*% correctionMatrix;
|
|
1574 }
|
|
1575 cursnps[[p]] = cursnps[[p]] - tcrossprod(cursnps[[p]],cvrt) %*% cvrt;
|
|
1576 for(w in .seq(1,p-1L))
|
|
1577 cursnps[[p]] = cursnps[[p]] - rowSums(cursnps[[p]]*cursnps[[w]]) * cursnps[[w]];
|
|
1578 div = sqrt( rowSums(cursnps[[p]]^2) );
|
|
1579 div[ div == 0 ] = 1;
|
|
1580 cursnps[[p]] = cursnps[[p]]/div;
|
|
1581 }
|
|
1582 snps.std$set(ss, div);
|
|
1583 return(cursnps);
|
|
1584 }
|
|
1585 # if( pvOutputThreshold.cis > 0 ) {
|
|
1586 # is.cis.pair = function(gg,ss) {
|
|
1587 # return(!( ( snpsloc[[ss]][1, 1] - tail( geneloc[[gg]][ , 2], n = 1L) > cisDist) |
|
|
1588 # ( geneloc[[gg]][1, 1] - tail( snpsloc[[ss]] , n = 1L) > cisDist) ) );
|
|
1589 # }
|
|
1590 # }
|
|
1591 if( pvOutputThreshold.cis > 0 ) {
|
|
1592 # sn.l = sapply(snpsloc, function(x)x[1] );
|
|
1593 # sn.r = sapply(snpsloc, function(x)tail(x,1) );
|
|
1594 # ge.l = sapply(geneloc, function(x)x[1,1] );
|
|
1595 # ge.r = sapply(geneloc, function(x)x[nrow(x) , 2] );
|
|
1596 sn.l = sapply(snpsloc, '[', 1 );
|
|
1597 sn.r = sapply(snpsloc, tail, 1 );
|
|
1598 ge.l = sapply(geneloc, '[', 1, 1 );
|
|
1599 ge.r = sapply( lapply(geneloc, tail.matrix, 1 ), '[', 2);
|
|
1600 gg.1 = findInterval( sn.l , ge.r + cisDist +1) + 1;
|
|
1601 # cat(gg.1,'\n')
|
|
1602 gg.2 = findInterval( sn.r , ge.l - cisDist );
|
|
1603 # cat(gg.2,'\n')
|
|
1604 rm(sn.l, sn.r, ge.l, ge.r);
|
|
1605 }
|
|
1606
|
|
1607 }
|
|
1608 ################################# Prepare counters and histogram bins ###################
|
|
1609 {
|
|
1610 pvbins = NULL; # bin edges for p-values
|
|
1611 statbins = 0; # bin edges for the test statistic (|t| or F)
|
|
1612 do.hist = FALSE;
|
|
1613 if( length(pvalue.hist) == 1 ) {
|
|
1614 if(pvalue.hist == "qqplot") {
|
|
1615 pvbins = c(0, 10^rev(seq(0, log10(.Machine$double.xmin)-1, -0.05)));
|
|
1616 } else
|
|
1617 if( is.numeric(pvalue.hist) ) {
|
|
1618 pvbins = seq(from = 0, to = 1, length.out = pvalue.hist+1);
|
|
1619 } else
|
|
1620 if( pvalue.hist == TRUE ) {
|
|
1621 pvbins = seq(from = 0, to = 1, length.out = 100+1);
|
|
1622 }
|
|
1623 } else
|
|
1624 if( is.numeric(pvalue.hist) && (length(pvalue.hist) > 1) ) {
|
|
1625 pvbins = pvalue.hist;
|
|
1626 }
|
|
1627 if( is.null(pvbins) && (pvalue.hist != FALSE) ) {
|
|
1628 stop("Wrong value of pvalue.hist. Must be FALSE, TRUE, \"qqplot\", or numerical");
|
|
1629 }
|
|
1630 do.hist = !is.null(pvbins);
|
|
1631 if( do.hist ) {
|
|
1632 pvbins = sort(pvbins);
|
|
1633 statbins = threshfun(pvbins);
|
|
1634 if( pvOutputThreshold > 0) {
|
|
1635 hist.all = .histogrammer$new(pvbins, statbins);
|
|
1636 }
|
|
1637 if( pvOutputThreshold.cis > 0) {
|
|
1638 hist.cis = .histogrammer$new(pvbins, statbins);
|
|
1639 }
|
|
1640 }
|
|
1641 rm( pvbins, statbins);
|
|
1642 if(min.pv.by.genesnp) {
|
|
1643 if( pvOutputThreshold > 0) {
|
|
1644 minpv.tra = .minpvalue$new(snps,gene);
|
|
1645 }
|
|
1646 if( pvOutputThreshold.cis > 0) {
|
|
1647 minpv.cis = .minpvalue$new(snps,gene);
|
|
1648 }
|
|
1649 }
|
|
1650 }
|
|
1651 ################################# Main loop #############################################
|
|
1652 {
|
|
1653 beta = NULL;
|
|
1654 n.tests.all = 0;
|
|
1655 n.tests.cis = 0;
|
|
1656 n.eqtls.tra = 0;
|
|
1657 n.eqtls.cis = 0;
|
|
1658
|
|
1659 status("Performing eQTL analysis");
|
|
1660 # ss = 1; gg = 1;
|
|
1661 # ss = snps$nSlices(); gg = gene$nSlices();
|
|
1662
|
|
1663 snps_offset = 0;
|
|
1664 for(ss in 1:snps$nSlices()) {
|
|
1665 # for(ss in 1:min(2,snps$nSlices())) { #for debug
|
|
1666 cursnps = NULL;
|
|
1667 nrcs = nrow(snps$getSliceRaw(ss));
|
|
1668
|
|
1669 # loop only through the useful stuff
|
|
1670 for(gg in if(pvOutputThreshold>0){1:gene$nSlices()}else{.seq(gg.1[ss],gg.2[ss])} ) {
|
|
1671 gene_offset = gene_offsets[gg];
|
|
1672 curgene = gene$getSlice(gg);
|
|
1673 nrcg = nrow(curgene);
|
|
1674 if(nrcg == 0) next;
|
|
1675
|
|
1676 rp = "";
|
|
1677
|
|
1678 statistic = NULL;
|
|
1679 select.cis.raw = NULL;
|
|
1680 ## do cis analysis
|
|
1681 # if( (pvOutputThreshold.cis > 0) && ( is.cis.pair(gg, ss) ) ) {
|
|
1682 if( (pvOutputThreshold.cis > 0) && (gg >= gg.1[ss]) && (gg <= gg.2[ss]) ) {
|
|
1683
|
|
1684 if( is.null( statistic ) ) {
|
|
1685 if( is.null( cursnps ) ) {
|
|
1686 cursnps = orthonormalize.snps( snps_process( snps$getSlice(ss) ), ss );
|
|
1687 }
|
|
1688 mat = vector("list", length(cursnps));
|
|
1689 for(d in 1:length(cursnps)) {
|
|
1690 mat[[d]] = tcrossprod(curgene, cursnps[[d]]);
|
|
1691 }
|
|
1692 statistic = statistic.fun( mat );
|
|
1693 astatistic = afun(statistic);
|
|
1694 # rm(mat);
|
|
1695 }
|
|
1696
|
|
1697 # sn.l = findInterval(geneloc[[gg]][ ,1] - cisDist-1 +1 , snpsloc[[ss]]);
|
|
1698 # sn.r = findInterval(geneloc[[gg]][ ,2] + cisDist -1 , snpsloc[[ss]]);
|
|
1699 sn.l = findInterval(geneloc[[gg]][ ,1] - cisDist-1, snpsloc[[ss]]);
|
|
1700 sn.r = findInterval(geneloc[[gg]][ ,2] + cisDist, snpsloc[[ss]]);
|
|
1701 xx = unlist(lapply(which(sn.r>sn.l),FUN=function(x){(sn.l[x]:(sn.r[x]-1))*nrow(statistic)+x}))
|
|
1702 select.cis.raw = xx[ astatistic[xx] >= thresh.cis ];
|
|
1703 select.cis = arrayInd(select.cis.raw, dim(statistic))
|
|
1704
|
|
1705 n.tests.cis = n.tests.cis + length(xx);
|
|
1706 n.eqtls.cis = n.eqtls.cis + length(select.cis.raw);
|
|
1707
|
|
1708 if( do.hist )
|
|
1709 hist.cis$update(astatistic[xx]);
|
|
1710
|
|
1711 if( min.pv.by.genesnp ) {
|
|
1712 # minpv.cis$updatecis(ss, gg, arrayInd(xx, dim(statistic)), astatistic[xx])
|
|
1713 temp = double(length(astatistic));
|
|
1714 dim(temp) = dim(astatistic);
|
|
1715 temp[xx] = astatistic[xx];
|
|
1716 minpv.cis$update(ss, gg, temp);
|
|
1717 }
|
|
1718
|
|
1719 if(!is.null(betafun))
|
|
1720 beta = betafun(mat[[length(mat)]][select.cis.raw], ss, gg, select.cis);
|
|
1721
|
|
1722 saver.cis$update( snps_offset + select.cis[ , 2],
|
|
1723 gene_offset + select.cis[ , 1],
|
|
1724 statistic[select.cis.raw],
|
|
1725 beta);
|
|
1726
|
|
1727 # statistic.select.cis = statistic[ select.cis ];
|
|
1728 # test = testfun( statistic.select.cis );
|
|
1729 # pv = pvfun(test);
|
|
1730 # Saver.cis$WriteBlock( cbind(snps_offset + select.cis[ , 2], gene_offset + select.cis[ , 1], test, pv) );
|
|
1731 # counter.cis$Update(gg, ss, select.cis, pv, n.tests = length(xx), if(do.hist) afun(statistic[xx]) )
|
|
1732 rp = paste(rp, ", ", formatC(n.eqtls.cis, big.mark=",", format = "f", digits = 0), " cis-eQTLs", sep = "");
|
|
1733 }
|
|
1734 ## do trans/all analysis
|
|
1735 if(pvOutputThreshold>0) {
|
|
1736 if( is.null( statistic ) ) {
|
|
1737 if( is.null( cursnps ) ) {
|
|
1738 cursnps = orthonormalize.snps( snps_process( snps$getSlice(ss) ), ss );
|
|
1739 }
|
|
1740 mat = vector("list", length(cursnps));
|
|
1741 for(d in 1:length(cursnps)) {
|
|
1742 mat[[d]] = tcrossprod(curgene, cursnps[[d]]);
|
|
1743 }
|
|
1744 statistic = statistic.fun( mat );
|
|
1745 astatistic = afun(statistic);
|
|
1746 # rm(mat);
|
|
1747 }
|
|
1748
|
|
1749 if( do.hist )
|
|
1750 hist.all$update(astatistic);
|
|
1751
|
|
1752 if(!is.null(select.cis.raw))
|
|
1753 astatistic[xx] = -1;
|
|
1754 # select.tra.raw = select.tra.raw[!(select.tra.raw %in% select.cis.raw)];
|
|
1755
|
|
1756 select.tra.raw = which( astatistic >= thresh);
|
|
1757 select.tra = arrayInd(select.tra.raw, dim(statistic))
|
|
1758
|
|
1759 n.eqtls.tra = n.eqtls.tra + length(select.tra.raw);
|
|
1760 n.tests.all = n.tests.all + length(statistic);
|
|
1761
|
|
1762 if(!is.null(betafun))
|
|
1763 beta = betafun(mat[[length(mat)]][select.tra.raw], ss, gg, select.tra);
|
|
1764
|
|
1765 saver.tra$update( snps_offset + select.tra[ , 2],
|
|
1766 gene_offset + select.tra[ , 1],
|
|
1767 statistic[select.tra.raw],
|
|
1768 beta);
|
|
1769
|
|
1770 if( min.pv.by.genesnp )
|
|
1771 minpv.tra$update(ss, gg, astatistic)
|
|
1772
|
|
1773 # statistic.select.tra = statistic[ select.tra ];
|
|
1774 # test = testfun( statistic.select.tra );
|
|
1775 # pv = pvfun( test );
|
|
1776 # Saver$WriteBlock( cbind( snps_offset + select.tra[ , 2], gene_offset + select.tra[ , 1], test, pv) );
|
|
1777 # counter$Update(gg, ss, select.tra, pv, n.tests = nrcs*nrcg, if(do.hist) afun(statistic) )
|
|
1778 rp = paste(rp, ", ", formatC(n.eqtls.tra, big.mark=",", format = "f", digits = 0), if(pvOutputThreshold.cis > 0)" trans-"else" ","eQTLs", sep = "")
|
|
1779 }
|
|
1780
|
|
1781 #gene_offset = gene_offset + nrcg;
|
|
1782 if( !is.null(statistic) ) {
|
|
1783 per = 100*(gg/gene$nSlices() + ss-1) / snps$nSlices();
|
|
1784 cat( formatC(floor(per*100)/100, format = "f", width = 5, digits = 2), "% done" , rp, "\n", sep = "");
|
|
1785 flush.console();
|
|
1786 }
|
|
1787 } # gg in 1:gene$nSlices()
|
|
1788 snps_offset = snps_offset + nrow(snps$getSliceRaw(ss));
|
|
1789 } # ss in 1:snps$nSlices()
|
|
1790 }
|
|
1791 ################################# Results collection ####################################
|
|
1792 {
|
|
1793 rez = list(time.in.sec = proc.time()[3] - start.time);
|
|
1794 rez$param = params;
|
|
1795
|
|
1796 if(pvOutputThreshold.cis > 0) {
|
|
1797 rez.cis = list(ntests = n.tests.cis, neqtls = n.eqtls.cis);
|
|
1798 rez.cis = c(rez.cis, saver.cis$getResults( gene, snps, n.tests.cis) );
|
|
1799 if(do.hist)
|
|
1800 rez.cis = c(rez.cis, hist.cis$getResults() );
|
|
1801 if(min.pv.by.genesnp)
|
|
1802 rez.cis = c(rez.cis, minpv.cis$getResults(snps, gene, pvfun = function(x){pvfun(testfun(x))}) );
|
|
1803 }
|
|
1804
|
|
1805 if(pvOutputThreshold>0) {
|
|
1806 rez.all = list(ntests = n.tests.all, neqtls = n.eqtls.tra + n.eqtls.cis);
|
|
1807 if(pvOutputThreshold.cis > 0) {
|
|
1808 rez.tra = list(ntests = n.tests.all - n.tests.cis, neqtls = n.eqtls.tra);
|
|
1809 rez.tra = c(rez.tra, saver.tra$getResults( gene, snps, n.tests.all - n.tests.cis) );
|
|
1810 } else {
|
|
1811 rez.all = c(rez.all, saver.tra$getResults( gene, snps, n.tests.all ) );
|
|
1812 }
|
|
1813 if(do.hist) {
|
|
1814 rez.all = c(rez.all, hist.all$getResults() );
|
|
1815 if(pvOutputThreshold.cis > 0) {
|
|
1816 rez.tra$hist.bins = rez.all$hist.bins;
|
|
1817 rez.tra$hist.counts = rez.all$hist.counts - rez.cis$hist.counts;
|
|
1818 }
|
|
1819 }
|
|
1820 if(min.pv.by.genesnp) {
|
|
1821 if(pvOutputThreshold.cis > 0) {
|
|
1822 rez.tra = c(rez.tra, minpv.tra$getResults(snps, gene, pvfun = function(x){pvfun(testfun(x))}) );
|
|
1823 } else {
|
|
1824 rez.all = c(rez.all, minpv.tra$getResults(snps, gene, pvfun = function(x){pvfun(testfun(x))}) );
|
|
1825 }
|
|
1826 }
|
|
1827 }
|
|
1828
|
|
1829 if(exists('rez.all')>0)
|
|
1830 rez$all = rez.all;
|
|
1831 if(exists('rez.tra')>0)
|
|
1832 rez$trans = rez.tra;
|
|
1833 if(exists('rez.cis')>0)
|
|
1834 rez$cis = rez.cis;
|
|
1835
|
|
1836 class(rez) = c(class(rez),"MatrixEQTL");
|
|
1837 status("");
|
|
1838 }
|
|
1839 # cat('s std ',snps.std$get(1),'\n');
|
|
1840 # cat('g std ',gene.std$get(1),'\n');
|
|
1841 ################################# Results collection ####################################
|
|
1842 return(rez);
|
|
1843 }
|
|
1844
|
|
1845 .histme = function(m, name1, name2, ...) {
|
|
1846 cnts = m$hist.counts;
|
|
1847 bins = m$hist.bins;
|
|
1848 ntst = m$ntests;
|
|
1849 centers = 0.5 * (bins[-1L] + bins[-length(bins)]);
|
|
1850 density = 0.5 / (bins[-1L] - centers) * cnts / ntst;
|
|
1851 ntext = paste("P-value distribution for ", name1, formatC(ntst, big.mark=",", format = "f", digits = 0), name2, " gene-SNP pairs ",sep="");
|
|
1852 r = structure(list(breaks = bins, counts = cnts, density = density,
|
|
1853 mids = centers, equidist = FALSE), class = "histogram");
|
|
1854 plot(r, main = ntext, ylab = "Density", xlab = "P-values", ...)
|
|
1855 abline( h = 1, col = "blue");
|
|
1856 return(invisible());
|
|
1857 }
|
|
1858
|
|
1859 .qqme = function(m, lcol, cex, pch, ...) {
|
|
1860 cnts = m$hist.counts;
|
|
1861 bins = m$hist.bins;
|
|
1862 ntst = m$ntests;
|
|
1863
|
|
1864 cusu = cumsum(cnts) / ntst;
|
|
1865 ypos = bins[-1][is.finite(cusu)];
|
|
1866 xpos = cusu[is.finite(cusu)];
|
|
1867 lines(-log10(xpos), -log10(ypos), col = lcol, ...);
|
|
1868 # lines(xpos, ypos, col = lcol, ...);
|
|
1869 if(length(m$eqtls$pvalue)==0)
|
|
1870 return();
|
|
1871 ypvs = -log10(m$eqtls$pvalue);
|
|
1872 xpvs = -log10(1:length(ypvs) / ntst);
|
|
1873 if(length(ypvs) > 1000) {
|
|
1874 # need to filter a bit, make the plotting faster
|
|
1875 levels = as.integer( xpvs/xpvs[1] * 1e3);
|
|
1876 keep = c(TRUE, diff(levels)!=0);
|
|
1877 levels = as.integer( ypvs/ypvs[1] * 1e3);
|
|
1878 keep = keep | c(TRUE, diff(levels)!=0);
|
|
1879 ypvs = ypvs[keep];
|
|
1880 xpvs = xpvs[keep];
|
|
1881 rm(keep)
|
|
1882 }
|
|
1883 points(xpvs, ypvs, col = lcol, pch = pch, cex = cex, ...);
|
|
1884 }
|
|
1885
|
|
1886 plot.MatrixEQTL = function(x, cex = 0.5, pch = 19, xlim = NULL, ylim = NULL, ...) {
|
|
1887 if( x$param$pvalue.hist == FALSE ) {
|
|
1888 warning("Cannot plot p-value distribution: the information was not recorded.\nUse pvalue.hist!=FALSE.");
|
|
1889 return(invisible());
|
|
1890 }
|
|
1891 if( x$param$pvalue.hist == "qqplot" ) {
|
|
1892 xmin = 1/max(x$cis$ntests, x$all$ntests);
|
|
1893 ymax = NULL;
|
|
1894 if(!is.null(ylim)) {
|
|
1895 ymax = ylim[2];
|
|
1896 } else {
|
|
1897 ymax = -log10(min(
|
|
1898 x$cis$eqtls$pvalue[1], x$cis$hist.bins[ c(FALSE,x$cis$hist.counts>0)][1],
|
|
1899 x$all$eqtls$pvalue[1], x$all$hist.bins[ c(FALSE,x$all$hist.counts>0)][1],
|
|
1900 x$trans$eqtls$pvalue[1], x$trans$hist.bins[c(FALSE,x$trans$hist.counts>0)][1],
|
|
1901 na.rm = TRUE))+0.1;
|
|
1902 }
|
|
1903 if(ymax == 0) {
|
|
1904 ymax = -log10(.Machine$double.xmin)
|
|
1905 }
|
|
1906 if(!is.null(ymax))
|
|
1907 ylim = c(0,ymax);
|
|
1908
|
|
1909 if(is.null(xlim))
|
|
1910 xlim = c(0, -log10(xmin/1.5));
|
|
1911
|
|
1912 plot(numeric(),numeric(), xlab = "-Log10(p-value), theoretical",
|
|
1913 ylab = "-Log10(p-value), observed",
|
|
1914 xlim = c(0, -log10(xmin/1.5)),
|
|
1915 ylim = ylim,
|
|
1916 xaxs="i", yaxs="i", ...);
|
|
1917 lines(c(0,1e3), c(0,1e3), col = "gray");
|
|
1918 if((x$param$pvOutputThreshold > 0) && (x$param$pvOutputThreshold.cis > 0)) {
|
|
1919 .qqme( x$cis, "red", cex, pch, ...);
|
|
1920 .qqme( x$trans, "blue", cex, pch, ...);
|
|
1921 title(paste("QQ-plot for",
|
|
1922 formatC(x$cis$ntests, big.mark=",", format = "f", digits = 0),
|
|
1923 "local and",
|
|
1924 formatC(x$trans$ntests, big.mark=",", format = "f", digits = 0),
|
|
1925 "distant gene-SNP p-values"));
|
|
1926 lset = c(1,2,4);
|
|
1927 } else
|
|
1928 if(x$param$pvOutputThreshold.cis > 0) {
|
|
1929 .qqme(x$cis, "red", cex, pch, ...);
|
|
1930 title(paste("QQ-plot for",
|
|
1931 formatC(x$cis$ntests, big.mark=",", format = "f", digits = 0),
|
|
1932 "local gene-SNP p-values"));
|
|
1933 lset = c(1,4);
|
|
1934 } else {
|
|
1935 .qqme(x$all, "blue", cex, pch, ...);
|
|
1936 title(paste("QQ-plot for all",
|
|
1937 formatC(x$all$ntests, big.mark=",", format = "f", digits = 0),
|
|
1938 "gene-SNP p-values"));
|
|
1939 lset = c(3,4);
|
|
1940 }
|
|
1941 legend("topleft",
|
|
1942 c("Local p-values","Distant p-values","All p-values","diagonal")[lset],
|
|
1943 col = c("red","blue","blue","gray")[lset],
|
|
1944 text.col = c("red","blue","blue","gray")[lset],
|
|
1945 pch = 20, lwd = 1, pt.cex = c(1,1,1,0)[lset])
|
|
1946 } else {
|
|
1947 if((x$param$pvOutputThreshold > 0) && (x$param$pvOutputThreshold.cis > 0)) {
|
|
1948 par(mfrow=c(2,1));
|
|
1949 .histme(x$cis, "", " local", ...);
|
|
1950 tran = list(hist.counts = x$all$hist.counts - x$cis$hist.counts,
|
|
1951 hist.bins = x$all$hist.bins,
|
|
1952 ntests = x$all$ntests - x$cis$ntests);
|
|
1953 .histme(x$trans,""," distant", ...);
|
|
1954 par(mfrow=c(1,1));
|
|
1955 } else
|
|
1956 if(x$param$pvOutputThreshold.cis > 0) {
|
|
1957 .histme(x$cis, "", " local", ...);
|
|
1958 } else {
|
|
1959 .histme(x$all, "all ", "" , ...);
|
|
1960 }
|
|
1961 }
|
|
1962 return(invisible());
|
|
1963 }
|
|
1964
|