Mercurial > repos > jasonxu > matrixeqtl

diff MatrixEQTL/R/Matrix_eQTL_engine.R @ 3:ae74f8fb3aef draft
Uploaded
author: jasonxu
date: Fri, 12 Mar 2021 08:20:57 +0000
parents: cd4c8e4a4b5b
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/R/Matrix_eQTL_engine.R	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,1964 @@
+# Matrix eQTL by Andrey A. Shabalin
+# http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/
+
+# http://cran.r-project.org/web/packages/policies.html
+
+library(methods)
+
+modelLINEAR = 117348L;
+modelANOVA  = 47074L;
+modelLINEAR_CROSS = 1113461L;
+
+.seq = function(a,b){if(a<=b){a:b}else{integer(0)}};
+
+SlicedData <- setRefClass( "SlicedData",
+	fields = list( 
+		dataEnv = "environment",
+		nSlices1 = "numeric",
+		rowNameSlices = "list",
+		columnNames = "character",
+		fileDelimiter = "character",
+		fileSkipColumns = "numeric",
+		fileSkipRows = "numeric",
+		fileSliceSize = "numeric",
+		fileOmitCharacters = "character"
+	),
+	methods = list(
+		initialize = function( mat = NULL ) {
+			dataEnv <<- new.env(hash = TRUE, size = 29L);
+			nSlices1 <<- 0L;
+			if(!is.null(mat)) {
+				CreateFromMatrix(mat);
+			}
+			fileSliceSize <<- 1000;
+			fileDelimiter <<- "\t";
+			fileSkipColumns <<- 1L;
+			fileSkipRows <<- 1L;
+			fileOmitCharacters <<- "NA"
+			return(invisible(.self));
+		},
+		CreateFromMatrix = function( mat ) {
+			stopifnot( class(mat) == "matrix" );
+			setSliceRaw( 1L ,mat );
+			rns = rownames( mat, do.NULL = FALSE);
+			#if( is.null(rns) ) {
+			#	rns = paste( "Row_",(1:nrow(mat)), sep="" );
+			#}
+			rowNameSlices <<- list(rns);
+			cns = colnames( mat, do.NULL = FALSE );
+			#if( is.null(cns) ){
+			#	cns = paste( "Col_",(1:ncol(mat)), sep="" );
+			#}
+			columnNames <<- cns;
+			return(invisible(.self));
+		},
+		getSlice = function(sl) {
+			value = get(paste(sl), dataEnv);
+			if( is.raw(value) ) {
+				storage.mode(value) = "double";
+				value[value == 255] = NA;
+			}
+			return( value  )	
+		},
+		getSliceRaw = function(sl) {
+			return( get(paste(sl), dataEnv) )	
+		},
+		setSliceRaw = function(sl, value) {
+			assign( paste(sl), value, dataEnv )
+			if( nSlices1 < sl ) {
+				nSlices1 <<- sl;
+			}
+		},
+		setSlice = function(sl, value) {
+			if( length(value) > 0 ) {
+				if( all(as.integer(value) == value, na.rm = TRUE) ) {
+					if( (min(value, na.rm = TRUE) >= 0 ) && 
+	                            (max(value, na.rm = TRUE) < 255) )
+					{
+						nv = value;
+						suppressWarnings({storage.mode(nv) = "raw"});
+						nv[ is.na(value)] = as.raw(255);
+						value = nv;
+					} else {
+						storage.mode(value) = "integer";
+					}
+				}
+			}
+			setSliceRaw(sl, value);
+		},	
+		nSlices = function() {
+			return( nSlices1 );
+		},
+		LoadFile = function(filename, skipRows = NULL, skipColumns = NULL, sliceSize = NULL, omitCharacters = NULL, delimiter = NULL, rowNamesColumn = 1) {
+			if( !is.null(skipRows) ) {
+				fileSkipRows <<- skipRows;
+			}
+			if( !is.null(skipColumns) ) {
+				fileSkipColumns <<- skipColumns;
+			}
+			if( !is.null(omitCharacters) ) {
+				fileOmitCharacters <<- omitCharacters;
+			}
+			if( !is.null(sliceSize) ) {
+				fileSliceSize <<- sliceSize;
+			}
+			if( !is.null(delimiter) ) {
+				fileDelimiter <<- delimiter;
+			}
+			stopifnot( (fileSkipColumns == 0) || (rowNamesColumn <= fileSkipColumns) )
+			stopifnot( (fileSkipColumns == 0) || (rowNamesColumn >= 1) )
+	
+			fid = file(description = filename, open = "rt", blocking = FALSE, raw = FALSE)
+			# clean object if file is open
+			Clear(); 
+			lines = readLines(con = fid, n = max(fileSkipRows,1L), ok = TRUE, warn = TRUE)
+			line1 = tail(lines,1);
+			splt = strsplit(line1, split = fileDelimiter, fixed = TRUE);
+			if( fileSkipRows > 0L ) {
+				columnNames <<- splt[[1]]; # [ -(1:fileSkipColumns) ];
+			} else {
+				seek(fid, 0)
+			}		
+			
+			rm( lines, line1, splt );
+			
+			rowNameSlices <<- vector("list", 15);
+	
+			curSliceId = 0L;
+			repeat
+			{
+				# preallocate names and data
+				if(length(rowNameSlices) < curSliceId) {
+					rowNameSlices[[2L*curSliceId]] <<- NULL;
+				}
+				curSliceId = curSliceId + 1L;
+				
+				# read sliceSize rows
+				rowtag = vector("character",fileSliceSize);
+				rowvals = vector("list",fileSliceSize);
+				for(i in 1:fileSliceSize) {
+					temp = "";
+					if( fileSkipColumns > 0L ) {
+						temp = scan(file = fid, what = character(), n = fileSkipColumns, quiet = TRUE,sep = fileDelimiter);
+					}
+					rowtag[i] = temp[rowNamesColumn];#paste(temp,collapse=" ");
+					rowvals[[i]] = scan(file = fid, what = double(), nlines = 1, quiet = TRUE, sep = fileDelimiter, na.strings = fileOmitCharacters);
+					if( length(rowvals[[i]]) == 0L ) {
+						if(i==1L) {
+							rowtag = matrix(0, 0, 0);
+							rowvals = character(0);
+						} else 	{
+							rowtag  = rowtag[  1:(i-1) ];
+							rowvals = rowvals[ 1:(i-1) ];
+						}
+						break;			
+					}
+				}
+				if( length(rowtag) == 0L ) {
+					curSliceId = curSliceId - 1L;
+					break;
+				}
+				rowNameSlices[[curSliceId]] <<- rowtag;
+				data = c(rowvals, recursive = TRUE);
+				dim(data) = c(length(rowvals[[1]]), length(rowvals));
+				data = t(data);
+				setSlice(curSliceId, data);
+				if( length(rowtag) < fileSliceSize ) {
+					break;
+				}
+				numtxt = formatC(curSliceId*fileSliceSize, big.mark=",", format = "f", digits = 0)
+				cat( "Rows read: ", numtxt, "\n");
+				flush.console()
+			}
+			close(fid)
+			if( fileSkipRows == 0 ) {
+				columnNames <<- paste("Col_", (1:nCols()), sep="");
+			} else {
+				columnNames <<- tail(columnNames, ncol(getSliceRaw(1)));
+			}
+			if( fileSkipColumns == 0 ) {
+				cnt = 0L;
+				for( sl in 1:nSlices() ) {
+					nr = length(getSliceRaw(sl));
+					rowNameSlices[[sl]] <<- paste("Row_",cnt + (1:nr),sep="");
+					cnt = cnt + nr;
+				}
+			}
+			rowNameSlices <<- rowNameSlices[1:curSliceId];
+			cat("Rows read: ", nRows(), " done.\n");
+			return(invisible(.self));
+		},
+		SaveFile = function(filename) {
+			if( nSlices() == 0 ) {
+				cat("No data to save");
+				return();
+			}
+			fid = file(filename,"wt");
+			for( sl in 1:nSlices() ) {
+				z = getSlice(sl);
+				rownames(z) = rowNameSlices[[sl]];
+				colnames(z) = columnNames;
+				write.table(z, file = fid, sep = "\t", 
+					col.names = (if(sl == 1){NA}else{FALSE}));
+			}
+			close(fid);
+		},
+		nRows = function() {
+			s = 0L;
+			for(sl in .seq(1,nSlices())) {
+				s = s + nrow(getSliceRaw(sl));
+			}
+			return( s )
+		},
+		nCols = function() {
+			if( nSlices() == 0L ) {
+				return(0L);
+			} else {
+				return( ncol(getSliceRaw(1L)) )
+			}
+		},
+		Clear = function() {
+			for( sl in .seq(1,nSlices()) ) {
+				rm(list = paste(sl), envir = dataEnv)
+			}
+			nSlices1 <<- 0L;
+			rowNameSlices <<- list();
+			columnNames <<- character();
+			return(invisible(.self));
+		},
+		IsCombined = function() {
+			return( nSlices() <= 1L );
+		},
+		GetAllRowNames = function() {
+			return( c(rowNameSlices, recursive=TRUE) );
+		},
+		SetNanRowMean = function() {
+			if( (nCols() == 0L) ) {
+				return(invisible(.self));
+			}
+			for( sl in .seq(1,nSlices()) ) {
+				slice = getSlice(sl);
+				if( any(is.na(slice)) ) {
+					rowmean = rowMeans(slice, na.rm = TRUE);
+					rowmean[is.na(rowmean)] = 0L;
+					for( j in which(!complete.cases(slice)) ) {
+						where1 = is.na(slice[j, ]);
+						slice[j, where1] = rowmean[j];
+					}
+					setSlice(sl, slice);
+				}
+			}
+			return(invisible(.self));
+		},
+		RowStandardizeCentered = function() {
+			for(sl in .seq(1,nSlices()) ) {
+				slice = getSlice(sl);
+				div = sqrt( rowSums(slice^2) );
+				div[ div == 0 ] = 1;
+				setSlice(sl, slice/div);
+			}
+			return(invisible(.self));
+		},
+		CombineInOneSlice = function() {
+			if( nSlices() <= 1L ) {
+				return(invisible(.self));			
+			}
+			nc = nCols();
+			nr = nRows();
+			datatypes = c("raw","integer","double");
+			datafuns = c(as.raw, as.integer, as.double);
+			datatype = character(nSlices());
+			for(sl in 1:nSlices()) {
+				datatype[sl] = typeof(getSliceRaw(sl));
+			}
+			mch = max(match(datatype,datatypes,nomatch = length(datatypes)));
+			datafun = datafuns[[mch]];
+			newData = matrix(datafun(0), nrow = nr, ncol = nc);
+			offset = 0;
+			for(sl in 1:nSlices()) {
+				if(mch==1) {
+					slice = getSliceRaw(sl);
+				} else {
+					slice = getSlice(sl);
+				}
+				newData[ offset + (1:nrow(slice)),] = datafun(slice);
+				setSlice(sl, numeric());
+				offset = offset + nrow(slice);
+			}
+			
+			nSlices1 <<- 1L;
+			setSliceRaw(1L, newData);
+			rm(newData);
+			
+			newrowNameSlices = GetAllRowNames();
+			rowNameSlices <<- list(newrowNameSlices)
+			return(invisible(.self));
+		},
+		ResliceCombined = function(sliceSize = -1) {
+			if( sliceSize > 0L ) {
+				fileSliceSize <<- sliceSize;
+			}
+			if( fileSliceSize <= 0 ) {
+				fileSliceSize <<- 1000;
+			}
+			if( IsCombined() ) {
+				nRows1 = nRows();
+				if(nRows1 == 0L) {
+					return(invisible(.self));
+				}
+				newNSlices = floor( (nRows1 + fileSliceSize - 1)/fileSliceSize );
+				oldData = getSliceRaw(1L);
+				#oldNames = rowNameSlices[[1]];
+				newNameslices = vector("list",newNSlices)
+				for( sl in 1:newNSlices ) {
+					range = (1+(sl-1)*fileSliceSize) : (min(nRows1,sl*fileSliceSize));
+					newpart = oldData[range, ,drop = FALSE];
+					if( is.raw(oldData) ) {
+						setSliceRaw( sl, newpart);
+					} else {
+						setSlice( sl, newpart);
+					}
+					newNameslices[[sl]] = rowNameSlices[[1]][range];
+				}
+				rowNameSlices <<- newNameslices ;
+			} else {
+				stop("Reslice of a sliced matrix is not supported yet. Use CombineInOneSlice first.");
+			}
+			return(invisible(.self));
+		},
+		Clone = function() {
+			clone = SlicedData$new();
+			for(sl in .seq(1,nSlices()) ) {
+				clone$setSliceRaw(sl,getSliceRaw(sl));
+			}
+			clone$rowNameSlices = rowNameSlices;
+			clone$columnNames = columnNames;
+			clone$fileDelimiter = fileDelimiter;
+			clone$fileSkipColumns = fileSkipColumns;
+			clone$fileSkipRows = fileSkipRows;
+			clone$fileSliceSize = fileSliceSize;
+			clone$fileOmitCharacters = fileOmitCharacters;
+			return( clone );		
+		},
+		RowMatrixMultiply = function(multiplier) {
+			for(sl in .seq(1,nSlices()) ) {
+				setSlice(sl, getSlice(sl) %*% multiplier);
+			}
+			return(invisible(.self));
+		},
+		ColumnSubsample = function(subset) {
+			for(sl in .seq(1,nSlices()) ) {
+				setSliceRaw(sl, getSliceRaw(sl)[ ,subset, drop = FALSE]);
+			}
+			columnNames <<- columnNames[subset];
+			return(invisible(.self));
+		},
+		RowReorderSimple = function(ordr) {
+			# had to use an inefficient and dirty method
+			# due to horrible memory management in R
+			if( (typeof(ordr) == "logical") && all(ordr) ) {
+				return(invisible(.self));
+			}
+			if( (length(ordr) == nRows()) && all(ordr == (1:length(ordr))) ) {
+				return(invisible(.self));
+			}
+			CombineInOneSlice();
+			gc();
+			setSliceRaw( 1L, getSliceRaw(1L)[ordr, ] );
+			rowNameSlices[[1]] <<- rowNameSlices[[1]][ordr];
+			gc();
+			ResliceCombined();
+			gc();
+			return(invisible(.self));
+		},
+		RowReorder = function(ordr) {
+			# transform logical into indices 
+			if( typeof(ordr) == "logical" ) {
+				if( length(ordr) == nRows() ) {
+					ordr = which(ordr);
+				} else {
+					stop("Parameter \"ordr\" has wrong length")
+				}
+			}
+			## first, check that anything has to be done at all
+			if( (length(ordr) == nRows()) && all(ordr == (1:length(ordr))) ) {
+				return(invisible(.self));
+			}
+			## check bounds
+			#if( (min(ordr) < 1) || (max(ordr) > nRows()) ) {
+			#	stop("Parameter \"ordr\" is out of bounds");
+			#}
+			## slice the data into individual rows
+			all_rows = vector("list", nSlices())
+			for( i in 1:nSlices() ) {
+				slice = getSliceRaw(i)
+				all_rows[[i]] = split(slice, 1:nrow(slice))
+				setSliceRaw(i,numeric())
+			}
+			gc();
+			all_rows = unlist(all_rows, recursive=FALSE, use.names = FALSE);
+			## Reorder the rows
+			all_rows = all_rows[ordr];
+			## get row names
+			all_names = GetAllRowNames();
+			## erase the set
+			rowNameSlices <<- list();
+			## sort names
+			all_names = all_names[ordr];
+			##
+			## Make slices back
+			nrows = length(all_rows);
+			nSlices1 <<- as.integer((nrows+fileSliceSize-1)/fileSliceSize);
+			##cat(nrows, " ", nSlices1);
+			rowNameSlices1 = vector("list", nSlices1);
+			for( i in 1:nSlices1 ) {
+				fr = 1 + fileSliceSize*(i-1);
+				to = min( fileSliceSize*i, nrows);
+	
+				subset = all_rows[fr:to];
+				types = unlist(lapply(subset,typeof));
+				israw = (types == "raw")
+				if(!all(israw == israw[1])) {
+					# some raw and some are not
+					subset = lapply(subset, function(x){if(is.raw(x)){x=as.integer(x);x[x==255] = NA;return(x)}else{return(x)}});
+				}
+				subset = unlist(subset);
+				dim(subset) = c( length(all_rows[[fr]]) , to - fr + 1)
+				#subset = matrix(subset, ncol = (to-fr+1));
+				if(is.raw(subset)) {
+					setSliceRaw(i, t(subset)); 
+				} else {
+					setSlice(i, t(subset)); 
+				}
+				rowNameSlices1[[i]] = all_names[fr:to];
+				all_rows[fr:to] = 0;
+				all_names[fr:to] = 0;
+			}
+			rowNameSlices <<- rowNameSlices1;
+			gc();
+			return(invisible(.self));
+		},
+		RowRemoveZeroEps = function(){
+			for(sl in .seq(1,nSlices()) ) {
+				slice = getSlice(sl);
+				amean = rowMeans(abs(slice));
+				remove = (amean < .Machine$double.eps*nCols());
+				if(any(remove)) {
+					rowNameSlices[[sl]] <<- rowNameSlices[[sl]][!remove];
+					setSlice(sl, slice[!remove, , drop = FALSE]);
+				}
+			}
+			return(invisible(.self));
+		},
+		FindRow = function(rowname) {
+			for(sl in .seq(1,nSlices()) ) {
+				mch = match(rowname,rowNameSlices[[sl]], nomatch = 0);
+				if( mch > 0 )
+				{
+					row = getSlice(sl)[mch[1], , drop=FALSE];
+					rownames(row) = rowname;
+					colnames(row) = columnNames;
+					return( list(slice = sl, item = mch, row = row) );
+				}
+			}
+			return( NULL );
+		},
+		show = function() {
+			cat("SlicedData object. For more information type: ?SlicedData\n");
+			cat("Number of columns:", nCols(), "\n");
+			cat("Number of rows:", nRows(), "\n");
+			cat("Data is stored in", nSlices(), "slices\n");
+			if(nCols()>0) {
+				z = getSlice(1L);
+				if(nrow(z)>0) {
+					z = z[1:min(nrow(z),10L), 1:min(ncol(z),10L), drop = FALSE];
+					rownames(z) = rowNameSlices[[1]][1:nrow(z)];
+					colnames(z) = columnNames[1:ncol(z)];
+					cat("Top left corner of the first slice (up to 10x10):\n");
+					methods:::show(z)
+				}
+			}		
+		}
+	))
+
+setGeneric("nrow")
+setMethod("nrow", "SlicedData",	function(x) {
+		return( x$nRows() );
+	})
+setGeneric("NROW")
+setMethod("NROW", "SlicedData",	function(x) {
+		return( x$nRows() );
+	})
+setGeneric("ncol")
+setMethod("ncol", "SlicedData",	function(x) {
+		return( x$nCols() );
+	})
+setGeneric("NCOL")
+setMethod("NCOL", "SlicedData",	function(x) {
+		return( x$nCols() );
+	})
+setGeneric("dim")
+setMethod("dim", "SlicedData",	function(x) {
+		return( c(x$nRows(),x$nCols()) );
+	})
+setGeneric("colnames")
+setMethod("colnames", "SlicedData",	function(x) {
+		return( x$columnNames );
+	})
+setGeneric("rownames")
+setMethod("rownames", "SlicedData",	function(x) {
+		return( x$GetAllRowNames() );
+	})
+setMethod("[[", "SlicedData",	function(x,i) {
+		return( x$getSlice(i) );
+	})
+setGeneric("length")
+setMethod("length", "SlicedData",	function(x) {
+		return( x$nSlices() );
+	})
+setMethod("[[<-", "SlicedData",	function(x,i,value) {
+		x$setSlice(i, value);
+		return(x);
+})
+summary.SlicedData = function(object, ...) {
+	z = c(nCols = object$nCols(), nRows = object$nRows(), nSlices = object$nSlices());
+	return(z);
+}
+
+##### setGeneric("summary") #####
+#setMethod("summary", "SlicedData",	function(object, ...) {
+#		z = c(nCols = object$nCols(), nRows = object$nRows(), nSlices = object$nSlices());
+#		return(z);
+#	})
+#setGeneric("show", standardGeneric("show"))
+# setMethod("show", "SlicedData",	function(object) {
+# 		cat("SlicedData object. For more information type: ?SlicedData\n");
+# 		cat("Number of columns:", object$nCols(), "\n");
+# 		cat("Number of rows:", object$nRows(), "\n");
+# 		cat("Data is stored in", object$nSlices(), "slices\n");
+# 		if(object$nSlices()>0) {
+# 			z = object$getSlice(1);
+# 			if(nrow(z)>0) {
+# 				z = z[1:min(nrow(z),10), 1:min(ncol(z),10), drop = FALSE];
+# 				rownames(z) = object$rowNameSlices[[1]][1:nrow(z)];
+# 				colnames(z) = object$columnNames[1:ncol(z)];
+# 				cat("Top left corner of the first slice (up to 10x10):\n");
+# 				show(z)
+# 			}
+# 		}		
+# 	})
+
+setGeneric("as.matrix")
+setMethod("as.matrix", "SlicedData", function(x) {
+		if(x$nSlices() == 0) {
+			return( matrix(0,0,0) );
+		}
+		if(x$nSlices() > 1) {
+			copy = x$Clone();
+			copy$CombineInOneSlice();
+		} else {
+			copy = x;
+		}
+		mat = copy$getSlice(1L);
+		rownames(mat) = rownames(copy);
+		colnames(mat) = colnames(copy);
+		return( mat );
+	})
+setGeneric("colnames<-")
+setMethod("colnames<-", "SlicedData", function(x,value) {
+		stopifnot( class(value) == "character" );
+		stopifnot( length(value) == x$nCols() );
+		x$columnNames = value;
+		return(x);
+	})
+setGeneric("rownames<-")
+setMethod("rownames<-", "SlicedData", function(x,value) {
+		stopifnot( class(value) == "character" );
+		stopifnot( length(value) == x$nRows() );
+		start = 1;
+		newNameSlices = vector("list", x$nSlices());
+		for( i in .seq(1,x$nSlices()) ) {
+			nr = nrow(x$getSliceRaw(i));
+			newNameSlices[[i]] = value[ start:(start+nr-1) ];
+			start = start + nr;
+		}
+		x$rowNameSlices = newNameSlices; 
+		return(x);
+	})
+setGeneric("rowSums")
+setMethod("rowSums", "SlicedData", function(x, na.rm = FALSE, dims = 1L) {
+		if(x$nSlices() == 0) {
+			return( numeric() );
+		}
+		stopifnot( dims == 1 );
+		thesum = vector("list", x$nSlices());
+		for( i in 1:x$nSlices() ) {
+			thesum[[i]] = rowSums(x$getSlice(i), na.rm)
+		}
+		return(unlist(thesum, recursive = FALSE, use.names = FALSE));
+	})
+setGeneric("rowMeans")
+setMethod("rowMeans", "SlicedData", function(x, na.rm = FALSE, dims = 1L) {
+		if(x$nSlices() == 0) {
+			return( numeric() );
+		}
+		stopifnot( dims == 1 );
+		thesum = vector("list", x$nSlices());
+		for( i in 1:x$nSlices() ) {
+			thesum[[i]] = rowMeans(x$getSlice(i), na.rm)
+		}
+		return(unlist(thesum, recursive = FALSE, use.names = FALSE));
+	})
+setGeneric("colSums")
+setMethod("colSums", "SlicedData", function(x, na.rm = FALSE, dims = 1L) {
+		if(x$nCols() == 0) {
+			return( numeric() );
+		}
+		stopifnot( dims == 1 );
+		thesum = 0;
+		for( i in .seq(1,x$nSlices()) ) {
+			thesum = thesum + colSums(x$getSlice(i), na.rm)
+		}
+		return(thesum);
+	})
+setGeneric("colMeans")
+setMethod("colMeans", "SlicedData", function(x, na.rm = FALSE, dims = 1L) {
+		if(x$nCols() == 0) {
+			return( numeric() );
+		}
+		stopifnot( dims == 1 );
+		thesum = 0;
+		thecounts = x$nRows();
+		for( i in .seq(1,x$nSlices()) ) {
+			slice = x$getSlice(i);
+			thesum = thesum + colSums(slice, na.rm)
+			if( na.rm ) {
+				thecounts = thecounts - colSums(is.na(slice))
+			}
+		}
+		return(thesum/thecounts);
+	})
+
+.listBuilder <- setRefClass(".listBuilder",
+	fields = list(
+		dataEnv = "environment",
+		n = "numeric"
+	),
+	methods = list(
+		initialize = function() {
+			dataEnv <<- new.env(hash = TRUE);
+			n <<- 0;
+# 			cumlength <<- 0;
+			return(.self);
+		},
+		add = function(x) {
+			if(length(x) > 0) {
+				n <<- n + 1;
+# 				cumlength <<- cumlength + length(x);
+				assign(paste(n), x, dataEnv );
+			}
+			return(.self);
+		},
+		set = function(i,x) {
+			if(length(x) > 0) {
+				if(i>n)
+					n <<- i;
+				assign(paste(i), x, dataEnv );
+			}
+			return(.self);
+		},
+		get = function(i) {
+			return(base::get(paste(i),dataEnv));
+		},
+		list = function() {
+			if(n==0)	return(list());
+			result = vector('list',n);
+			for( i in 1:n) {
+				result[[i]] = .self$get(i);
+			}
+			return(result);
+		},
+		unlist = function() {
+			return(base::unlist(.self$list(), recursive=FALSE, use.names = FALSE));
+		},
+		show = function() {
+			cat(".listBuilder object.\nIternal object in MatrixEQTL package.\n");
+			cat("Number of elements:", object$n, "\n");
+		}
+	))
+
+.histogrammer <- setRefClass(".histogrammer",
+	fields = list(
+		pvbins1 = "numeric",
+		statbins1 = "numeric",
+		hist.count = "numeric"
+	),
+	methods = list(
+		initialize = function (pvbins, statbins) {
+			if(length(pvbins)) {
+				ord = order(statbins);
+				pvbins1 <<- pvbins[ord];
+				statbins1 <<- statbins[ord];
+				hist.count <<- double(length(pvbins)-1);
+			}
+			return(.self);
+		},
+		update = function(stats.for.hist) {
+			h = hist(stats.for.hist, breaks = statbins1, include.lowest = TRUE, right = TRUE, plot = FALSE)$counts;
+			hist.count <<- hist.count + h;
+		},
+		getResults = function() {
+			if(!is.unsorted(pvbins1)) {
+				return(list(hist.bins =     pvbins1 , hist.counts =     hist.count ));
+			} else {
+				return(list(hist.bins = rev(pvbins1), hist.counts = rev(hist.count)));
+			}
+		}
+	))
+
+
+.minpvalue <- setRefClass(".minpvalue",
+	fields = list(
+		sdata = ".listBuilder",
+		gdata = ".listBuilder"
+	),
+	methods = list(
+		initialize = function(snps, gene) {
+			sdata <<- .listBuilder$new();
+			for( ss in 1:snps$nSlices() ) {
+				sdata$set( ss, double(nrow(snps$getSliceRaw(ss))));
+			}
+			gdata <<- .listBuilder$new();
+			for( gg in 1:gene$nSlices() ) {
+				gdata$set( gg, double(nrow(gene$getSliceRaw(gg))));
+			}
+			return(.self);
+		},
+		update = function(ss, gg, astat) {
+			gmax = gdata$get(gg)
+			z1 = max.col(astat,ties.method='first');
+			z11 = astat[1:nrow(astat) + nrow(astat) * (z1 - 1)];
+			gmax = pmax(gmax, z11);
+			gdata$set(gg, gmax);
+			
+			smax = sdata$get(ss)
+			z22 = apply(astat,2,max);
+			smax = pmax(smax, z22);
+			sdata$set(ss, smax);
+			return(.self);
+		},
+		updatecis = function(ss, gg, select.cis, astat) {
+			if(length(astat)>0)
+			{
+				byrows = aggregate(x=astat, by=list(row=select.cis[,1]), FUN=max);
+				bycols = aggregate(x=astat, by=list(col=select.cis[,2]), FUN=max);
+	
+				gmax = gdata$get(gg);
+				gmax[byrows$row] = pmax(gmax[byrows$row], byrows$x)
+				gdata$set(gg, gmax);
+				
+				smax = sdata$get(ss)
+				smax[bycols$col] = pmax(smax[bycols$col], bycols$x)
+				sdata$set(ss, smax);
+			}			
+			return(.self);
+		},
+		getResults = function(snps, gene, pvfun) {
+			min.pv.snps = pvfun(sdata$unlist());
+			names(min.pv.snps) = rownames(snps);
+			min.pv.gene = pvfun(gdata$unlist());
+			names(min.pv.gene) = rownames(gene);
+			return(list(min.pv.snps = min.pv.snps, min.pv.gene = min.pv.gene));
+		}
+	))
+
+.OutputSaver_FRD <- setRefClass(".OutputSaver_FRD",
+	fields = list(
+		sdata = ".listBuilder",
+		gdata = ".listBuilder",
+		cdata = ".listBuilder",
+		bdata = ".listBuilder",
+		fid = "list",
+		testfun1 = "list",
+		pvfun1 = "list"
+	),
+	methods = list(
+		initialize = function () {
+			sdata <<- .listBuilder$new();
+			gdata <<- .listBuilder$new();
+			cdata <<- .listBuilder$new();
+			bdata <<- .listBuilder$new();
+			fid <<- list(0);
+			testfun1 <<- list(0);
+			pvfun1 <<- list(0);
+			return(.self);
+		},
+		start = function(filename, statistic_name, unused1, unused2, testfun, pvfun) {
+			testfun1 <<- list(testfun);
+			pvfun1 <<- list(pvfun);
+			if(length(filename) > 0) {
+				if(class(filename) == "character") {
+					fid <<- list(file(description = filename, open = "wt", blocking = FALSE, raw = FALSE), TRUE);
+				} else {
+					fid <<- list(filename, FALSE)
+				}
+				writeLines( paste("SNP\tgene\t",statistic_name,"\tp-value\tFDR", sep = ""), fid[[1]]);
+			} else {
+				fid <<- list();
+			}
+		},
+		update = function(spos, gpos, sta, beta = NULL) {
+			if(length(sta)>0) {
+				sdata$add(spos);
+				gdata$add(gpos);
+				cdata$add(sta );
+				if(!is.null(beta ))
+					bdata$add(beta );
+			}
+			return(.self);
+		},
+		getResults = function( gene, snps, FDR_total_count) {
+			pvalues = NULL;
+ 			if(cdata$n > 0) {
+ 				tests = testfun1[[1]](cdata$unlist());
+ 				cdata <<- .listBuilder$new();
+ 				
+ 				pvalues = pvfun1[[1]](tests);
+ 				ord = order(pvalues);
+ 				
+ 				tests = tests[ord];
+ 				pvalues = pvalues[ord];
+ 				
+ 				FDR = pvalues * FDR_total_count / (1:length(pvalues));
+ 				FDR[length(FDR)] = min(FDR[length(FDR)], 1);
+ 				FDR = rev(cummin(rev(FDR)));
+ 				
+ 				snps_names  = snps$GetAllRowNames()[sdata$unlist()[ord]];
+ 				sdata <<- .listBuilder$new();
+				gene_names  = gene$GetAllRowNames()[gdata$unlist()[ord]];
+ 				gdata <<- .listBuilder$new();
+ 				
+ 				beta = NULL;
+ 				if(bdata$n > 0)
+ 					beta = bdata$unlist()[ord];
+				
+ 				if(length(fid)>0)	{	
+					step = 1000; ########### 100000
+					for( part in 1:ceiling(length(FDR)/step) ) {
+	 					fr = (part-1)*step + 1;
+	 					to = min(part*step, length(FDR));
+						dump = data.frame(snps_names[fr:to],
+															gene_names[fr:to],
+															if(is.null(beta)) tests[fr:to] else list(beta[fr:to],tests[fr:to]),
+															pvalues[fr:to],
+															FDR[fr:to], 
+															row.names = NULL, 
+															check.rows = FALSE, 
+															check.names = FALSE, 
+															stringsAsFactors = FALSE);
+						write.table(dump, file = fid[[1]], quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE);
+					}
+ 				}
+			} else {
+				cat("No significant associations were found.\n", file = if(length(fid)>0){fid[[1]]}else{""});
+			}
+			if(length(fid)>0)	{	
+				if(fid[[2]]) {
+					close(fid[[1]]);
+				}
+	 			fid <<- list();
+			}
+ 			
+ 			if(!is.null(pvalues)) {
+ 				eqtls = list( snps = snps_names,
+							 				gene = gene_names,
+											statistic = tests,
+											pvalue = pvalues,
+											FDR = FDR);
+ 				if(!is.null(beta))
+ 					eqtls$beta = beta;
+ 			} else {
+ 				eqtls = list( snps = character(),
+							 				gene = character(),
+				 							beta = numeric(),
+											statistic = numeric(),
+											pvalue = numeric(),
+											FDR = numeric());
+ 			}
+			return(list(eqtls = data.frame(eqtls)));
+		}
+	)
+)
+
+
+.OutputSaver_direct <- setRefClass(".OutputSaver_direct",
+	fields = list(
+		gene_names = "character",
+		snps_names = "character",
+		fid = "list",
+		testfun1 = "list",
+		pvfun1 = "list"
+	),
+	methods = list(
+		initialize = function() {
+			gene_names <<- character(0);
+			snps_names <<- character(0);
+			fid <<- list(0);
+			testfun1 <<- list(0);
+			pvfun1 <<- list(0);
+			return(.self);
+		},
+		start = function(filename, statistic_name, snps, gene, testfun, pvfun) {
+			# I hope the program stops if it fails to open the file
+			if(class(filename) == "character") {
+				fid <<- list(file(description = filename, open = "wt", blocking = FALSE, raw = FALSE), TRUE);
+			} else {
+				fid <<- list(filename, FALSE)
+			}
+			writeLines(paste("SNP\tgene\t", statistic_name, "\tp-value", sep = ""), fid[[1]]);
+			gene_names <<- gene$GetAllRowNames();
+			snps_names <<- snps$GetAllRowNames();
+			testfun1 <<- list(testfun);
+			pvfun1 <<- list(pvfun);
+		},
+		update = function(spos, gpos, sta, beta = NULL) {
+			if( length(sta) == 0 )
+				return();
+			sta = testfun1[[1]](sta);
+			lst = list(snps = snps_names[spos], gene = gene_names[gpos], beta = beta, statistic = sta, pvalue = pvfun1[[1]](sta));
+			lst$beta = lst$beta;
+			
+			dump2 = data.frame(lst, row.names = NULL, check.rows = FALSE, check.names = FALSE, stringsAsFactors = FALSE);
+			write.table(dump2, file = fid[[1]], quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE);
+		},
+		getResults = function(...) {
+			if(length(fid)>0)	{	
+				if(fid[[2]]) {
+					close(fid[[1]]);
+				}
+				fid <<- list();
+			}
+			gene_names <<- character(0);
+			snps_names <<- character(0);
+ 			return(list());
+		}
+	)
+)
+
+.my.pmin = function(x, val) {
+	# minimum "pmin" function that can handle empty array
+	if(length(x) == 0) {
+		return(x)
+	} else {
+		return(pmin.int(x,val));
+	}	
+}
+
+.my.pmax = function(x, val) {
+	# minimum "pmin" function that can handle empty array
+	if(length(x) == 0) {
+		return(x)
+	} else {
+		return(pmax.int(x,val));
+	}	
+}
+
+.pv.nz = function(x){return( .my.pmax(x,.Machine$double.xmin) )}
+ 
+.SetNanRowMean = function(x) {
+	if( any(is.na(x)) ) {
+		rowmean = rowMeans(x, na.rm = TRUE);
+		rowmean[ is.na(rowmean) ] = 0;
+		for( j in which(!complete.cases(x)) ) {
+			where1 = is.na( x[j, ] );
+			x[j,where1] = rowmean[j];
+		}
+	}
+	return(x);
+}
+
+# .SNP_process_split_for_ANOVA = function(x) {
+# 		# split into 2 dummy variables
+# 
+# 		uniq = unique(c(x));
+# 		uniq = uniq[!is.na(uniq)];
+# 		
+# 		if( length(uniq) > 3 ) {
+# 			stop("More than three genotype categories is not handled by ANOVA");
+# 		} else if ( length(uniq) < 3 ) {
+# 			uniq = c(uniq, min(uniq)-(1:(3-length(uniq))));
+# 		}
+# 		
+# 		freq = matrix(0, nrow(x), length(uniq));
+# 		for(i in 1:length(uniq)) {
+# 			freq[ ,i] = rowSums(x==uniq[i], na.rm = TRUE);
+# 		}
+# 		
+# 		md = apply(freq, 1, which.max);
+# 		freq[ cbind(1:nrow(x),md) ] = -1;
+# 		
+# 		md = apply(freq, 1, which.max); # min(freq[cbind(1:nrow(slice),md)] - rowSums(select,na.rm = TRUE ))
+# 		new_slice1 = (x == uniq[md]);
+# 		new_slice1[is.na(new_slice1)] = 0;
+# 		freq[ cbind(1:nrow(x),md) ] = -1;
+# 				
+# 		md = apply(freq,1,which.max);
+# 		new_slice2 = (x == uniq[md]);
+# 		new_slice2[ is.na(new_slice2) ] = 0;
+# 		rez = vector("list", 2);
+# 		rez[[1]] = new_slice1;
+# 		rez[[2]] = new_slice2;
+# 		return( rez );
+# }
+
+.SNP_process_split_for_ANOVA = function(x,n.groups) {
+	# split into 2 dummy variables (or more)
+	
+# 	# Get the number of ANOVA groups
+# 	n.groups = options('MatrixEQTL.ANOVA.categories')[[1]];
+# 	if( is.null(n.groups))
+# 		n.groups = 3;
+	
+	# Unique values in x (make sure it has length of n.groups);
+	uniq = unique(as.vector(x));
+	uniq = uniq[!is.na(uniq)];
+	if( length(uniq) > n.groups ) {
+		stop("More than declared number of genotype categories is detected by ANOVA");
+	} else if ( length(uniq) < n.groups ) {
+		uniq = c(uniq, rep(min(uniq)-1, n.groups-length(uniq)));
+	}
+	
+	# Table of frequencies for each variable (row)
+	freq = matrix(0, nrow(x), n.groups);
+	for(i in 1:n.groups) {
+		freq[ ,i] = rowSums(x==uniq[i], na.rm = TRUE);
+	}
+	# remove NA's from x for convenience
+	x[is.na(x)] = min(uniq)-2;
+	
+	# Output list of matrices
+	rez = vector('list',n.groups-1);
+
+	# Skip the most frequent value
+	md = apply(freq, 1, which.max); # most frequent value for each variable
+	freq[ cbind(1:nrow(x),md) ] = -1;
+	
+	# The rest form dumm
+	for(j in 1:(n.groups-1)){
+		md = apply(freq, 1, which.max); 
+		freq[ cbind(1:nrow(x),md) ] = -1;
+		rez[[j]] = (x == uniq[md]);
+	}			
+	return( rez );
+}
+
+Matrix_eQTL_engine = function(
+						snps, 
+						gene, 
+						cvrt = SlicedData$new(), 
+						output_file_name, 
+						pvOutputThreshold = 1e-5, 
+						useModel = modelLINEAR, 
+						errorCovariance = numeric(), 
+						verbose = TRUE,
+ 						pvalue.hist = FALSE,
+						min.pv.by.genesnp = FALSE,
+						noFDRsaveMemory = FALSE) {
+	rez = Matrix_eQTL_main(
+				snps = snps, 
+				gene = gene, 
+				cvrt = cvrt, 
+				output_file_name = output_file_name, 
+				pvOutputThreshold = pvOutputThreshold,
+				useModel = useModel, 
+				errorCovariance = errorCovariance, 
+				verbose = verbose,
+ 				pvalue.hist = pvalue.hist,
+				min.pv.by.genesnp = min.pv.by.genesnp,
+				noFDRsaveMemory = noFDRsaveMemory);
+	return( rez );
+}
+
+Matrix_eQTL_main = function(	
+						snps, 
+						gene, 
+						cvrt = SlicedData$new(), 
+						output_file_name = "", 
+						pvOutputThreshold = 1e-5,
+						useModel = modelLINEAR, 
+						errorCovariance = numeric(), 
+						verbose = TRUE, 
+						output_file_name.cis = "", 
+						pvOutputThreshold.cis = 0,
+						snpspos = NULL, 
+						genepos = NULL,
+						cisDist = 1e6,
+ 						pvalue.hist = FALSE,
+						min.pv.by.genesnp = FALSE,
+						noFDRsaveMemory = FALSE) {
+	################################# Basic variable checks #################################
+ 	{				
+		# status("Performing basic checks of the input variables");
+		stopifnot( "SlicedData" %in% class(gene) );
+		stopifnot( "SlicedData" %in% class(snps) );
+		stopifnot( "SlicedData" %in% class(cvrt) );
+		
+		# Check dimensions
+		if( min(snps$nRows(),snps$nCols()) == 0 )
+			stop("Empty genotype dataset");
+		if( min(gene$nRows(),gene$nCols()) == 0 )
+			stop("Empty expression dataset");
+		if( snps$nCols() != gene$nCols() )
+			stop("Different number of samples in the genotype and gene expression files");
+		if( cvrt$nRows()>0 ) {
+			if( snps$nCols() != cvrt$nCols() )
+				stop("Wrong number of samples in the matrix of covariates");
+		}
+
+		stopifnot( class(pvOutputThreshold) == "numeric" );
+		stopifnot( length(pvOutputThreshold) == 1 );
+		stopifnot( pvOutputThreshold >= 0 );
+		stopifnot( pvOutputThreshold <= 1 );
+
+		stopifnot(  class(noFDRsaveMemory) == "logical" );
+		stopifnot( length(noFDRsaveMemory) == 1 );
+
+		if( pvOutputThreshold > 0 ) {
+			stopifnot( !((length(output_file_name) == 0) && noFDRsaveMemory) )
+			stopifnot( length(output_file_name) <= 1 );
+			if( length(output_file_name) == 1 ) {
+				stopifnot( class(output_file_name) %in% c("character","connection") );
+			}
+		}
+		
+		stopifnot( class(pvOutputThreshold.cis) == "numeric" );
+		stopifnot( length(pvOutputThreshold.cis) == 1 );
+		stopifnot( pvOutputThreshold.cis >= 0 );
+		stopifnot( pvOutputThreshold.cis <= 1 );
+		stopifnot( !((pvOutputThreshold > 0) & (pvOutputThreshold.cis > 0) & (pvOutputThreshold > pvOutputThreshold.cis)) );
+		stopifnot( (pvOutputThreshold > 0) | (pvOutputThreshold.cis > 0) );
+
+		stopifnot( class(useModel) == class(modelLINEAR) );
+		stopifnot( length(useModel) == 1 );
+		stopifnot( useModel %in% c(modelLINEAR, modelANOVA, modelLINEAR_CROSS) );
+		if( useModel %in%  c(modelLINEAR, modelLINEAR_CROSS) ) {
+			if( snps$nCols() <= cvrt$nRows() + 1 + 1) {
+				stop('The number of covariates exceeds the number of samples.\nLinear regression can not be fit.')
+			}
+		}
+		if( useModel == modelLINEAR_CROSS ) {
+			if( cvrt$nRows() == 0 ) {
+				stop( "Model \"modelLINEAR_CROSS\" requires at least one covariate" );
+			}
+		}
+		if( useModel == modelANOVA ) {
+			n.anova.groups = getOption('MatrixEQTL.ANOVA.categories', 3);
+			stopifnot( n.anova.groups == floor(n.anova.groups) );
+			stopifnot( n.anova.groups >= 3 );
+# 			stopifnot( n.anova.groups < snps$nCols() - cvrt$nRows() - 2 );
+			if( snps$nCols() <= cvrt$nRows() + n.anova.groups) {
+				stop('The number of covariates exceeds the number of samples.\nLinear regression (ANOVA) can not be fit.')
+			}
+		}
+		
+		stopifnot(  class(verbose) == "logical" );
+		stopifnot( length(verbose) == 1 );
+
+		stopifnot(  class(min.pv.by.genesnp) == "logical" );
+		stopifnot( length(min.pv.by.genesnp) == 1 );
+	
+		if( pvOutputThreshold.cis > 0 ) {
+			stopifnot( !((length(output_file_name.cis) == 0) && noFDRsaveMemory) )
+			stopifnot( length(output_file_name.cis) <= 1 );
+			if( length(output_file_name.cis) == 1 ) {
+				stopifnot( class(output_file_name.cis) %in% c("character","connection") );
+			}
+
+# 			stopifnot( class(output_file_name.cis) == "character" );
+# 			stopifnot( length(output_file_name.cis) == 1 );
+			stopifnot( class(snpspos) == "data.frame" );
+			stopifnot( ncol(snpspos) == 3 );
+			stopifnot( nrow(snpspos) > 0 );
+			stopifnot( class(snpspos[1,3]) %in% c("integer", "numeric") )
+			stopifnot( !any(is.na(snpspos[,3])) )
+			stopifnot( class(genepos) == "data.frame" );
+			stopifnot( ncol(genepos) == 4 );
+			stopifnot( nrow(genepos) > 0 );
+			stopifnot( class(genepos[1,3]) %in% c("integer", "numeric") )
+			stopifnot( class(genepos[1,4]) %in% c("integer", "numeric") )
+			stopifnot( !any(is.na(genepos[,3])) )
+			stopifnot( !any(is.na(genepos[,4])) )
+			stopifnot( nzchar(output_file_name.cis) )
+		}
+		
+		if( pvOutputThreshold > 0 ) {
+			stopifnot( nzchar(output_file_name) )
+		}
+		
+		stopifnot( class(errorCovariance) %in% c("numeric", "matrix") );
+		errorCovariance = as.matrix(errorCovariance);
+		if(length(errorCovariance)>0) {
+			if( nrow(errorCovariance) != ncol(errorCovariance) ) {
+				stop("The covariance matrix is not square");
+			}	
+			if( nrow(errorCovariance) != snps$nCols() ) {
+				stop("The covariance matrix size does not match the number of samples");
+			}
+			if( !all(errorCovariance == t(errorCovariance)) ) {
+				stop("The covariance matrix is not symmetric");
+			}
+		}
+	}
+	################################# Initial setup #########################################
+	{
+		gene.std = .listBuilder$new();
+		snps.std = .listBuilder$new();
+		
+		dont.clone.gene = getOption('MatrixEQTL.dont.preserve.gene.object', FALSE)
+		if(is.null(dont.clone.gene))
+			dont.clone.gene = FALSE;
+		
+		if( !dont.clone.gene )
+			gene = gene$Clone();
+		# snps = snps$Clone(); # snps is read only
+		cvrt = cvrt$Clone();
+
+		params = list(
+			output_file_name = output_file_name, 
+			pvOutputThreshold = pvOutputThreshold,
+			useModel = useModel, 
+			errorCovariance = errorCovariance , 
+			verbose = verbose, 
+			output_file_name.cis = output_file_name.cis, 
+			pvOutputThreshold.cis = pvOutputThreshold.cis,
+			cisDist = cisDist ,
+	 		pvalue.hist = pvalue.hist,
+			min.pv.by.genesnp = min.pv.by.genesnp);
+
+		if( verbose ) {
+			lastTime = 0;
+			status <- function(text) {
+				# gc();
+				newTime = proc.time()[3];
+				if(lastTime != 0) {
+					cat("Task finished in ", newTime-lastTime, " seconds\n");
+				}
+				cat(text,"\n");
+				lastTime <<- newTime;
+				unused = flush.console();
+			}
+		} else {
+			status = function(text){}
+		}
+		start.time = proc.time()[3];
+	}
+	################################# Error covariance matrix processing ####################
+	{
+		if( length(errorCovariance) > 0 ) {
+			status("Processing the error covariance matrix");
+			eig = eigen(errorCovariance, symmetric = TRUE)
+			d = eig$values;
+			v = eig$vectors;
+			#  errorCovariance == v %*% diag(d) %*% t(v)
+			#  errorCovariance^0.5 == v*sqrt(d)*v" (no single quotes anymore)
+			#  errorCovariance^(-0.5) == v*diag(1./sqrt(diag(d)))*v"
+			if( any(d<=0) ) {
+				stop("The covariance matrix is not positive definite");
+			}
+			correctionMatrix = v %*% diag(1./sqrt(d)) %*% t(v);
+			rm( eig, v, d, errorCovariance )
+		} else {
+			rm( errorCovariance );
+			correctionMatrix = numeric();
+		}
+	}
+	################################# Matching gene and SNPs locations ######################
+	if( pvOutputThreshold.cis > 0 ) {
+		status("Matching data files and location files")
+		
+		# names in the input data	
+		gene_names = gene$GetAllRowNames();
+		snps_names = snps$GetAllRowNames();
+		
+		# gene range, set: left<right
+		if(any(genepos[,3] > genepos[,4])) {
+			temp3 = genepos[,3];
+			temp4 = genepos[,4];
+			genepos[,3] = pmin(temp3,temp4);
+			genepos[,4] = pmax(temp3,temp4);
+			rm(temp3, temp4);
+		}
+		
+		# match with the location data
+		genematch = match( gene_names, genepos[ ,1],  nomatch = 0L);
+		usedgene = matrix(FALSE, nrow(genepos), 1); # genes in "genepos" that are matching  "gene_names"
+		usedgene[ genematch ] = TRUE;
+		if( !any(genematch) ) {
+			stop("Gene names do not match those in the gene location file.");
+		}
+		cat( sum(genematch>0), "of", length(gene_names), " genes matched\n");
+		
+		
+		snpsmatch = match( snps_names, snpspos[ ,1],  nomatch = 0L);
+		usedsnps = matrix(FALSE, nrow(snpspos),1);
+		usedsnps[ snpsmatch ] = TRUE;
+		if( !any(snpsmatch) ) {
+			stop("SNP names do not match those in the SNP location file.");
+		}
+		cat( sum(snpsmatch>0), "of", length(snps_names), " SNPs matched\n");
+		
+		# list used chr names
+		chrNames = unique(c( as.character(unique(snpspos[usedsnps,2])), 
+												 as.character(unique(genepos[usedgene,2])) ))
+		chrNames = chrNames[ sort.list( suppressWarnings(as.integer(chrNames)), 
+																		method = "radix", na.last = TRUE ) ];
+		# match chr names
+		genechr = match(genepos[,2],chrNames);
+		snpschr = match(snpspos[,2],chrNames);
+		
+		# max length of a chromosome
+		chrMax = max( snpspos[usedsnps, 3], genepos[usedgene, 4], na.rm = TRUE) + cisDist;
+		
+		# Single number location for all rows in "genepos" and "snpspos"
+ 		genepos2 = as.matrix(genepos[ ,3:4, drop = FALSE] + (genechr-1)*chrMax);
+ 		snpspos2 = as.matrix(snpspos[ ,3  , drop = FALSE] + (snpschr-1)*chrMax);
+		
+		# the final location arrays;
+		snps_pos = matrix(0,length(snps_names),1);
+		snps_pos[snpsmatch>0, ] = snpspos2[snpsmatch, , drop = FALSE];
+		snps_pos[rowSums(is.na(snps_pos))>0, ] = 0;
+		snps_pos[snps_pos==0] = (length(chrNames)+1) * (chrMax+cisDist);
+		rm(snps_names, snpsmatch, usedsnps, snpschr, snpspos2)
+		
+		gene_pos = matrix(0,length(gene_names),2);
+		gene_pos[genematch>0, ] = genepos2[genematch, , drop = FALSE];
+		gene_pos[rowSums(is.na(gene_pos))>0, ] = 0;
+		gene_pos[gene_pos==0] = (length(chrNames)+2) * (chrMax+cisDist);
+		rm(gene_names, genematch, usedgene, genechr, genepos2)
+		rm(chrNames, chrMax);
+
+		if( is.unsorted(snps_pos) ) {
+			status("Reordering SNPs\n");
+			ordr = sort.list(snps_pos);
+			snps$RowReorder(ordr);
+			snps_pos = snps_pos[ordr, , drop = FALSE];
+			rm(ordr);
+		}
+		if( is.unsorted(rowSums(gene_pos)) ) {
+			status("Reordering genes\n");
+			ordr = sort.list(rowSums(gene_pos));
+			gene$RowReorder(ordr);
+			gene_pos = gene_pos[ordr, , drop = FALSE];
+			rm(ordr);
+		}
+		
+		# Slice it back.
+		geneloc = vector("list", gene$nSlices())
+		gene_offset = 0;
+		for(gc in 1:gene$nSlices()) {
+			nr = length(gene$rowNameSlices[[gc]]);
+			geneloc[[gc]] = gene_pos[gene_offset + (1:nr), , drop = FALSE];
+			gene_offset = gene_offset + nr;	
+		}
+		rm(gc, gene_offset, gene_pos);
+		
+		snpsloc = vector("list", snps$nSlices())
+		snps_offset = 0;
+		for(sc in 1:snps$nSlices()) {
+			nr = length(snps$rowNameSlices[[sc]]);
+			snpsloc[[sc]] = snps_pos[snps_offset + (1:nr), , drop = FALSE];
+			snps_offset = snps_offset + nr;	
+		}
+		rm(nr, sc, snps_offset, snps_pos);
+	}
+	################################# Covariates processing #################################
+	{	
+		status("Processing covariates");
+		if( useModel == modelLINEAR_CROSS ) {
+			last.covariate = as.vector(tail( cvrt$getSlice(cvrt$nSlices()), n = 1));
+		}		
+		if( cvrt$nRows()>0 ) {
+			cvrt$SetNanRowMean();
+			cvrt$CombineInOneSlice();
+			cvrt = rbind(matrix(1,1,snps$nCols()),cvrt$getSlice(1));
+		} else {
+			cvrt = matrix(1,1,snps$nCols());
+		}
+		# Correct for the error covariance structure
+		if( length(correctionMatrix)>0 ) {
+			cvrt = cvrt %*% correctionMatrix;
+		}
+		# Orthonormalize covariates
+		# status("Orthonormalizing covariates");
+		q = qr(t(cvrt));
+		if( min(abs(diag(qr.R(q)))) < .Machine$double.eps * snps$nCols() ) {
+			stop("Colinear or zero covariates detected");
+		}
+		cvrt = t( qr.Q(q) );
+		rm( q );
+	}
+	################################# Gene expression processing ############################
+	{
+		status("Processing gene expression data (imputation, residualization, etc.)");
+		# Impute gene expression
+		gene$SetNanRowMean();
+		# Correct for the error covariance structure
+		if( length(correctionMatrix)>0 ) {
+			gene$RowMatrixMultiply(correctionMatrix);
+		}
+		# Orthogonolize expression w.r.t. covariates
+		# status("Orthogonolizing expression w.r.t. covariates");
+		gene_offsets = double(gene$nSlices()+1);
+		for( sl in 1:gene$nSlices() ) {
+			slice = gene$getSlice(sl);
+			gene_offsets[sl+1] = gene_offsets[sl] + nrow(slice);
+			rowsq1 = rowSums(slice^2);
+			slice = slice - tcrossprod(slice,cvrt) %*% cvrt;
+			rowsq2 = rowSums(slice^2);
+			# kill rows colinear with the covariates
+			delete.rows = (rowsq2 <= rowsq1 * .Machine$double.eps );
+			slice[delete.rows] = 0;
+			div = sqrt( rowSums(slice^2) );
+			div[ div == 0 ] = 1;
+			gene.std$set(sl, div);
+			gene$setSlice(sl, slice / div);
+		}
+		rm(rowsq1, rowsq2, delete.rows, div);
+		rm( sl, slice );
+		#gene$RowRemoveZeroEps();
+	}
+	################################# snps_process, testfun, pvfun, threshfun, afun  ########
+	{
+		# snps_process - preprocess SNPs slice
+		#
+		# afun --- abs for signed stats, identity for non-negative
+		# threshfun --- internal stat threshold for given p-value
+		# testfun --- t or F statistic from the internal one
+		# pvfun --- p-value from the t or F statistic
+		
+		nSamples = snps$nCols();
+		nGenes = gene$nRows();
+		nSnps  = snps$nRows();
+		nCov = nrow(cvrt);
+		# nVarTested = length(snps_list); # set in case(useModel)
+		# dfNull = nSamples - nCov;
+		# d.f. of the full model
+		betafun = NULL;
+		
+		if( useModel == modelLINEAR ) {
+			snps_process = function(x) {
+				return( list(.SetNanRowMean(x)) );
+			};
+			nVarTested = 1;
+			dfFull = nSamples - nCov - nVarTested;
+			statistic.fun = function(mat_list) {
+				return( mat_list[[1]] );
+			}
+			afun = function(x) {return(abs(x))};
+			threshfun = function(pv) {
+				thr = qt(pv/2, dfFull, lower.tail = FALSE);
+				thr = thr^2;
+				thr = sqrt(  thr / (dfFull + thr) );
+				thr[pv >= 1] = 0;
+				thr[pv <= 0] = 1;
+				return( thr );
+			}
+			testfun = function(x) { return( x * sqrt( dfFull / (1 - .my.pmin(x^2,1))));	}
+			pvfun = function(x) { return( .pv.nz(pt(-abs(x),dfFull)*2)); }
+			thresh.cis = threshfun(pvOutputThreshold.cis);
+			thresh = threshfun(pvOutputThreshold);
+			betafun = function(stat, ss, gg, select) {
+				return(stat * gene.std$get(gg)[select[,1]] / snps.std$get(ss)[select[,2]]);
+			}
+		} else 
+		if( useModel == modelANOVA ) {
+			snps_process = function(x).SNP_process_split_for_ANOVA(x,n.anova.groups);
+			nVarTested = n.anova.groups - 1;
+			dfFull = nSamples - nCov - nVarTested;
+# 			statistic.fun = function(mat_list) {
+# 				return( mat_list[[1]]^2 + mat_list[[2]]^2 );
+# 			}
+			statistic.fun = function(mat_list) {
+				x = mat_list[[1]]^2;
+				for( j in 2:length(mat_list) )
+					x = x + mat_list[[j]]^2;
+				return( x );
+			}
+			afun = identity;
+			threshfun = function(pv) {
+				thr = qf(pv, nVarTested, dfFull, lower.tail = FALSE);
+				thr = thr / (dfFull/nVarTested + thr);
+				thr[pv >= 1] = 0;
+				thr[pv <= 0] = 1;
+				return( thr );
+			}
+			testfun = function(x) { return( x / (1 - .my.pmin(x,1)) * (dfFull/nVarTested) ); }
+			pvfun = function(x) { return( .pv.nz(pf(x, nVarTested, dfFull, lower.tail = FALSE)) ); }
+			thresh.cis = threshfun(pvOutputThreshold.cis);
+			thresh = threshfun(pvOutputThreshold);
+		} else 
+		if( useModel == modelLINEAR_CROSS ) {
+			last.covariate = as.vector( last.covariate );
+			snps_process = .SNP_process_split_for_LINEAR_CROSS = function(x) {
+				out = vector("list", 2);
+				out[[1]] = .SetNanRowMean(x);
+				out[[2]] = t( t(out[[1]]) * last.covariate );
+				return( out );
+			};
+			nVarTested = 1;
+			dfFull = nSamples - nCov - nVarTested - 1;
+			statistic.fun = function(mat_list) {
+				return( mat_list[[2]] / sqrt(1 - mat_list[[1]]^2) );
+			}
+			afun = function(x) {return(abs(x))};
+			threshfun = function(pv) {
+				thr = qt(pv/2, dfFull, lower.tail = FALSE);
+				thr = thr^2;
+				thr = sqrt(  thr / (dfFull + thr) );
+				thr[pv >= 1] = 0;
+				thr[pv <= 0] = 1;
+				return( thr );
+			}
+			testfun = function(x) { return( x * sqrt( dfFull / (1 - .my.pmin(x^2,1))));	}
+			pvfun = function(x) { return( .pv.nz(pt(-abs(x),dfFull)*2 )); }		
+			thresh.cis = threshfun(pvOutputThreshold.cis);
+			thresh = threshfun(pvOutputThreshold);				
+			betafun = function(stat, ss, gg, select) {
+				return(stat * gene.std$get(gg)[select[,1]] / snps.std$get(ss)[select[,2]]);
+			}
+		}
+		params$dfFull = dfFull;
+	}
+	################################# Saver class(es) creation ##############################
+	{
+		status("Creating output file(s)");
+		if(noFDRsaveMemory) {
+			if( pvOutputThreshold > 0 ) {
+				saver.tra = .OutputSaver_direct$new();
+			}
+			if( pvOutputThreshold.cis > 0 ) {
+				saver.cis = .OutputSaver_direct$new();
+			}
+		} else {
+			if( pvOutputThreshold > 0 ) {
+				saver.tra = .OutputSaver_FRD$new();
+			}
+			if( pvOutputThreshold.cis > 0 ) {
+				saver.cis = .OutputSaver_FRD$new();
+			}
+		}
+		if( pvOutputThreshold > 0 )
+			if( pvOutputThreshold * gene$nRows() * snps$nRows() > 1000000 )
+				if(!noFDRsaveMemory)
+					cat('Warning: pvOutputThreshold may be too large.\nExpected number of findings > ', 
+							pvOutputThreshold * gene$nRows() * snps$nRows(),'\n');
+		if( (useModel == modelLINEAR) || (useModel == modelLINEAR_CROSS) ) {
+			statistic_name = "t-stat";
+		} else if( useModel == modelANOVA ) {
+			statistic_name = "F-test";
+		}
+		if(!is.null(betafun))
+			statistic_name = paste('beta\t',statistic_name, sep='');
+		if( pvOutputThreshold > 0 )
+			saver.tra$start(output_file_name,     statistic_name, snps, gene, testfun, pvfun);
+		if( pvOutputThreshold.cis > 0 )
+			saver.cis$start(output_file_name.cis, statistic_name, snps, gene, testfun, pvfun);
+		rm( statistic_name );
+	}
+	################################# Some useful functions #################################
+	{
+		orthonormalize.snps = function(cursnps, ss) {
+			for(p in 1:length(cursnps)) {
+				if(length(correctionMatrix)>0) {
+					cursnps[[p]] = cursnps[[p]] %*% correctionMatrix;
+				}
+				cursnps[[p]] = cursnps[[p]] - tcrossprod(cursnps[[p]],cvrt) %*% cvrt;
+				for(w in .seq(1,p-1L))
+					cursnps[[p]] = cursnps[[p]] - rowSums(cursnps[[p]]*cursnps[[w]]) * cursnps[[w]];
+				div = sqrt( rowSums(cursnps[[p]]^2) );
+				div[ div == 0 ] = 1;
+				cursnps[[p]] = cursnps[[p]]/div;
+			}
+			snps.std$set(ss, div);
+			return(cursnps);
+		}
+# 		if( pvOutputThreshold.cis > 0 ) {
+# 			is.cis.pair = function(gg,ss) {
+# 				return(!( ( snpsloc[[ss]][1, 1] - tail( geneloc[[gg]][ , 2], n = 1L) > cisDist) |
+# 					    ( geneloc[[gg]][1, 1] - tail( snpsloc[[ss]]      , n = 1L) > cisDist) ) );
+# 			}
+# 		}
+ 		if( pvOutputThreshold.cis > 0 ) {
+# 			sn.l = sapply(snpsloc, function(x)x[1] );
+# 			sn.r = sapply(snpsloc, function(x)tail(x,1) );
+# 			ge.l = sapply(geneloc, function(x)x[1,1] );
+# 			ge.r = sapply(geneloc, function(x)x[nrow(x) , 2] );
+			sn.l = sapply(snpsloc, '[', 1 );
+			sn.r = sapply(snpsloc, tail, 1 );
+			ge.l = sapply(geneloc, '[', 1, 1 );
+			ge.r = sapply( lapply(geneloc, tail.matrix, 1 ), '[', 2);
+			gg.1 = findInterval( sn.l , ge.r + cisDist +1) + 1;
+# 			cat(gg.1,'\n')
+			gg.2 = findInterval( sn.r , ge.l - cisDist );
+# 			cat(gg.2,'\n')
+			rm(sn.l, sn.r, ge.l, ge.r);
+ 		}
+
+	}	
+	################################# Prepare counters and histogram bins ###################
+	{
+		pvbins = NULL; # bin edges for p-values
+		statbins = 0;  # bin edges for the test statistic (|t| or F)
+		do.hist = FALSE;
+		if( length(pvalue.hist) == 1 ) {
+			if(pvalue.hist == "qqplot") {
+				pvbins = c(0, 10^rev(seq(0, log10(.Machine$double.xmin)-1, -0.05)));
+			} else
+			if( is.numeric(pvalue.hist) ) {
+				pvbins = seq(from = 0, to = 1, length.out = pvalue.hist+1);
+			} else
+			if( pvalue.hist == TRUE ) {
+				pvbins = seq(from = 0, to = 1, length.out = 100+1);
+			}
+		} else
+		if( is.numeric(pvalue.hist) && (length(pvalue.hist) > 1) ) {
+			pvbins = pvalue.hist;
+		}
+		if( is.null(pvbins) && (pvalue.hist != FALSE) ) {
+			stop("Wrong value of pvalue.hist. Must be FALSE, TRUE, \"qqplot\", or numerical");
+		}
+		do.hist = !is.null(pvbins);
+		if( do.hist ) {
+			pvbins = sort(pvbins);
+			statbins = threshfun(pvbins);
+			if( pvOutputThreshold > 0) {
+				hist.all = .histogrammer$new(pvbins, statbins);
+			}
+			if( pvOutputThreshold.cis > 0) {
+				hist.cis = .histogrammer$new(pvbins, statbins);
+			}
+		}
+		rm( pvbins, statbins);
+		if(min.pv.by.genesnp) {
+			if( pvOutputThreshold > 0) {
+				minpv.tra = .minpvalue$new(snps,gene);
+			}
+			if( pvOutputThreshold.cis > 0) {
+				minpv.cis = .minpvalue$new(snps,gene);
+			}
+		}
+	}
+	################################# Main loop #############################################
+	{
+		beta = NULL;
+		n.tests.all = 0;
+		n.tests.cis = 0;
+		n.eqtls.tra = 0;
+		n.eqtls.cis = 0;
+		
+		status("Performing eQTL analysis");
+		# ss = 1; gg = 1;
+		# ss = snps$nSlices(); gg = gene$nSlices();
+		
+		snps_offset = 0;
+		for(ss in 1:snps$nSlices()) {
+# 		for(ss in 1:min(2,snps$nSlices())) { #for debug
+			cursnps = NULL;
+			nrcs = nrow(snps$getSliceRaw(ss));
+			
+			# loop only through the useful stuff
+			for(gg in if(pvOutputThreshold>0){1:gene$nSlices()}else{.seq(gg.1[ss],gg.2[ss])} ) {
+				gene_offset = gene_offsets[gg];
+				curgene = gene$getSlice(gg);
+				nrcg = nrow(curgene);
+				if(nrcg == 0) next;
+				
+				rp = "";
+				
+				statistic = NULL;
+				select.cis.raw = NULL;
+				## do cis analysis
+# 				if( (pvOutputThreshold.cis > 0) && ( is.cis.pair(gg, ss) ) ) {
+				if( (pvOutputThreshold.cis > 0) && (gg >= gg.1[ss]) && (gg <= gg.2[ss]) ) {
+					
+					if( is.null( statistic ) ) {
+						if( is.null( cursnps ) ) {
+							cursnps = orthonormalize.snps( snps_process( snps$getSlice(ss) ), ss );
+						}					
+						mat = vector("list", length(cursnps));
+						for(d in 1:length(cursnps)) {
+							mat[[d]] = tcrossprod(curgene, cursnps[[d]]);
+						}
+						statistic = statistic.fun( mat );
+						astatistic = afun(statistic);
+# 						rm(mat);
+					}
+					
+# 					sn.l = findInterval(geneloc[[gg]][ ,1] - cisDist-1  +1   , snpsloc[[ss]]);
+# 					sn.r = findInterval(geneloc[[gg]][ ,2] + cisDist    -1   , snpsloc[[ss]]);
+					sn.l = findInterval(geneloc[[gg]][ ,1] - cisDist-1, snpsloc[[ss]]);
+					sn.r = findInterval(geneloc[[gg]][ ,2] + cisDist, snpsloc[[ss]]);
+					xx = unlist(lapply(which(sn.r>sn.l),FUN=function(x){(sn.l[x]:(sn.r[x]-1))*nrow(statistic)+x}))
+					select.cis.raw = xx[ astatistic[xx] >= thresh.cis ];
+					select.cis = arrayInd(select.cis.raw, dim(statistic))
+					
+					n.tests.cis = n.tests.cis + length(xx);
+					n.eqtls.cis = n.eqtls.cis + length(select.cis.raw);
+					
+					if( do.hist )	
+						hist.cis$update(astatistic[xx]);
+					
+					if( min.pv.by.genesnp ) {
+	# 					minpv.cis$updatecis(ss, gg, arrayInd(xx, dim(statistic)), astatistic[xx])
+						temp = double(length(astatistic));
+						dim(temp) = dim(astatistic);
+						temp[xx] = astatistic[xx];
+						minpv.cis$update(ss, gg, temp);
+					}
+					
+					if(!is.null(betafun))
+						beta = betafun(mat[[length(mat)]][select.cis.raw], ss, gg, select.cis);
+										
+					saver.cis$update( snps_offset + select.cis[ , 2],
+														gene_offset + select.cis[ , 1],
+														statistic[select.cis.raw],
+														beta);
+					
+	# 				statistic.select.cis  = statistic[ select.cis ];
+	# 				test = testfun( statistic.select.cis );
+	# 				pv = pvfun(test);
+	# 				Saver.cis$WriteBlock( cbind(snps_offset + select.cis[ , 2], gene_offset + select.cis[ , 1], test, pv) );
+	# 				counter.cis$Update(gg, ss, select.cis, pv, n.tests = length(xx), if(do.hist) afun(statistic[xx]) )
+					rp = paste(rp, ", ", formatC(n.eqtls.cis, big.mark=",", format = "f", digits = 0), " cis-eQTLs", sep = "");
+				}
+				## do trans/all analysis
+				if(pvOutputThreshold>0) {
+					if( is.null( statistic ) ) {
+						if( is.null( cursnps ) ) {
+							cursnps = orthonormalize.snps( snps_process( snps$getSlice(ss) ), ss );
+						}
+						mat = vector("list", length(cursnps));
+						for(d in 1:length(cursnps)) {
+							mat[[d]] = tcrossprod(curgene, cursnps[[d]]);
+						}
+						statistic = statistic.fun( mat );
+						astatistic = afun(statistic);
+# 						rm(mat);
+					}
+	
+					if( do.hist )	
+						hist.all$update(astatistic);
+	
+					if(!is.null(select.cis.raw)) 
+						astatistic[xx] = -1;
+	# 					select.tra.raw = select.tra.raw[!(select.tra.raw %in% select.cis.raw)];
+					
+					select.tra.raw = which( astatistic >= thresh);
+					select.tra = arrayInd(select.tra.raw, dim(statistic))
+					
+					n.eqtls.tra = n.eqtls.tra + length(select.tra.raw);
+					n.tests.all = n.tests.all + length(statistic);
+
+					if(!is.null(betafun))
+						beta = betafun(mat[[length(mat)]][select.tra.raw], ss, gg, select.tra);
+										
+					saver.tra$update( snps_offset + select.tra[ , 2],
+														gene_offset + select.tra[ , 1],
+														statistic[select.tra.raw],
+														beta);
+					
+					if( min.pv.by.genesnp ) 
+						minpv.tra$update(ss, gg, astatistic)
+					
+	# 				statistic.select.tra = statistic[ select.tra ];
+	# 				test = testfun( statistic.select.tra );
+	# 				pv = pvfun( test );
+	# 				Saver$WriteBlock( cbind( snps_offset + select.tra[ , 2], gene_offset + select.tra[ , 1], test, pv) );
+	# 				counter$Update(gg, ss, select.tra, pv, n.tests = nrcs*nrcg, if(do.hist) afun(statistic) )
+					rp = paste(rp, ", ", formatC(n.eqtls.tra, big.mark=",", format = "f", digits = 0), if(pvOutputThreshold.cis > 0)" trans-"else" ","eQTLs", sep = "")
+				}
+	
+				#gene_offset = gene_offset + nrcg;
+				if( !is.null(statistic) ) {
+					per = 100*(gg/gene$nSlices() + ss-1) / snps$nSlices();
+					cat( formatC(floor(per*100)/100, format = "f", width = 5, digits = 2), "% done" , rp, "\n", sep = "");
+	 				flush.console();
+				}
+			} # gg in 1:gene$nSlices()
+			snps_offset = snps_offset + nrow(snps$getSliceRaw(ss));
+		} # ss in 1:snps$nSlices()
+	}
+	################################# Results collection ####################################
+	{
+		rez = list(time.in.sec = proc.time()[3] - start.time);
+		rez$param = params;
+		
+		if(pvOutputThreshold.cis > 0) {
+			rez.cis = list(ntests = n.tests.cis, neqtls = n.eqtls.cis);
+			rez.cis = c(rez.cis, saver.cis$getResults( gene, snps, n.tests.cis) );
+			if(do.hist)
+				rez.cis = c(rez.cis, hist.cis$getResults() );
+			if(min.pv.by.genesnp)
+				rez.cis = c(rez.cis, minpv.cis$getResults(snps, gene, pvfun = function(x){pvfun(testfun(x))}) );
+		}
+		
+		if(pvOutputThreshold>0) {
+			rez.all = list(ntests = n.tests.all, neqtls = n.eqtls.tra + n.eqtls.cis);
+			if(pvOutputThreshold.cis > 0) {
+				rez.tra = list(ntests = n.tests.all - n.tests.cis, neqtls = n.eqtls.tra);
+				rez.tra = c(rez.tra, saver.tra$getResults( gene, snps, n.tests.all - n.tests.cis) );
+			} else {
+				rez.all = c(rez.all, saver.tra$getResults( gene, snps, n.tests.all              ) );
+			}
+			if(do.hist) {
+				rez.all = c(rez.all, hist.all$getResults() );
+				if(pvOutputThreshold.cis > 0) {
+					rez.tra$hist.bins = rez.all$hist.bins;
+					rez.tra$hist.counts = rez.all$hist.counts - rez.cis$hist.counts;
+				}
+			}
+			if(min.pv.by.genesnp) {
+				if(pvOutputThreshold.cis > 0) {
+					rez.tra = c(rez.tra, minpv.tra$getResults(snps, gene, pvfun = function(x){pvfun(testfun(x))}) );
+				} else {
+					rez.all = c(rez.all, minpv.tra$getResults(snps, gene, pvfun = function(x){pvfun(testfun(x))}) );
+				}
+			}
+		}
+	
+		if(exists('rez.all')>0)
+			rez$all = rez.all;
+		if(exists('rez.tra')>0)
+			rez$trans = rez.tra;
+		if(exists('rez.cis')>0)
+			rez$cis = rez.cis;	
+		
+		class(rez) = c(class(rez),"MatrixEQTL");
+		status("");
+	}
+# 	cat('s std ',snps.std$get(1),'\n');
+# 	cat('g std ',gene.std$get(1),'\n');
+	################################# Results collection ####################################
+	return(rez);
+}
+
+.histme = function(m, name1, name2, ...) {
+	cnts = m$hist.counts;
+	bins = m$hist.bins;
+	ntst = m$ntests;
+	centers = 0.5 * (bins[-1L] + bins[-length(bins)]);
+	density = 0.5 / (bins[-1L] - centers) * cnts / ntst;
+	ntext = paste("P-value distribution for ", name1, formatC(ntst, big.mark=",", format = "f", digits = 0), name2, " gene-SNP pairs ",sep="");
+	r = structure(list(breaks = bins, counts = cnts, density = density,
+	      mids = centers, equidist = FALSE), class = "histogram");
+	plot(r, main = ntext, ylab = "Density", xlab = "P-values", ...)
+	abline( h = 1, col = "blue");
+	return(invisible());
+}
+
+.qqme = function(m, lcol, cex, pch, ...) {
+	cnts = m$hist.counts;
+	bins = m$hist.bins;
+	ntst = m$ntests;
+	
+	cusu = cumsum(cnts) / ntst;
+	ypos = bins[-1][is.finite(cusu)];
+	xpos = cusu[is.finite(cusu)];
+	lines(-log10(xpos), -log10(ypos), col = lcol, ...);
+# 	lines(xpos, ypos, col = lcol, ...);
+	if(length(m$eqtls$pvalue)==0)
+		return();
+	ypvs = -log10(m$eqtls$pvalue);
+	xpvs = -log10(1:length(ypvs) / ntst);
+	if(length(ypvs) > 1000) {
+		# need to filter a bit, make the plotting faster
+		levels = as.integer( xpvs/xpvs[1] * 1e3);
+		keep = c(TRUE, diff(levels)!=0);
+		levels = as.integer( ypvs/ypvs[1] * 1e3);
+		keep = keep | c(TRUE, diff(levels)!=0);
+		ypvs = ypvs[keep];
+		xpvs = xpvs[keep];
+		rm(keep)
+	}
+	points(xpvs, ypvs, col = lcol, pch = pch, cex = cex, ...);
+}
+
+plot.MatrixEQTL = function(x, cex = 0.5, pch = 19, xlim = NULL, ylim = NULL, ...) {
+	if( x$param$pvalue.hist == FALSE ) {
+		warning("Cannot plot p-value distribution: the information was not recorded.\nUse pvalue.hist!=FALSE.");
+		return(invisible());
+	}
+	if( x$param$pvalue.hist == "qqplot" ) {
+		xmin = 1/max(x$cis$ntests, x$all$ntests);
+		ymax = NULL;
+		if(!is.null(ylim)) {
+			ymax = ylim[2];
+		} else {
+			ymax = -log10(min( 
+					x$cis$eqtls$pvalue[1],   x$cis$hist.bins[  c(FALSE,x$cis$hist.counts>0)][1],
+					x$all$eqtls$pvalue[1],   x$all$hist.bins[  c(FALSE,x$all$hist.counts>0)][1],
+					x$trans$eqtls$pvalue[1], x$trans$hist.bins[c(FALSE,x$trans$hist.counts>0)][1],
+					na.rm = TRUE))+0.1;
+		}
+		if(ymax == 0) {
+			ymax = -log10(.Machine$double.xmin)
+		}
+		if(!is.null(ymax))
+			ylim = c(0,ymax);
+		
+		if(is.null(xlim))
+			xlim =  c(0, -log10(xmin/1.5));
+		
+		plot(numeric(),numeric(), xlab = "-Log10(p-value), theoretical",
+			ylab = "-Log10(p-value), observed",
+			xlim = c(0, -log10(xmin/1.5)),
+			ylim = ylim,
+			xaxs="i", yaxs="i", ...);
+		lines(c(0,1e3), c(0,1e3), col = "gray");
+		if((x$param$pvOutputThreshold > 0) && (x$param$pvOutputThreshold.cis > 0)) {
+			.qqme( x$cis, "red", cex, pch, ...);
+			.qqme( x$trans, "blue", cex, pch, ...);
+			title(paste("QQ-plot for",
+				formatC(x$cis$ntests, big.mark=",", format = "f", digits = 0),
+				"local and",
+				formatC(x$trans$ntests, big.mark=",", format = "f", digits = 0),
+				"distant gene-SNP p-values"));
+			lset = c(1,2,4);
+		} else
+		if(x$param$pvOutputThreshold.cis > 0) {
+			.qqme(x$cis, "red", cex, pch, ...);
+			title(paste("QQ-plot for",
+				formatC(x$cis$ntests, big.mark=",", format = "f", digits = 0),
+				"local gene-SNP p-values"));
+			lset = c(1,4);
+		} else {
+			.qqme(x$all, "blue", cex, pch, ...);
+			title(paste("QQ-plot for all",
+				formatC(x$all$ntests, big.mark=",", format = "f", digits = 0),
+				"gene-SNP p-values"));
+			lset = c(3,4);
+		}
+		legend("topleft",
+			c("Local p-values","Distant p-values","All p-values","diagonal")[lset],
+			col =      c("red","blue","blue","gray")[lset],
+			text.col = c("red","blue","blue","gray")[lset],
+			pch = 20, lwd = 1, pt.cex = c(1,1,1,0)[lset])
+	} else {
+		if((x$param$pvOutputThreshold > 0) && (x$param$pvOutputThreshold.cis > 0)) {
+			par(mfrow=c(2,1));
+			.histme(x$cis, "", " local", ...);
+			tran = list(hist.counts = x$all$hist.counts - x$cis$hist.counts,
+					hist.bins = x$all$hist.bins,
+					ntests =  x$all$ntests - x$cis$ntests);
+			.histme(x$trans,""," distant", ...);
+			par(mfrow=c(1,1));
+		} else
+		if(x$param$pvOutputThreshold.cis > 0) {
+			.histme(x$cis, "", " local", ...);
+		} else {
+			.histme(x$all, "all ", ""  , ...);
+		}
+	}
+	return(invisible());
+}
+
author	jasonxu
date	Fri, 12 Mar 2021 08:20:57 +0000
parents	cd4c8e4a4b5b
children