# HG changeset patch
# User jasonxu
# Date 1615537257 0
# Node ID ae74f8fb3aef178a1df4ea9ed789feca5cf6a9a8
# Parent  5fafba5359eb15608f467660ac7049230434fae5
Uploaded

diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/DESCRIPTION
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/DESCRIPTION	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,20 @@
+Package: MatrixEQTL
+Type: Package
+Title: Matrix eQTL: Ultra fast eQTL analysis via large matrix
+        operations
+Version: 2.1.1
+Date: 2014-02-24
+Author: Andrey Shabalin
+Maintainer: Andrey Shabalin <ashabalin@vcu.edu>
+Description: Matrix eQTL is designed for fast eQTL analysis on large datasets.
+	Matrix eQTL can test for association between genotype and gene expression using linear regression 
+	with either additive or ANOVA genotype effects.
+	The models can include covariates to account for factors 
+	as population stratification, gender, and clinical variables. 
+	It also supports models with heteroscedastic and/or correlated errors,
+	false discovery rate estimation and separate treatment of local (cis) and distant (trans) eQTLs.
+License: LGPL-3
+LazyLoad: yes
+URL: http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/
+Depends: R (>= 2.12.0), methods
+Packaged: 2014-03-01 10:58:21 UTC; Andrey
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/NAMESPACE
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/NAMESPACE	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,27 @@
+importFrom(methods, setRefClass)
+exportPattern("^[[:alpha:]]+")
+exportClasses(
+    "SlicedData" 
+)
+S3method(plot, MatrixEQTL)
+S3method(summary, SlicedData)
+exportMethods(
+    "[[",
+    "[[<-",
+    "colnames",
+    "colnames<-",
+    "dim",
+    "length",
+    "ncol",
+    "NCOL",
+    "nrow",
+    "NROW",
+    "rownames",
+    "rownames<-",
+    "as.matrix",
+    "rowMeans",
+    "rowSums",
+    "colMeans",
+    "colSums"
+)
+
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/R/Matrix_eQTL_engine.R
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/R/Matrix_eQTL_engine.R	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,1964 @@
+# Matrix eQTL by Andrey A. Shabalin
+# http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/
+
+# http://cran.r-project.org/web/packages/policies.html
+
+library(methods)
+
+modelLINEAR = 117348L;
+modelANOVA  = 47074L;
+modelLINEAR_CROSS = 1113461L;
+
+.seq = function(a,b){if(a<=b){a:b}else{integer(0)}};
+
+SlicedData <- setRefClass( "SlicedData",
+	fields = list( 
+		dataEnv = "environment",
+		nSlices1 = "numeric",
+		rowNameSlices = "list",
+		columnNames = "character",
+		fileDelimiter = "character",
+		fileSkipColumns = "numeric",
+		fileSkipRows = "numeric",
+		fileSliceSize = "numeric",
+		fileOmitCharacters = "character"
+	),
+	methods = list(
+		initialize = function( mat = NULL ) {
+			dataEnv <<- new.env(hash = TRUE, size = 29L);
+			nSlices1 <<- 0L;
+			if(!is.null(mat)) {
+				CreateFromMatrix(mat);
+			}
+			fileSliceSize <<- 1000;
+			fileDelimiter <<- "\t";
+			fileSkipColumns <<- 1L;
+			fileSkipRows <<- 1L;
+			fileOmitCharacters <<- "NA"
+			return(invisible(.self));
+		},
+		CreateFromMatrix = function( mat ) {
+			stopifnot( class(mat) == "matrix" );
+			setSliceRaw( 1L ,mat );
+			rns = rownames( mat, do.NULL = FALSE);
+			#if( is.null(rns) ) {
+			#	rns = paste( "Row_",(1:nrow(mat)), sep="" );
+			#}
+			rowNameSlices <<- list(rns);
+			cns = colnames( mat, do.NULL = FALSE );
+			#if( is.null(cns) ){
+			#	cns = paste( "Col_",(1:ncol(mat)), sep="" );
+			#}
+			columnNames <<- cns;
+			return(invisible(.self));
+		},
+		getSlice = function(sl) {
+			value = get(paste(sl), dataEnv);
+			if( is.raw(value) ) {
+				storage.mode(value) = "double";
+				value[value == 255] = NA;
+			}
+			return( value  )	
+		},
+		getSliceRaw = function(sl) {
+			return( get(paste(sl), dataEnv) )	
+		},
+		setSliceRaw = function(sl, value) {
+			assign( paste(sl), value, dataEnv )
+			if( nSlices1 < sl ) {
+				nSlices1 <<- sl;
+			}
+		},
+		setSlice = function(sl, value) {
+			if( length(value) > 0 ) {
+				if( all(as.integer(value) == value, na.rm = TRUE) ) {
+					if( (min(value, na.rm = TRUE) >= 0 ) && 
+	                            (max(value, na.rm = TRUE) < 255) )
+					{
+						nv = value;
+						suppressWarnings({storage.mode(nv) = "raw"});
+						nv[ is.na(value)] = as.raw(255);
+						value = nv;
+					} else {
+						storage.mode(value) = "integer";
+					}
+				}
+			}
+			setSliceRaw(sl, value);
+		},	
+		nSlices = function() {
+			return( nSlices1 );
+		},
+		LoadFile = function(filename, skipRows = NULL, skipColumns = NULL, sliceSize = NULL, omitCharacters = NULL, delimiter = NULL, rowNamesColumn = 1) {
+			if( !is.null(skipRows) ) {
+				fileSkipRows <<- skipRows;
+			}
+			if( !is.null(skipColumns) ) {
+				fileSkipColumns <<- skipColumns;
+			}
+			if( !is.null(omitCharacters) ) {
+				fileOmitCharacters <<- omitCharacters;
+			}
+			if( !is.null(sliceSize) ) {
+				fileSliceSize <<- sliceSize;
+			}
+			if( !is.null(delimiter) ) {
+				fileDelimiter <<- delimiter;
+			}
+			stopifnot( (fileSkipColumns == 0) || (rowNamesColumn <= fileSkipColumns) )
+			stopifnot( (fileSkipColumns == 0) || (rowNamesColumn >= 1) )
+	
+			fid = file(description = filename, open = "rt", blocking = FALSE, raw = FALSE)
+			# clean object if file is open
+			Clear(); 
+			lines = readLines(con = fid, n = max(fileSkipRows,1L), ok = TRUE, warn = TRUE)
+			line1 = tail(lines,1);
+			splt = strsplit(line1, split = fileDelimiter, fixed = TRUE);
+			if( fileSkipRows > 0L ) {
+				columnNames <<- splt[[1]]; # [ -(1:fileSkipColumns) ];
+			} else {
+				seek(fid, 0)
+			}		
+			
+			rm( lines, line1, splt );
+			
+			rowNameSlices <<- vector("list", 15);
+	
+			curSliceId = 0L;
+			repeat
+			{
+				# preallocate names and data
+				if(length(rowNameSlices) < curSliceId) {
+					rowNameSlices[[2L*curSliceId]] <<- NULL;
+				}
+				curSliceId = curSliceId + 1L;
+				
+				# read sliceSize rows
+				rowtag = vector("character",fileSliceSize);
+				rowvals = vector("list",fileSliceSize);
+				for(i in 1:fileSliceSize) {
+					temp = "";
+					if( fileSkipColumns > 0L ) {
+						temp = scan(file = fid, what = character(), n = fileSkipColumns, quiet = TRUE,sep = fileDelimiter);
+					}
+					rowtag[i] = temp[rowNamesColumn];#paste(temp,collapse=" ");
+					rowvals[[i]] = scan(file = fid, what = double(), nlines = 1, quiet = TRUE, sep = fileDelimiter, na.strings = fileOmitCharacters);
+					if( length(rowvals[[i]]) == 0L ) {
+						if(i==1L) {
+							rowtag = matrix(0, 0, 0);
+							rowvals = character(0);
+						} else 	{
+							rowtag  = rowtag[  1:(i-1) ];
+							rowvals = rowvals[ 1:(i-1) ];
+						}
+						break;			
+					}
+				}
+				if( length(rowtag) == 0L ) {
+					curSliceId = curSliceId - 1L;
+					break;
+				}
+				rowNameSlices[[curSliceId]] <<- rowtag;
+				data = c(rowvals, recursive = TRUE);
+				dim(data) = c(length(rowvals[[1]]), length(rowvals));
+				data = t(data);
+				setSlice(curSliceId, data);
+				if( length(rowtag) < fileSliceSize ) {
+					break;
+				}
+				numtxt = formatC(curSliceId*fileSliceSize, big.mark=",", format = "f", digits = 0)
+				cat( "Rows read: ", numtxt, "\n");
+				flush.console()
+			}
+			close(fid)
+			if( fileSkipRows == 0 ) {
+				columnNames <<- paste("Col_", (1:nCols()), sep="");
+			} else {
+				columnNames <<- tail(columnNames, ncol(getSliceRaw(1)));
+			}
+			if( fileSkipColumns == 0 ) {
+				cnt = 0L;
+				for( sl in 1:nSlices() ) {
+					nr = length(getSliceRaw(sl));
+					rowNameSlices[[sl]] <<- paste("Row_",cnt + (1:nr),sep="");
+					cnt = cnt + nr;
+				}
+			}
+			rowNameSlices <<- rowNameSlices[1:curSliceId];
+			cat("Rows read: ", nRows(), " done.\n");
+			return(invisible(.self));
+		},
+		SaveFile = function(filename) {
+			if( nSlices() == 0 ) {
+				cat("No data to save");
+				return();
+			}
+			fid = file(filename,"wt");
+			for( sl in 1:nSlices() ) {
+				z = getSlice(sl);
+				rownames(z) = rowNameSlices[[sl]];
+				colnames(z) = columnNames;
+				write.table(z, file = fid, sep = "\t", 
+					col.names = (if(sl == 1){NA}else{FALSE}));
+			}
+			close(fid);
+		},
+		nRows = function() {
+			s = 0L;
+			for(sl in .seq(1,nSlices())) {
+				s = s + nrow(getSliceRaw(sl));
+			}
+			return( s )
+		},
+		nCols = function() {
+			if( nSlices() == 0L ) {
+				return(0L);
+			} else {
+				return( ncol(getSliceRaw(1L)) )
+			}
+		},
+		Clear = function() {
+			for( sl in .seq(1,nSlices()) ) {
+				rm(list = paste(sl), envir = dataEnv)
+			}
+			nSlices1 <<- 0L;
+			rowNameSlices <<- list();
+			columnNames <<- character();
+			return(invisible(.self));
+		},
+		IsCombined = function() {
+			return( nSlices() <= 1L );
+		},
+		GetAllRowNames = function() {
+			return( c(rowNameSlices, recursive=TRUE) );
+		},
+		SetNanRowMean = function() {
+			if( (nCols() == 0L) ) {
+				return(invisible(.self));
+			}
+			for( sl in .seq(1,nSlices()) ) {
+				slice = getSlice(sl);
+				if( any(is.na(slice)) ) {
+					rowmean = rowMeans(slice, na.rm = TRUE);
+					rowmean[is.na(rowmean)] = 0L;
+					for( j in which(!complete.cases(slice)) ) {
+						where1 = is.na(slice[j, ]);
+						slice[j, where1] = rowmean[j];
+					}
+					setSlice(sl, slice);
+				}
+			}
+			return(invisible(.self));
+		},
+		RowStandardizeCentered = function() {
+			for(sl in .seq(1,nSlices()) ) {
+				slice = getSlice(sl);
+				div = sqrt( rowSums(slice^2) );
+				div[ div == 0 ] = 1;
+				setSlice(sl, slice/div);
+			}
+			return(invisible(.self));
+		},
+		CombineInOneSlice = function() {
+			if( nSlices() <= 1L ) {
+				return(invisible(.self));			
+			}
+			nc = nCols();
+			nr = nRows();
+			datatypes = c("raw","integer","double");
+			datafuns = c(as.raw, as.integer, as.double);
+			datatype = character(nSlices());
+			for(sl in 1:nSlices()) {
+				datatype[sl] = typeof(getSliceRaw(sl));
+			}
+			mch = max(match(datatype,datatypes,nomatch = length(datatypes)));
+			datafun = datafuns[[mch]];
+			newData = matrix(datafun(0), nrow = nr, ncol = nc);
+			offset = 0;
+			for(sl in 1:nSlices()) {
+				if(mch==1) {
+					slice = getSliceRaw(sl);
+				} else {
+					slice = getSlice(sl);
+				}
+				newData[ offset + (1:nrow(slice)),] = datafun(slice);
+				setSlice(sl, numeric());
+				offset = offset + nrow(slice);
+			}
+			
+			nSlices1 <<- 1L;
+			setSliceRaw(1L, newData);
+			rm(newData);
+			
+			newrowNameSlices = GetAllRowNames();
+			rowNameSlices <<- list(newrowNameSlices)
+			return(invisible(.self));
+		},
+		ResliceCombined = function(sliceSize = -1) {
+			if( sliceSize > 0L ) {
+				fileSliceSize <<- sliceSize;
+			}
+			if( fileSliceSize <= 0 ) {
+				fileSliceSize <<- 1000;
+			}
+			if( IsCombined() ) {
+				nRows1 = nRows();
+				if(nRows1 == 0L) {
+					return(invisible(.self));
+				}
+				newNSlices = floor( (nRows1 + fileSliceSize - 1)/fileSliceSize );
+				oldData = getSliceRaw(1L);
+				#oldNames = rowNameSlices[[1]];
+				newNameslices = vector("list",newNSlices)
+				for( sl in 1:newNSlices ) {
+					range = (1+(sl-1)*fileSliceSize) : (min(nRows1,sl*fileSliceSize));
+					newpart = oldData[range, ,drop = FALSE];
+					if( is.raw(oldData) ) {
+						setSliceRaw( sl, newpart);
+					} else {
+						setSlice( sl, newpart);
+					}
+					newNameslices[[sl]] = rowNameSlices[[1]][range];
+				}
+				rowNameSlices <<- newNameslices ;
+			} else {
+				stop("Reslice of a sliced matrix is not supported yet. Use CombineInOneSlice first.");
+			}
+			return(invisible(.self));
+		},
+		Clone = function() {
+			clone = SlicedData$new();
+			for(sl in .seq(1,nSlices()) ) {
+				clone$setSliceRaw(sl,getSliceRaw(sl));
+			}
+			clone$rowNameSlices = rowNameSlices;
+			clone$columnNames = columnNames;
+			clone$fileDelimiter = fileDelimiter;
+			clone$fileSkipColumns = fileSkipColumns;
+			clone$fileSkipRows = fileSkipRows;
+			clone$fileSliceSize = fileSliceSize;
+			clone$fileOmitCharacters = fileOmitCharacters;
+			return( clone );		
+		},
+		RowMatrixMultiply = function(multiplier) {
+			for(sl in .seq(1,nSlices()) ) {
+				setSlice(sl, getSlice(sl) %*% multiplier);
+			}
+			return(invisible(.self));
+		},
+		ColumnSubsample = function(subset) {
+			for(sl in .seq(1,nSlices()) ) {
+				setSliceRaw(sl, getSliceRaw(sl)[ ,subset, drop = FALSE]);
+			}
+			columnNames <<- columnNames[subset];
+			return(invisible(.self));
+		},
+		RowReorderSimple = function(ordr) {
+			# had to use an inefficient and dirty method
+			# due to horrible memory management in R
+			if( (typeof(ordr) == "logical") && all(ordr) ) {
+				return(invisible(.self));
+			}
+			if( (length(ordr) == nRows()) && all(ordr == (1:length(ordr))) ) {
+				return(invisible(.self));
+			}
+			CombineInOneSlice();
+			gc();
+			setSliceRaw( 1L, getSliceRaw(1L)[ordr, ] );
+			rowNameSlices[[1]] <<- rowNameSlices[[1]][ordr];
+			gc();
+			ResliceCombined();
+			gc();
+			return(invisible(.self));
+		},
+		RowReorder = function(ordr) {
+			# transform logical into indices 
+			if( typeof(ordr) == "logical" ) {
+				if( length(ordr) == nRows() ) {
+					ordr = which(ordr);
+				} else {
+					stop("Parameter \"ordr\" has wrong length")
+				}
+			}
+			## first, check that anything has to be done at all
+			if( (length(ordr) == nRows()) && all(ordr == (1:length(ordr))) ) {
+				return(invisible(.self));
+			}
+			## check bounds
+			#if( (min(ordr) < 1) || (max(ordr) > nRows()) ) {
+			#	stop("Parameter \"ordr\" is out of bounds");
+			#}
+			## slice the data into individual rows
+			all_rows = vector("list", nSlices())
+			for( i in 1:nSlices() ) {
+				slice = getSliceRaw(i)
+				all_rows[[i]] = split(slice, 1:nrow(slice))
+				setSliceRaw(i,numeric())
+			}
+			gc();
+			all_rows = unlist(all_rows, recursive=FALSE, use.names = FALSE);
+			## Reorder the rows
+			all_rows = all_rows[ordr];
+			## get row names
+			all_names = GetAllRowNames();
+			## erase the set
+			rowNameSlices <<- list();
+			## sort names
+			all_names = all_names[ordr];
+			##
+			## Make slices back
+			nrows = length(all_rows);
+			nSlices1 <<- as.integer((nrows+fileSliceSize-1)/fileSliceSize);
+			##cat(nrows, " ", nSlices1);
+			rowNameSlices1 = vector("list", nSlices1);
+			for( i in 1:nSlices1 ) {
+				fr = 1 + fileSliceSize*(i-1);
+				to = min( fileSliceSize*i, nrows);
+	
+				subset = all_rows[fr:to];
+				types = unlist(lapply(subset,typeof));
+				israw = (types == "raw")
+				if(!all(israw == israw[1])) {
+					# some raw and some are not
+					subset = lapply(subset, function(x){if(is.raw(x)){x=as.integer(x);x[x==255] = NA;return(x)}else{return(x)}});
+				}
+				subset = unlist(subset);
+				dim(subset) = c( length(all_rows[[fr]]) , to - fr + 1)
+				#subset = matrix(subset, ncol = (to-fr+1));
+				if(is.raw(subset)) {
+					setSliceRaw(i, t(subset)); 
+				} else {
+					setSlice(i, t(subset)); 
+				}
+				rowNameSlices1[[i]] = all_names[fr:to];
+				all_rows[fr:to] = 0;
+				all_names[fr:to] = 0;
+			}
+			rowNameSlices <<- rowNameSlices1;
+			gc();
+			return(invisible(.self));
+		},
+		RowRemoveZeroEps = function(){
+			for(sl in .seq(1,nSlices()) ) {
+				slice = getSlice(sl);
+				amean = rowMeans(abs(slice));
+				remove = (amean < .Machine$double.eps*nCols());
+				if(any(remove)) {
+					rowNameSlices[[sl]] <<- rowNameSlices[[sl]][!remove];
+					setSlice(sl, slice[!remove, , drop = FALSE]);
+				}
+			}
+			return(invisible(.self));
+		},
+		FindRow = function(rowname) {
+			for(sl in .seq(1,nSlices()) ) {
+				mch = match(rowname,rowNameSlices[[sl]], nomatch = 0);
+				if( mch > 0 )
+				{
+					row = getSlice(sl)[mch[1], , drop=FALSE];
+					rownames(row) = rowname;
+					colnames(row) = columnNames;
+					return( list(slice = sl, item = mch, row = row) );
+				}
+			}
+			return( NULL );
+		},
+		show = function() {
+			cat("SlicedData object. For more information type: ?SlicedData\n");
+			cat("Number of columns:", nCols(), "\n");
+			cat("Number of rows:", nRows(), "\n");
+			cat("Data is stored in", nSlices(), "slices\n");
+			if(nCols()>0) {
+				z = getSlice(1L);
+				if(nrow(z)>0) {
+					z = z[1:min(nrow(z),10L), 1:min(ncol(z),10L), drop = FALSE];
+					rownames(z) = rowNameSlices[[1]][1:nrow(z)];
+					colnames(z) = columnNames[1:ncol(z)];
+					cat("Top left corner of the first slice (up to 10x10):\n");
+					methods:::show(z)
+				}
+			}		
+		}
+	))
+
+setGeneric("nrow")
+setMethod("nrow", "SlicedData",	function(x) {
+		return( x$nRows() );
+	})
+setGeneric("NROW")
+setMethod("NROW", "SlicedData",	function(x) {
+		return( x$nRows() );
+	})
+setGeneric("ncol")
+setMethod("ncol", "SlicedData",	function(x) {
+		return( x$nCols() );
+	})
+setGeneric("NCOL")
+setMethod("NCOL", "SlicedData",	function(x) {
+		return( x$nCols() );
+	})
+setGeneric("dim")
+setMethod("dim", "SlicedData",	function(x) {
+		return( c(x$nRows(),x$nCols()) );
+	})
+setGeneric("colnames")
+setMethod("colnames", "SlicedData",	function(x) {
+		return( x$columnNames );
+	})
+setGeneric("rownames")
+setMethod("rownames", "SlicedData",	function(x) {
+		return( x$GetAllRowNames() );
+	})
+setMethod("[[", "SlicedData",	function(x,i) {
+		return( x$getSlice(i) );
+	})
+setGeneric("length")
+setMethod("length", "SlicedData",	function(x) {
+		return( x$nSlices() );
+	})
+setMethod("[[<-", "SlicedData",	function(x,i,value) {
+		x$setSlice(i, value);
+		return(x);
+})
+summary.SlicedData = function(object, ...) {
+	z = c(nCols = object$nCols(), nRows = object$nRows(), nSlices = object$nSlices());
+	return(z);
+}
+
+##### setGeneric("summary") #####
+#setMethod("summary", "SlicedData",	function(object, ...) {
+#		z = c(nCols = object$nCols(), nRows = object$nRows(), nSlices = object$nSlices());
+#		return(z);
+#	})
+#setGeneric("show", standardGeneric("show"))
+# setMethod("show", "SlicedData",	function(object) {
+# 		cat("SlicedData object. For more information type: ?SlicedData\n");
+# 		cat("Number of columns:", object$nCols(), "\n");
+# 		cat("Number of rows:", object$nRows(), "\n");
+# 		cat("Data is stored in", object$nSlices(), "slices\n");
+# 		if(object$nSlices()>0) {
+# 			z = object$getSlice(1);
+# 			if(nrow(z)>0) {
+# 				z = z[1:min(nrow(z),10), 1:min(ncol(z),10), drop = FALSE];
+# 				rownames(z) = object$rowNameSlices[[1]][1:nrow(z)];
+# 				colnames(z) = object$columnNames[1:ncol(z)];
+# 				cat("Top left corner of the first slice (up to 10x10):\n");
+# 				show(z)
+# 			}
+# 		}		
+# 	})
+
+setGeneric("as.matrix")
+setMethod("as.matrix", "SlicedData", function(x) {
+		if(x$nSlices() == 0) {
+			return( matrix(0,0,0) );
+		}
+		if(x$nSlices() > 1) {
+			copy = x$Clone();
+			copy$CombineInOneSlice();
+		} else {
+			copy = x;
+		}
+		mat = copy$getSlice(1L);
+		rownames(mat) = rownames(copy);
+		colnames(mat) = colnames(copy);
+		return( mat );
+	})
+setGeneric("colnames<-")
+setMethod("colnames<-", "SlicedData", function(x,value) {
+		stopifnot( class(value) == "character" );
+		stopifnot( length(value) == x$nCols() );
+		x$columnNames = value;
+		return(x);
+	})
+setGeneric("rownames<-")
+setMethod("rownames<-", "SlicedData", function(x,value) {
+		stopifnot( class(value) == "character" );
+		stopifnot( length(value) == x$nRows() );
+		start = 1;
+		newNameSlices = vector("list", x$nSlices());
+		for( i in .seq(1,x$nSlices()) ) {
+			nr = nrow(x$getSliceRaw(i));
+			newNameSlices[[i]] = value[ start:(start+nr-1) ];
+			start = start + nr;
+		}
+		x$rowNameSlices = newNameSlices; 
+		return(x);
+	})
+setGeneric("rowSums")
+setMethod("rowSums", "SlicedData", function(x, na.rm = FALSE, dims = 1L) {
+		if(x$nSlices() == 0) {
+			return( numeric() );
+		}
+		stopifnot( dims == 1 );
+		thesum = vector("list", x$nSlices());
+		for( i in 1:x$nSlices() ) {
+			thesum[[i]] = rowSums(x$getSlice(i), na.rm)
+		}
+		return(unlist(thesum, recursive = FALSE, use.names = FALSE));
+	})
+setGeneric("rowMeans")
+setMethod("rowMeans", "SlicedData", function(x, na.rm = FALSE, dims = 1L) {
+		if(x$nSlices() == 0) {
+			return( numeric() );
+		}
+		stopifnot( dims == 1 );
+		thesum = vector("list", x$nSlices());
+		for( i in 1:x$nSlices() ) {
+			thesum[[i]] = rowMeans(x$getSlice(i), na.rm)
+		}
+		return(unlist(thesum, recursive = FALSE, use.names = FALSE));
+	})
+setGeneric("colSums")
+setMethod("colSums", "SlicedData", function(x, na.rm = FALSE, dims = 1L) {
+		if(x$nCols() == 0) {
+			return( numeric() );
+		}
+		stopifnot( dims == 1 );
+		thesum = 0;
+		for( i in .seq(1,x$nSlices()) ) {
+			thesum = thesum + colSums(x$getSlice(i), na.rm)
+		}
+		return(thesum);
+	})
+setGeneric("colMeans")
+setMethod("colMeans", "SlicedData", function(x, na.rm = FALSE, dims = 1L) {
+		if(x$nCols() == 0) {
+			return( numeric() );
+		}
+		stopifnot( dims == 1 );
+		thesum = 0;
+		thecounts = x$nRows();
+		for( i in .seq(1,x$nSlices()) ) {
+			slice = x$getSlice(i);
+			thesum = thesum + colSums(slice, na.rm)
+			if( na.rm ) {
+				thecounts = thecounts - colSums(is.na(slice))
+			}
+		}
+		return(thesum/thecounts);
+	})
+
+.listBuilder <- setRefClass(".listBuilder",
+	fields = list(
+		dataEnv = "environment",
+		n = "numeric"
+	),
+	methods = list(
+		initialize = function() {
+			dataEnv <<- new.env(hash = TRUE);
+			n <<- 0;
+# 			cumlength <<- 0;
+			return(.self);
+		},
+		add = function(x) {
+			if(length(x) > 0) {
+				n <<- n + 1;
+# 				cumlength <<- cumlength + length(x);
+				assign(paste(n), x, dataEnv );
+			}
+			return(.self);
+		},
+		set = function(i,x) {
+			if(length(x) > 0) {
+				if(i>n)
+					n <<- i;
+				assign(paste(i), x, dataEnv );
+			}
+			return(.self);
+		},
+		get = function(i) {
+			return(base::get(paste(i),dataEnv));
+		},
+		list = function() {
+			if(n==0)	return(list());
+			result = vector('list',n);
+			for( i in 1:n) {
+				result[[i]] = .self$get(i);
+			}
+			return(result);
+		},
+		unlist = function() {
+			return(base::unlist(.self$list(), recursive=FALSE, use.names = FALSE));
+		},
+		show = function() {
+			cat(".listBuilder object.\nIternal object in MatrixEQTL package.\n");
+			cat("Number of elements:", object$n, "\n");
+		}
+	))
+
+.histogrammer <- setRefClass(".histogrammer",
+	fields = list(
+		pvbins1 = "numeric",
+		statbins1 = "numeric",
+		hist.count = "numeric"
+	),
+	methods = list(
+		initialize = function (pvbins, statbins) {
+			if(length(pvbins)) {
+				ord = order(statbins);
+				pvbins1 <<- pvbins[ord];
+				statbins1 <<- statbins[ord];
+				hist.count <<- double(length(pvbins)-1);
+			}
+			return(.self);
+		},
+		update = function(stats.for.hist) {
+			h = hist(stats.for.hist, breaks = statbins1, include.lowest = TRUE, right = TRUE, plot = FALSE)$counts;
+			hist.count <<- hist.count + h;
+		},
+		getResults = function() {
+			if(!is.unsorted(pvbins1)) {
+				return(list(hist.bins =     pvbins1 , hist.counts =     hist.count ));
+			} else {
+				return(list(hist.bins = rev(pvbins1), hist.counts = rev(hist.count)));
+			}
+		}
+	))
+
+
+.minpvalue <- setRefClass(".minpvalue",
+	fields = list(
+		sdata = ".listBuilder",
+		gdata = ".listBuilder"
+	),
+	methods = list(
+		initialize = function(snps, gene) {
+			sdata <<- .listBuilder$new();
+			for( ss in 1:snps$nSlices() ) {
+				sdata$set( ss, double(nrow(snps$getSliceRaw(ss))));
+			}
+			gdata <<- .listBuilder$new();
+			for( gg in 1:gene$nSlices() ) {
+				gdata$set( gg, double(nrow(gene$getSliceRaw(gg))));
+			}
+			return(.self);
+		},
+		update = function(ss, gg, astat) {
+			gmax = gdata$get(gg)
+			z1 = max.col(astat,ties.method='first');
+			z11 = astat[1:nrow(astat) + nrow(astat) * (z1 - 1)];
+			gmax = pmax(gmax, z11);
+			gdata$set(gg, gmax);
+			
+			smax = sdata$get(ss)
+			z22 = apply(astat,2,max);
+			smax = pmax(smax, z22);
+			sdata$set(ss, smax);
+			return(.self);
+		},
+		updatecis = function(ss, gg, select.cis, astat) {
+			if(length(astat)>0)
+			{
+				byrows = aggregate(x=astat, by=list(row=select.cis[,1]), FUN=max);
+				bycols = aggregate(x=astat, by=list(col=select.cis[,2]), FUN=max);
+	
+				gmax = gdata$get(gg);
+				gmax[byrows$row] = pmax(gmax[byrows$row], byrows$x)
+				gdata$set(gg, gmax);
+				
+				smax = sdata$get(ss)
+				smax[bycols$col] = pmax(smax[bycols$col], bycols$x)
+				sdata$set(ss, smax);
+			}			
+			return(.self);
+		},
+		getResults = function(snps, gene, pvfun) {
+			min.pv.snps = pvfun(sdata$unlist());
+			names(min.pv.snps) = rownames(snps);
+			min.pv.gene = pvfun(gdata$unlist());
+			names(min.pv.gene) = rownames(gene);
+			return(list(min.pv.snps = min.pv.snps, min.pv.gene = min.pv.gene));
+		}
+	))
+
+.OutputSaver_FRD <- setRefClass(".OutputSaver_FRD",
+	fields = list(
+		sdata = ".listBuilder",
+		gdata = ".listBuilder",
+		cdata = ".listBuilder",
+		bdata = ".listBuilder",
+		fid = "list",
+		testfun1 = "list",
+		pvfun1 = "list"
+	),
+	methods = list(
+		initialize = function () {
+			sdata <<- .listBuilder$new();
+			gdata <<- .listBuilder$new();
+			cdata <<- .listBuilder$new();
+			bdata <<- .listBuilder$new();
+			fid <<- list(0);
+			testfun1 <<- list(0);
+			pvfun1 <<- list(0);
+			return(.self);
+		},
+		start = function(filename, statistic_name, unused1, unused2, testfun, pvfun) {
+			testfun1 <<- list(testfun);
+			pvfun1 <<- list(pvfun);
+			if(length(filename) > 0) {
+				if(class(filename) == "character") {
+					fid <<- list(file(description = filename, open = "wt", blocking = FALSE, raw = FALSE), TRUE);
+				} else {
+					fid <<- list(filename, FALSE)
+				}
+				writeLines( paste("SNP\tgene\t",statistic_name,"\tp-value\tFDR", sep = ""), fid[[1]]);
+			} else {
+				fid <<- list();
+			}
+		},
+		update = function(spos, gpos, sta, beta = NULL) {
+			if(length(sta)>0) {
+				sdata$add(spos);
+				gdata$add(gpos);
+				cdata$add(sta );
+				if(!is.null(beta ))
+					bdata$add(beta );
+			}
+			return(.self);
+		},
+		getResults = function( gene, snps, FDR_total_count) {
+			pvalues = NULL;
+ 			if(cdata$n > 0) {
+ 				tests = testfun1[[1]](cdata$unlist());
+ 				cdata <<- .listBuilder$new();
+ 				
+ 				pvalues = pvfun1[[1]](tests);
+ 				ord = order(pvalues);
+ 				
+ 				tests = tests[ord];
+ 				pvalues = pvalues[ord];
+ 				
+ 				FDR = pvalues * FDR_total_count / (1:length(pvalues));
+ 				FDR[length(FDR)] = min(FDR[length(FDR)], 1);
+ 				FDR = rev(cummin(rev(FDR)));
+ 				
+ 				snps_names  = snps$GetAllRowNames()[sdata$unlist()[ord]];
+ 				sdata <<- .listBuilder$new();
+				gene_names  = gene$GetAllRowNames()[gdata$unlist()[ord]];
+ 				gdata <<- .listBuilder$new();
+ 				
+ 				beta = NULL;
+ 				if(bdata$n > 0)
+ 					beta = bdata$unlist()[ord];
+				
+ 				if(length(fid)>0)	{	
+					step = 1000; ########### 100000
+					for( part in 1:ceiling(length(FDR)/step) ) {
+	 					fr = (part-1)*step + 1;
+	 					to = min(part*step, length(FDR));
+						dump = data.frame(snps_names[fr:to],
+															gene_names[fr:to],
+															if(is.null(beta)) tests[fr:to] else list(beta[fr:to],tests[fr:to]),
+															pvalues[fr:to],
+															FDR[fr:to], 
+															row.names = NULL, 
+															check.rows = FALSE, 
+															check.names = FALSE, 
+															stringsAsFactors = FALSE);
+						write.table(dump, file = fid[[1]], quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE);
+					}
+ 				}
+			} else {
+				cat("No significant associations were found.\n", file = if(length(fid)>0){fid[[1]]}else{""});
+			}
+			if(length(fid)>0)	{	
+				if(fid[[2]]) {
+					close(fid[[1]]);
+				}
+	 			fid <<- list();
+			}
+ 			
+ 			if(!is.null(pvalues)) {
+ 				eqtls = list( snps = snps_names,
+							 				gene = gene_names,
+											statistic = tests,
+											pvalue = pvalues,
+											FDR = FDR);
+ 				if(!is.null(beta))
+ 					eqtls$beta = beta;
+ 			} else {
+ 				eqtls = list( snps = character(),
+							 				gene = character(),
+				 							beta = numeric(),
+											statistic = numeric(),
+											pvalue = numeric(),
+											FDR = numeric());
+ 			}
+			return(list(eqtls = data.frame(eqtls)));
+		}
+	)
+)
+
+
+.OutputSaver_direct <- setRefClass(".OutputSaver_direct",
+	fields = list(
+		gene_names = "character",
+		snps_names = "character",
+		fid = "list",
+		testfun1 = "list",
+		pvfun1 = "list"
+	),
+	methods = list(
+		initialize = function() {
+			gene_names <<- character(0);
+			snps_names <<- character(0);
+			fid <<- list(0);
+			testfun1 <<- list(0);
+			pvfun1 <<- list(0);
+			return(.self);
+		},
+		start = function(filename, statistic_name, snps, gene, testfun, pvfun) {
+			# I hope the program stops if it fails to open the file
+			if(class(filename) == "character") {
+				fid <<- list(file(description = filename, open = "wt", blocking = FALSE, raw = FALSE), TRUE);
+			} else {
+				fid <<- list(filename, FALSE)
+			}
+			writeLines(paste("SNP\tgene\t", statistic_name, "\tp-value", sep = ""), fid[[1]]);
+			gene_names <<- gene$GetAllRowNames();
+			snps_names <<- snps$GetAllRowNames();
+			testfun1 <<- list(testfun);
+			pvfun1 <<- list(pvfun);
+		},
+		update = function(spos, gpos, sta, beta = NULL) {
+			if( length(sta) == 0 )
+				return();
+			sta = testfun1[[1]](sta);
+			lst = list(snps = snps_names[spos], gene = gene_names[gpos], beta = beta, statistic = sta, pvalue = pvfun1[[1]](sta));
+			lst$beta = lst$beta;
+			
+			dump2 = data.frame(lst, row.names = NULL, check.rows = FALSE, check.names = FALSE, stringsAsFactors = FALSE);
+			write.table(dump2, file = fid[[1]], quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE);
+		},
+		getResults = function(...) {
+			if(length(fid)>0)	{	
+				if(fid[[2]]) {
+					close(fid[[1]]);
+				}
+				fid <<- list();
+			}
+			gene_names <<- character(0);
+			snps_names <<- character(0);
+ 			return(list());
+		}
+	)
+)
+
+.my.pmin = function(x, val) {
+	# minimum "pmin" function that can handle empty array
+	if(length(x) == 0) {
+		return(x)
+	} else {
+		return(pmin.int(x,val));
+	}	
+}
+
+.my.pmax = function(x, val) {
+	# minimum "pmin" function that can handle empty array
+	if(length(x) == 0) {
+		return(x)
+	} else {
+		return(pmax.int(x,val));
+	}	
+}
+
+.pv.nz = function(x){return( .my.pmax(x,.Machine$double.xmin) )}
+ 
+.SetNanRowMean = function(x) {
+	if( any(is.na(x)) ) {
+		rowmean = rowMeans(x, na.rm = TRUE);
+		rowmean[ is.na(rowmean) ] = 0;
+		for( j in which(!complete.cases(x)) ) {
+			where1 = is.na( x[j, ] );
+			x[j,where1] = rowmean[j];
+		}
+	}
+	return(x);
+}
+
+# .SNP_process_split_for_ANOVA = function(x) {
+# 		# split into 2 dummy variables
+# 
+# 		uniq = unique(c(x));
+# 		uniq = uniq[!is.na(uniq)];
+# 		
+# 		if( length(uniq) > 3 ) {
+# 			stop("More than three genotype categories is not handled by ANOVA");
+# 		} else if ( length(uniq) < 3 ) {
+# 			uniq = c(uniq, min(uniq)-(1:(3-length(uniq))));
+# 		}
+# 		
+# 		freq = matrix(0, nrow(x), length(uniq));
+# 		for(i in 1:length(uniq)) {
+# 			freq[ ,i] = rowSums(x==uniq[i], na.rm = TRUE);
+# 		}
+# 		
+# 		md = apply(freq, 1, which.max);
+# 		freq[ cbind(1:nrow(x),md) ] = -1;
+# 		
+# 		md = apply(freq, 1, which.max); # min(freq[cbind(1:nrow(slice),md)] - rowSums(select,na.rm = TRUE ))
+# 		new_slice1 = (x == uniq[md]);
+# 		new_slice1[is.na(new_slice1)] = 0;
+# 		freq[ cbind(1:nrow(x),md) ] = -1;
+# 				
+# 		md = apply(freq,1,which.max);
+# 		new_slice2 = (x == uniq[md]);
+# 		new_slice2[ is.na(new_slice2) ] = 0;
+# 		rez = vector("list", 2);
+# 		rez[[1]] = new_slice1;
+# 		rez[[2]] = new_slice2;
+# 		return( rez );
+# }
+
+.SNP_process_split_for_ANOVA = function(x,n.groups) {
+	# split into 2 dummy variables (or more)
+	
+# 	# Get the number of ANOVA groups
+# 	n.groups = options('MatrixEQTL.ANOVA.categories')[[1]];
+# 	if( is.null(n.groups))
+# 		n.groups = 3;
+	
+	# Unique values in x (make sure it has length of n.groups);
+	uniq = unique(as.vector(x));
+	uniq = uniq[!is.na(uniq)];
+	if( length(uniq) > n.groups ) {
+		stop("More than declared number of genotype categories is detected by ANOVA");
+	} else if ( length(uniq) < n.groups ) {
+		uniq = c(uniq, rep(min(uniq)-1, n.groups-length(uniq)));
+	}
+	
+	# Table of frequencies for each variable (row)
+	freq = matrix(0, nrow(x), n.groups);
+	for(i in 1:n.groups) {
+		freq[ ,i] = rowSums(x==uniq[i], na.rm = TRUE);
+	}
+	# remove NA's from x for convenience
+	x[is.na(x)] = min(uniq)-2;
+	
+	# Output list of matrices
+	rez = vector('list',n.groups-1);
+
+	# Skip the most frequent value
+	md = apply(freq, 1, which.max); # most frequent value for each variable
+	freq[ cbind(1:nrow(x),md) ] = -1;
+	
+	# The rest form dumm
+	for(j in 1:(n.groups-1)){
+		md = apply(freq, 1, which.max); 
+		freq[ cbind(1:nrow(x),md) ] = -1;
+		rez[[j]] = (x == uniq[md]);
+	}			
+	return( rez );
+}
+
+Matrix_eQTL_engine = function(
+						snps, 
+						gene, 
+						cvrt = SlicedData$new(), 
+						output_file_name, 
+						pvOutputThreshold = 1e-5, 
+						useModel = modelLINEAR, 
+						errorCovariance = numeric(), 
+						verbose = TRUE,
+ 						pvalue.hist = FALSE,
+						min.pv.by.genesnp = FALSE,
+						noFDRsaveMemory = FALSE) {
+	rez = Matrix_eQTL_main(
+				snps = snps, 
+				gene = gene, 
+				cvrt = cvrt, 
+				output_file_name = output_file_name, 
+				pvOutputThreshold = pvOutputThreshold,
+				useModel = useModel, 
+				errorCovariance = errorCovariance, 
+				verbose = verbose,
+ 				pvalue.hist = pvalue.hist,
+				min.pv.by.genesnp = min.pv.by.genesnp,
+				noFDRsaveMemory = noFDRsaveMemory);
+	return( rez );
+}
+
+Matrix_eQTL_main = function(	
+						snps, 
+						gene, 
+						cvrt = SlicedData$new(), 
+						output_file_name = "", 
+						pvOutputThreshold = 1e-5,
+						useModel = modelLINEAR, 
+						errorCovariance = numeric(), 
+						verbose = TRUE, 
+						output_file_name.cis = "", 
+						pvOutputThreshold.cis = 0,
+						snpspos = NULL, 
+						genepos = NULL,
+						cisDist = 1e6,
+ 						pvalue.hist = FALSE,
+						min.pv.by.genesnp = FALSE,
+						noFDRsaveMemory = FALSE) {
+	################################# Basic variable checks #################################
+ 	{				
+		# status("Performing basic checks of the input variables");
+		stopifnot( "SlicedData" %in% class(gene) );
+		stopifnot( "SlicedData" %in% class(snps) );
+		stopifnot( "SlicedData" %in% class(cvrt) );
+		
+		# Check dimensions
+		if( min(snps$nRows(),snps$nCols()) == 0 )
+			stop("Empty genotype dataset");
+		if( min(gene$nRows(),gene$nCols()) == 0 )
+			stop("Empty expression dataset");
+		if( snps$nCols() != gene$nCols() )
+			stop("Different number of samples in the genotype and gene expression files");
+		if( cvrt$nRows()>0 ) {
+			if( snps$nCols() != cvrt$nCols() )
+				stop("Wrong number of samples in the matrix of covariates");
+		}
+
+		stopifnot( class(pvOutputThreshold) == "numeric" );
+		stopifnot( length(pvOutputThreshold) == 1 );
+		stopifnot( pvOutputThreshold >= 0 );
+		stopifnot( pvOutputThreshold <= 1 );
+
+		stopifnot(  class(noFDRsaveMemory) == "logical" );
+		stopifnot( length(noFDRsaveMemory) == 1 );
+
+		if( pvOutputThreshold > 0 ) {
+			stopifnot( !((length(output_file_name) == 0) && noFDRsaveMemory) )
+			stopifnot( length(output_file_name) <= 1 );
+			if( length(output_file_name) == 1 ) {
+				stopifnot( class(output_file_name) %in% c("character","connection") );
+			}
+		}
+		
+		stopifnot( class(pvOutputThreshold.cis) == "numeric" );
+		stopifnot( length(pvOutputThreshold.cis) == 1 );
+		stopifnot( pvOutputThreshold.cis >= 0 );
+		stopifnot( pvOutputThreshold.cis <= 1 );
+		stopifnot( !((pvOutputThreshold > 0) & (pvOutputThreshold.cis > 0) & (pvOutputThreshold > pvOutputThreshold.cis)) );
+		stopifnot( (pvOutputThreshold > 0) | (pvOutputThreshold.cis > 0) );
+
+		stopifnot( class(useModel) == class(modelLINEAR) );
+		stopifnot( length(useModel) == 1 );
+		stopifnot( useModel %in% c(modelLINEAR, modelANOVA, modelLINEAR_CROSS) );
+		if( useModel %in%  c(modelLINEAR, modelLINEAR_CROSS) ) {
+			if( snps$nCols() <= cvrt$nRows() + 1 + 1) {
+				stop('The number of covariates exceeds the number of samples.\nLinear regression can not be fit.')
+			}
+		}
+		if( useModel == modelLINEAR_CROSS ) {
+			if( cvrt$nRows() == 0 ) {
+				stop( "Model \"modelLINEAR_CROSS\" requires at least one covariate" );
+			}
+		}
+		if( useModel == modelANOVA ) {
+			n.anova.groups = getOption('MatrixEQTL.ANOVA.categories', 3);
+			stopifnot( n.anova.groups == floor(n.anova.groups) );
+			stopifnot( n.anova.groups >= 3 );
+# 			stopifnot( n.anova.groups < snps$nCols() - cvrt$nRows() - 2 );
+			if( snps$nCols() <= cvrt$nRows() + n.anova.groups) {
+				stop('The number of covariates exceeds the number of samples.\nLinear regression (ANOVA) can not be fit.')
+			}
+		}
+		
+		stopifnot(  class(verbose) == "logical" );
+		stopifnot( length(verbose) == 1 );
+
+		stopifnot(  class(min.pv.by.genesnp) == "logical" );
+		stopifnot( length(min.pv.by.genesnp) == 1 );
+	
+		if( pvOutputThreshold.cis > 0 ) {
+			stopifnot( !((length(output_file_name.cis) == 0) && noFDRsaveMemory) )
+			stopifnot( length(output_file_name.cis) <= 1 );
+			if( length(output_file_name.cis) == 1 ) {
+				stopifnot( class(output_file_name.cis) %in% c("character","connection") );
+			}
+
+# 			stopifnot( class(output_file_name.cis) == "character" );
+# 			stopifnot( length(output_file_name.cis) == 1 );
+			stopifnot( class(snpspos) == "data.frame" );
+			stopifnot( ncol(snpspos) == 3 );
+			stopifnot( nrow(snpspos) > 0 );
+			stopifnot( class(snpspos[1,3]) %in% c("integer", "numeric") )
+			stopifnot( !any(is.na(snpspos[,3])) )
+			stopifnot( class(genepos) == "data.frame" );
+			stopifnot( ncol(genepos) == 4 );
+			stopifnot( nrow(genepos) > 0 );
+			stopifnot( class(genepos[1,3]) %in% c("integer", "numeric") )
+			stopifnot( class(genepos[1,4]) %in% c("integer", "numeric") )
+			stopifnot( !any(is.na(genepos[,3])) )
+			stopifnot( !any(is.na(genepos[,4])) )
+			stopifnot( nzchar(output_file_name.cis) )
+		}
+		
+		if( pvOutputThreshold > 0 ) {
+			stopifnot( nzchar(output_file_name) )
+		}
+		
+		stopifnot( class(errorCovariance) %in% c("numeric", "matrix") );
+		errorCovariance = as.matrix(errorCovariance);
+		if(length(errorCovariance)>0) {
+			if( nrow(errorCovariance) != ncol(errorCovariance) ) {
+				stop("The covariance matrix is not square");
+			}	
+			if( nrow(errorCovariance) != snps$nCols() ) {
+				stop("The covariance matrix size does not match the number of samples");
+			}
+			if( !all(errorCovariance == t(errorCovariance)) ) {
+				stop("The covariance matrix is not symmetric");
+			}
+		}
+	}
+	################################# Initial setup #########################################
+	{
+		gene.std = .listBuilder$new();
+		snps.std = .listBuilder$new();
+		
+		dont.clone.gene = getOption('MatrixEQTL.dont.preserve.gene.object', FALSE)
+		if(is.null(dont.clone.gene))
+			dont.clone.gene = FALSE;
+		
+		if( !dont.clone.gene )
+			gene = gene$Clone();
+		# snps = snps$Clone(); # snps is read only
+		cvrt = cvrt$Clone();
+
+		params = list(
+			output_file_name = output_file_name, 
+			pvOutputThreshold = pvOutputThreshold,
+			useModel = useModel, 
+			errorCovariance = errorCovariance , 
+			verbose = verbose, 
+			output_file_name.cis = output_file_name.cis, 
+			pvOutputThreshold.cis = pvOutputThreshold.cis,
+			cisDist = cisDist ,
+	 		pvalue.hist = pvalue.hist,
+			min.pv.by.genesnp = min.pv.by.genesnp);
+
+		if( verbose ) {
+			lastTime = 0;
+			status <- function(text) {
+				# gc();
+				newTime = proc.time()[3];
+				if(lastTime != 0) {
+					cat("Task finished in ", newTime-lastTime, " seconds\n");
+				}
+				cat(text,"\n");
+				lastTime <<- newTime;
+				unused = flush.console();
+			}
+		} else {
+			status = function(text){}
+		}
+		start.time = proc.time()[3];
+	}
+	################################# Error covariance matrix processing ####################
+	{
+		if( length(errorCovariance) > 0 ) {
+			status("Processing the error covariance matrix");
+			eig = eigen(errorCovariance, symmetric = TRUE)
+			d = eig$values;
+			v = eig$vectors;
+			#  errorCovariance == v %*% diag(d) %*% t(v)
+			#  errorCovariance^0.5 == v*sqrt(d)*v" (no single quotes anymore)
+			#  errorCovariance^(-0.5) == v*diag(1./sqrt(diag(d)))*v"
+			if( any(d<=0) ) {
+				stop("The covariance matrix is not positive definite");
+			}
+			correctionMatrix = v %*% diag(1./sqrt(d)) %*% t(v);
+			rm( eig, v, d, errorCovariance )
+		} else {
+			rm( errorCovariance );
+			correctionMatrix = numeric();
+		}
+	}
+	################################# Matching gene and SNPs locations ######################
+	if( pvOutputThreshold.cis > 0 ) {
+		status("Matching data files and location files")
+		
+		# names in the input data	
+		gene_names = gene$GetAllRowNames();
+		snps_names = snps$GetAllRowNames();
+		
+		# gene range, set: left<right
+		if(any(genepos[,3] > genepos[,4])) {
+			temp3 = genepos[,3];
+			temp4 = genepos[,4];
+			genepos[,3] = pmin(temp3,temp4);
+			genepos[,4] = pmax(temp3,temp4);
+			rm(temp3, temp4);
+		}
+		
+		# match with the location data
+		genematch = match( gene_names, genepos[ ,1],  nomatch = 0L);
+		usedgene = matrix(FALSE, nrow(genepos), 1); # genes in "genepos" that are matching  "gene_names"
+		usedgene[ genematch ] = TRUE;
+		if( !any(genematch) ) {
+			stop("Gene names do not match those in the gene location file.");
+		}
+		cat( sum(genematch>0), "of", length(gene_names), " genes matched\n");
+		
+		
+		snpsmatch = match( snps_names, snpspos[ ,1],  nomatch = 0L);
+		usedsnps = matrix(FALSE, nrow(snpspos),1);
+		usedsnps[ snpsmatch ] = TRUE;
+		if( !any(snpsmatch) ) {
+			stop("SNP names do not match those in the SNP location file.");
+		}
+		cat( sum(snpsmatch>0), "of", length(snps_names), " SNPs matched\n");
+		
+		# list used chr names
+		chrNames = unique(c( as.character(unique(snpspos[usedsnps,2])), 
+												 as.character(unique(genepos[usedgene,2])) ))
+		chrNames = chrNames[ sort.list( suppressWarnings(as.integer(chrNames)), 
+																		method = "radix", na.last = TRUE ) ];
+		# match chr names
+		genechr = match(genepos[,2],chrNames);
+		snpschr = match(snpspos[,2],chrNames);
+		
+		# max length of a chromosome
+		chrMax = max( snpspos[usedsnps, 3], genepos[usedgene, 4], na.rm = TRUE) + cisDist;
+		
+		# Single number location for all rows in "genepos" and "snpspos"
+ 		genepos2 = as.matrix(genepos[ ,3:4, drop = FALSE] + (genechr-1)*chrMax);
+ 		snpspos2 = as.matrix(snpspos[ ,3  , drop = FALSE] + (snpschr-1)*chrMax);
+		
+		# the final location arrays;
+		snps_pos = matrix(0,length(snps_names),1);
+		snps_pos[snpsmatch>0, ] = snpspos2[snpsmatch, , drop = FALSE];
+		snps_pos[rowSums(is.na(snps_pos))>0, ] = 0;
+		snps_pos[snps_pos==0] = (length(chrNames)+1) * (chrMax+cisDist);
+		rm(snps_names, snpsmatch, usedsnps, snpschr, snpspos2)
+		
+		gene_pos = matrix(0,length(gene_names),2);
+		gene_pos[genematch>0, ] = genepos2[genematch, , drop = FALSE];
+		gene_pos[rowSums(is.na(gene_pos))>0, ] = 0;
+		gene_pos[gene_pos==0] = (length(chrNames)+2) * (chrMax+cisDist);
+		rm(gene_names, genematch, usedgene, genechr, genepos2)
+		rm(chrNames, chrMax);
+
+		if( is.unsorted(snps_pos) ) {
+			status("Reordering SNPs\n");
+			ordr = sort.list(snps_pos);
+			snps$RowReorder(ordr);
+			snps_pos = snps_pos[ordr, , drop = FALSE];
+			rm(ordr);
+		}
+		if( is.unsorted(rowSums(gene_pos)) ) {
+			status("Reordering genes\n");
+			ordr = sort.list(rowSums(gene_pos));
+			gene$RowReorder(ordr);
+			gene_pos = gene_pos[ordr, , drop = FALSE];
+			rm(ordr);
+		}
+		
+		# Slice it back.
+		geneloc = vector("list", gene$nSlices())
+		gene_offset = 0;
+		for(gc in 1:gene$nSlices()) {
+			nr = length(gene$rowNameSlices[[gc]]);
+			geneloc[[gc]] = gene_pos[gene_offset + (1:nr), , drop = FALSE];
+			gene_offset = gene_offset + nr;	
+		}
+		rm(gc, gene_offset, gene_pos);
+		
+		snpsloc = vector("list", snps$nSlices())
+		snps_offset = 0;
+		for(sc in 1:snps$nSlices()) {
+			nr = length(snps$rowNameSlices[[sc]]);
+			snpsloc[[sc]] = snps_pos[snps_offset + (1:nr), , drop = FALSE];
+			snps_offset = snps_offset + nr;	
+		}
+		rm(nr, sc, snps_offset, snps_pos);
+	}
+	################################# Covariates processing #################################
+	{	
+		status("Processing covariates");
+		if( useModel == modelLINEAR_CROSS ) {
+			last.covariate = as.vector(tail( cvrt$getSlice(cvrt$nSlices()), n = 1));
+		}		
+		if( cvrt$nRows()>0 ) {
+			cvrt$SetNanRowMean();
+			cvrt$CombineInOneSlice();
+			cvrt = rbind(matrix(1,1,snps$nCols()),cvrt$getSlice(1));
+		} else {
+			cvrt = matrix(1,1,snps$nCols());
+		}
+		# Correct for the error covariance structure
+		if( length(correctionMatrix)>0 ) {
+			cvrt = cvrt %*% correctionMatrix;
+		}
+		# Orthonormalize covariates
+		# status("Orthonormalizing covariates");
+		q = qr(t(cvrt));
+		if( min(abs(diag(qr.R(q)))) < .Machine$double.eps * snps$nCols() ) {
+			stop("Colinear or zero covariates detected");
+		}
+		cvrt = t( qr.Q(q) );
+		rm( q );
+	}
+	################################# Gene expression processing ############################
+	{
+		status("Processing gene expression data (imputation, residualization, etc.)");
+		# Impute gene expression
+		gene$SetNanRowMean();
+		# Correct for the error covariance structure
+		if( length(correctionMatrix)>0 ) {
+			gene$RowMatrixMultiply(correctionMatrix);
+		}
+		# Orthogonolize expression w.r.t. covariates
+		# status("Orthogonolizing expression w.r.t. covariates");
+		gene_offsets = double(gene$nSlices()+1);
+		for( sl in 1:gene$nSlices() ) {
+			slice = gene$getSlice(sl);
+			gene_offsets[sl+1] = gene_offsets[sl] + nrow(slice);
+			rowsq1 = rowSums(slice^2);
+			slice = slice - tcrossprod(slice,cvrt) %*% cvrt;
+			rowsq2 = rowSums(slice^2);
+			# kill rows colinear with the covariates
+			delete.rows = (rowsq2 <= rowsq1 * .Machine$double.eps );
+			slice[delete.rows] = 0;
+			div = sqrt( rowSums(slice^2) );
+			div[ div == 0 ] = 1;
+			gene.std$set(sl, div);
+			gene$setSlice(sl, slice / div);
+		}
+		rm(rowsq1, rowsq2, delete.rows, div);
+		rm( sl, slice );
+		#gene$RowRemoveZeroEps();
+	}
+	################################# snps_process, testfun, pvfun, threshfun, afun  ########
+	{
+		# snps_process - preprocess SNPs slice
+		#
+		# afun --- abs for signed stats, identity for non-negative
+		# threshfun --- internal stat threshold for given p-value
+		# testfun --- t or F statistic from the internal one
+		# pvfun --- p-value from the t or F statistic
+		
+		nSamples = snps$nCols();
+		nGenes = gene$nRows();
+		nSnps  = snps$nRows();
+		nCov = nrow(cvrt);
+		# nVarTested = length(snps_list); # set in case(useModel)
+		# dfNull = nSamples - nCov;
+		# d.f. of the full model
+		betafun = NULL;
+		
+		if( useModel == modelLINEAR ) {
+			snps_process = function(x) {
+				return( list(.SetNanRowMean(x)) );
+			};
+			nVarTested = 1;
+			dfFull = nSamples - nCov - nVarTested;
+			statistic.fun = function(mat_list) {
+				return( mat_list[[1]] );
+			}
+			afun = function(x) {return(abs(x))};
+			threshfun = function(pv) {
+				thr = qt(pv/2, dfFull, lower.tail = FALSE);
+				thr = thr^2;
+				thr = sqrt(  thr / (dfFull + thr) );
+				thr[pv >= 1] = 0;
+				thr[pv <= 0] = 1;
+				return( thr );
+			}
+			testfun = function(x) { return( x * sqrt( dfFull / (1 - .my.pmin(x^2,1))));	}
+			pvfun = function(x) { return( .pv.nz(pt(-abs(x),dfFull)*2)); }
+			thresh.cis = threshfun(pvOutputThreshold.cis);
+			thresh = threshfun(pvOutputThreshold);
+			betafun = function(stat, ss, gg, select) {
+				return(stat * gene.std$get(gg)[select[,1]] / snps.std$get(ss)[select[,2]]);
+			}
+		} else 
+		if( useModel == modelANOVA ) {
+			snps_process = function(x).SNP_process_split_for_ANOVA(x,n.anova.groups);
+			nVarTested = n.anova.groups - 1;
+			dfFull = nSamples - nCov - nVarTested;
+# 			statistic.fun = function(mat_list) {
+# 				return( mat_list[[1]]^2 + mat_list[[2]]^2 );
+# 			}
+			statistic.fun = function(mat_list) {
+				x = mat_list[[1]]^2;
+				for( j in 2:length(mat_list) )
+					x = x + mat_list[[j]]^2;
+				return( x );
+			}
+			afun = identity;
+			threshfun = function(pv) {
+				thr = qf(pv, nVarTested, dfFull, lower.tail = FALSE);
+				thr = thr / (dfFull/nVarTested + thr);
+				thr[pv >= 1] = 0;
+				thr[pv <= 0] = 1;
+				return( thr );
+			}
+			testfun = function(x) { return( x / (1 - .my.pmin(x,1)) * (dfFull/nVarTested) ); }
+			pvfun = function(x) { return( .pv.nz(pf(x, nVarTested, dfFull, lower.tail = FALSE)) ); }
+			thresh.cis = threshfun(pvOutputThreshold.cis);
+			thresh = threshfun(pvOutputThreshold);
+		} else 
+		if( useModel == modelLINEAR_CROSS ) {
+			last.covariate = as.vector( last.covariate );
+			snps_process = .SNP_process_split_for_LINEAR_CROSS = function(x) {
+				out = vector("list", 2);
+				out[[1]] = .SetNanRowMean(x);
+				out[[2]] = t( t(out[[1]]) * last.covariate );
+				return( out );
+			};
+			nVarTested = 1;
+			dfFull = nSamples - nCov - nVarTested - 1;
+			statistic.fun = function(mat_list) {
+				return( mat_list[[2]] / sqrt(1 - mat_list[[1]]^2) );
+			}
+			afun = function(x) {return(abs(x))};
+			threshfun = function(pv) {
+				thr = qt(pv/2, dfFull, lower.tail = FALSE);
+				thr = thr^2;
+				thr = sqrt(  thr / (dfFull + thr) );
+				thr[pv >= 1] = 0;
+				thr[pv <= 0] = 1;
+				return( thr );
+			}
+			testfun = function(x) { return( x * sqrt( dfFull / (1 - .my.pmin(x^2,1))));	}
+			pvfun = function(x) { return( .pv.nz(pt(-abs(x),dfFull)*2 )); }		
+			thresh.cis = threshfun(pvOutputThreshold.cis);
+			thresh = threshfun(pvOutputThreshold);				
+			betafun = function(stat, ss, gg, select) {
+				return(stat * gene.std$get(gg)[select[,1]] / snps.std$get(ss)[select[,2]]);
+			}
+		}
+		params$dfFull = dfFull;
+	}
+	################################# Saver class(es) creation ##############################
+	{
+		status("Creating output file(s)");
+		if(noFDRsaveMemory) {
+			if( pvOutputThreshold > 0 ) {
+				saver.tra = .OutputSaver_direct$new();
+			}
+			if( pvOutputThreshold.cis > 0 ) {
+				saver.cis = .OutputSaver_direct$new();
+			}
+		} else {
+			if( pvOutputThreshold > 0 ) {
+				saver.tra = .OutputSaver_FRD$new();
+			}
+			if( pvOutputThreshold.cis > 0 ) {
+				saver.cis = .OutputSaver_FRD$new();
+			}
+		}
+		if( pvOutputThreshold > 0 )
+			if( pvOutputThreshold * gene$nRows() * snps$nRows() > 1000000 )
+				if(!noFDRsaveMemory)
+					cat('Warning: pvOutputThreshold may be too large.\nExpected number of findings > ', 
+							pvOutputThreshold * gene$nRows() * snps$nRows(),'\n');
+		if( (useModel == modelLINEAR) || (useModel == modelLINEAR_CROSS) ) {
+			statistic_name = "t-stat";
+		} else if( useModel == modelANOVA ) {
+			statistic_name = "F-test";
+		}
+		if(!is.null(betafun))
+			statistic_name = paste('beta\t',statistic_name, sep='');
+		if( pvOutputThreshold > 0 )
+			saver.tra$start(output_file_name,     statistic_name, snps, gene, testfun, pvfun);
+		if( pvOutputThreshold.cis > 0 )
+			saver.cis$start(output_file_name.cis, statistic_name, snps, gene, testfun, pvfun);
+		rm( statistic_name );
+	}
+	################################# Some useful functions #################################
+	{
+		orthonormalize.snps = function(cursnps, ss) {
+			for(p in 1:length(cursnps)) {
+				if(length(correctionMatrix)>0) {
+					cursnps[[p]] = cursnps[[p]] %*% correctionMatrix;
+				}
+				cursnps[[p]] = cursnps[[p]] - tcrossprod(cursnps[[p]],cvrt) %*% cvrt;
+				for(w in .seq(1,p-1L))
+					cursnps[[p]] = cursnps[[p]] - rowSums(cursnps[[p]]*cursnps[[w]]) * cursnps[[w]];
+				div = sqrt( rowSums(cursnps[[p]]^2) );
+				div[ div == 0 ] = 1;
+				cursnps[[p]] = cursnps[[p]]/div;
+			}
+			snps.std$set(ss, div);
+			return(cursnps);
+		}
+# 		if( pvOutputThreshold.cis > 0 ) {
+# 			is.cis.pair = function(gg,ss) {
+# 				return(!( ( snpsloc[[ss]][1, 1] - tail( geneloc[[gg]][ , 2], n = 1L) > cisDist) |
+# 					    ( geneloc[[gg]][1, 1] - tail( snpsloc[[ss]]      , n = 1L) > cisDist) ) );
+# 			}
+# 		}
+ 		if( pvOutputThreshold.cis > 0 ) {
+# 			sn.l = sapply(snpsloc, function(x)x[1] );
+# 			sn.r = sapply(snpsloc, function(x)tail(x,1) );
+# 			ge.l = sapply(geneloc, function(x)x[1,1] );
+# 			ge.r = sapply(geneloc, function(x)x[nrow(x) , 2] );
+			sn.l = sapply(snpsloc, '[', 1 );
+			sn.r = sapply(snpsloc, tail, 1 );
+			ge.l = sapply(geneloc, '[', 1, 1 );
+			ge.r = sapply( lapply(geneloc, tail.matrix, 1 ), '[', 2);
+			gg.1 = findInterval( sn.l , ge.r + cisDist +1) + 1;
+# 			cat(gg.1,'\n')
+			gg.2 = findInterval( sn.r , ge.l - cisDist );
+# 			cat(gg.2,'\n')
+			rm(sn.l, sn.r, ge.l, ge.r);
+ 		}
+
+	}	
+	################################# Prepare counters and histogram bins ###################
+	{
+		pvbins = NULL; # bin edges for p-values
+		statbins = 0;  # bin edges for the test statistic (|t| or F)
+		do.hist = FALSE;
+		if( length(pvalue.hist) == 1 ) {
+			if(pvalue.hist == "qqplot") {
+				pvbins = c(0, 10^rev(seq(0, log10(.Machine$double.xmin)-1, -0.05)));
+			} else
+			if( is.numeric(pvalue.hist) ) {
+				pvbins = seq(from = 0, to = 1, length.out = pvalue.hist+1);
+			} else
+			if( pvalue.hist == TRUE ) {
+				pvbins = seq(from = 0, to = 1, length.out = 100+1);
+			}
+		} else
+		if( is.numeric(pvalue.hist) && (length(pvalue.hist) > 1) ) {
+			pvbins = pvalue.hist;
+		}
+		if( is.null(pvbins) && (pvalue.hist != FALSE) ) {
+			stop("Wrong value of pvalue.hist. Must be FALSE, TRUE, \"qqplot\", or numerical");
+		}
+		do.hist = !is.null(pvbins);
+		if( do.hist ) {
+			pvbins = sort(pvbins);
+			statbins = threshfun(pvbins);
+			if( pvOutputThreshold > 0) {
+				hist.all = .histogrammer$new(pvbins, statbins);
+			}
+			if( pvOutputThreshold.cis > 0) {
+				hist.cis = .histogrammer$new(pvbins, statbins);
+			}
+		}
+		rm( pvbins, statbins);
+		if(min.pv.by.genesnp) {
+			if( pvOutputThreshold > 0) {
+				minpv.tra = .minpvalue$new(snps,gene);
+			}
+			if( pvOutputThreshold.cis > 0) {
+				minpv.cis = .minpvalue$new(snps,gene);
+			}
+		}
+	}
+	################################# Main loop #############################################
+	{
+		beta = NULL;
+		n.tests.all = 0;
+		n.tests.cis = 0;
+		n.eqtls.tra = 0;
+		n.eqtls.cis = 0;
+		
+		status("Performing eQTL analysis");
+		# ss = 1; gg = 1;
+		# ss = snps$nSlices(); gg = gene$nSlices();
+		
+		snps_offset = 0;
+		for(ss in 1:snps$nSlices()) {
+# 		for(ss in 1:min(2,snps$nSlices())) { #for debug
+			cursnps = NULL;
+			nrcs = nrow(snps$getSliceRaw(ss));
+			
+			# loop only through the useful stuff
+			for(gg in if(pvOutputThreshold>0){1:gene$nSlices()}else{.seq(gg.1[ss],gg.2[ss])} ) {
+				gene_offset = gene_offsets[gg];
+				curgene = gene$getSlice(gg);
+				nrcg = nrow(curgene);
+				if(nrcg == 0) next;
+				
+				rp = "";
+				
+				statistic = NULL;
+				select.cis.raw = NULL;
+				## do cis analysis
+# 				if( (pvOutputThreshold.cis > 0) && ( is.cis.pair(gg, ss) ) ) {
+				if( (pvOutputThreshold.cis > 0) && (gg >= gg.1[ss]) && (gg <= gg.2[ss]) ) {
+					
+					if( is.null( statistic ) ) {
+						if( is.null( cursnps ) ) {
+							cursnps = orthonormalize.snps( snps_process( snps$getSlice(ss) ), ss );
+						}					
+						mat = vector("list", length(cursnps));
+						for(d in 1:length(cursnps)) {
+							mat[[d]] = tcrossprod(curgene, cursnps[[d]]);
+						}
+						statistic = statistic.fun( mat );
+						astatistic = afun(statistic);
+# 						rm(mat);
+					}
+					
+# 					sn.l = findInterval(geneloc[[gg]][ ,1] - cisDist-1  +1   , snpsloc[[ss]]);
+# 					sn.r = findInterval(geneloc[[gg]][ ,2] + cisDist    -1   , snpsloc[[ss]]);
+					sn.l = findInterval(geneloc[[gg]][ ,1] - cisDist-1, snpsloc[[ss]]);
+					sn.r = findInterval(geneloc[[gg]][ ,2] + cisDist, snpsloc[[ss]]);
+					xx = unlist(lapply(which(sn.r>sn.l),FUN=function(x){(sn.l[x]:(sn.r[x]-1))*nrow(statistic)+x}))
+					select.cis.raw = xx[ astatistic[xx] >= thresh.cis ];
+					select.cis = arrayInd(select.cis.raw, dim(statistic))
+					
+					n.tests.cis = n.tests.cis + length(xx);
+					n.eqtls.cis = n.eqtls.cis + length(select.cis.raw);
+					
+					if( do.hist )	
+						hist.cis$update(astatistic[xx]);
+					
+					if( min.pv.by.genesnp ) {
+	# 					minpv.cis$updatecis(ss, gg, arrayInd(xx, dim(statistic)), astatistic[xx])
+						temp = double(length(astatistic));
+						dim(temp) = dim(astatistic);
+						temp[xx] = astatistic[xx];
+						minpv.cis$update(ss, gg, temp);
+					}
+					
+					if(!is.null(betafun))
+						beta = betafun(mat[[length(mat)]][select.cis.raw], ss, gg, select.cis);
+										
+					saver.cis$update( snps_offset + select.cis[ , 2],
+														gene_offset + select.cis[ , 1],
+														statistic[select.cis.raw],
+														beta);
+					
+	# 				statistic.select.cis  = statistic[ select.cis ];
+	# 				test = testfun( statistic.select.cis );
+	# 				pv = pvfun(test);
+	# 				Saver.cis$WriteBlock( cbind(snps_offset + select.cis[ , 2], gene_offset + select.cis[ , 1], test, pv) );
+	# 				counter.cis$Update(gg, ss, select.cis, pv, n.tests = length(xx), if(do.hist) afun(statistic[xx]) )
+					rp = paste(rp, ", ", formatC(n.eqtls.cis, big.mark=",", format = "f", digits = 0), " cis-eQTLs", sep = "");
+				}
+				## do trans/all analysis
+				if(pvOutputThreshold>0) {
+					if( is.null( statistic ) ) {
+						if( is.null( cursnps ) ) {
+							cursnps = orthonormalize.snps( snps_process( snps$getSlice(ss) ), ss );
+						}
+						mat = vector("list", length(cursnps));
+						for(d in 1:length(cursnps)) {
+							mat[[d]] = tcrossprod(curgene, cursnps[[d]]);
+						}
+						statistic = statistic.fun( mat );
+						astatistic = afun(statistic);
+# 						rm(mat);
+					}
+	
+					if( do.hist )	
+						hist.all$update(astatistic);
+	
+					if(!is.null(select.cis.raw)) 
+						astatistic[xx] = -1;
+	# 					select.tra.raw = select.tra.raw[!(select.tra.raw %in% select.cis.raw)];
+					
+					select.tra.raw = which( astatistic >= thresh);
+					select.tra = arrayInd(select.tra.raw, dim(statistic))
+					
+					n.eqtls.tra = n.eqtls.tra + length(select.tra.raw);
+					n.tests.all = n.tests.all + length(statistic);
+
+					if(!is.null(betafun))
+						beta = betafun(mat[[length(mat)]][select.tra.raw], ss, gg, select.tra);
+										
+					saver.tra$update( snps_offset + select.tra[ , 2],
+														gene_offset + select.tra[ , 1],
+														statistic[select.tra.raw],
+														beta);
+					
+					if( min.pv.by.genesnp ) 
+						minpv.tra$update(ss, gg, astatistic)
+					
+	# 				statistic.select.tra = statistic[ select.tra ];
+	# 				test = testfun( statistic.select.tra );
+	# 				pv = pvfun( test );
+	# 				Saver$WriteBlock( cbind( snps_offset + select.tra[ , 2], gene_offset + select.tra[ , 1], test, pv) );
+	# 				counter$Update(gg, ss, select.tra, pv, n.tests = nrcs*nrcg, if(do.hist) afun(statistic) )
+					rp = paste(rp, ", ", formatC(n.eqtls.tra, big.mark=",", format = "f", digits = 0), if(pvOutputThreshold.cis > 0)" trans-"else" ","eQTLs", sep = "")
+				}
+	
+				#gene_offset = gene_offset + nrcg;
+				if( !is.null(statistic) ) {
+					per = 100*(gg/gene$nSlices() + ss-1) / snps$nSlices();
+					cat( formatC(floor(per*100)/100, format = "f", width = 5, digits = 2), "% done" , rp, "\n", sep = "");
+	 				flush.console();
+				}
+			} # gg in 1:gene$nSlices()
+			snps_offset = snps_offset + nrow(snps$getSliceRaw(ss));
+		} # ss in 1:snps$nSlices()
+	}
+	################################# Results collection ####################################
+	{
+		rez = list(time.in.sec = proc.time()[3] - start.time);
+		rez$param = params;
+		
+		if(pvOutputThreshold.cis > 0) {
+			rez.cis = list(ntests = n.tests.cis, neqtls = n.eqtls.cis);
+			rez.cis = c(rez.cis, saver.cis$getResults( gene, snps, n.tests.cis) );
+			if(do.hist)
+				rez.cis = c(rez.cis, hist.cis$getResults() );
+			if(min.pv.by.genesnp)
+				rez.cis = c(rez.cis, minpv.cis$getResults(snps, gene, pvfun = function(x){pvfun(testfun(x))}) );
+		}
+		
+		if(pvOutputThreshold>0) {
+			rez.all = list(ntests = n.tests.all, neqtls = n.eqtls.tra + n.eqtls.cis);
+			if(pvOutputThreshold.cis > 0) {
+				rez.tra = list(ntests = n.tests.all - n.tests.cis, neqtls = n.eqtls.tra);
+				rez.tra = c(rez.tra, saver.tra$getResults( gene, snps, n.tests.all - n.tests.cis) );
+			} else {
+				rez.all = c(rez.all, saver.tra$getResults( gene, snps, n.tests.all              ) );
+			}
+			if(do.hist) {
+				rez.all = c(rez.all, hist.all$getResults() );
+				if(pvOutputThreshold.cis > 0) {
+					rez.tra$hist.bins = rez.all$hist.bins;
+					rez.tra$hist.counts = rez.all$hist.counts - rez.cis$hist.counts;
+				}
+			}
+			if(min.pv.by.genesnp) {
+				if(pvOutputThreshold.cis > 0) {
+					rez.tra = c(rez.tra, minpv.tra$getResults(snps, gene, pvfun = function(x){pvfun(testfun(x))}) );
+				} else {
+					rez.all = c(rez.all, minpv.tra$getResults(snps, gene, pvfun = function(x){pvfun(testfun(x))}) );
+				}
+			}
+		}
+	
+		if(exists('rez.all')>0)
+			rez$all = rez.all;
+		if(exists('rez.tra')>0)
+			rez$trans = rez.tra;
+		if(exists('rez.cis')>0)
+			rez$cis = rez.cis;	
+		
+		class(rez) = c(class(rez),"MatrixEQTL");
+		status("");
+	}
+# 	cat('s std ',snps.std$get(1),'\n');
+# 	cat('g std ',gene.std$get(1),'\n');
+	################################# Results collection ####################################
+	return(rez);
+}
+
+.histme = function(m, name1, name2, ...) {
+	cnts = m$hist.counts;
+	bins = m$hist.bins;
+	ntst = m$ntests;
+	centers = 0.5 * (bins[-1L] + bins[-length(bins)]);
+	density = 0.5 / (bins[-1L] - centers) * cnts / ntst;
+	ntext = paste("P-value distribution for ", name1, formatC(ntst, big.mark=",", format = "f", digits = 0), name2, " gene-SNP pairs ",sep="");
+	r = structure(list(breaks = bins, counts = cnts, density = density,
+	      mids = centers, equidist = FALSE), class = "histogram");
+	plot(r, main = ntext, ylab = "Density", xlab = "P-values", ...)
+	abline( h = 1, col = "blue");
+	return(invisible());
+}
+
+.qqme = function(m, lcol, cex, pch, ...) {
+	cnts = m$hist.counts;
+	bins = m$hist.bins;
+	ntst = m$ntests;
+	
+	cusu = cumsum(cnts) / ntst;
+	ypos = bins[-1][is.finite(cusu)];
+	xpos = cusu[is.finite(cusu)];
+	lines(-log10(xpos), -log10(ypos), col = lcol, ...);
+# 	lines(xpos, ypos, col = lcol, ...);
+	if(length(m$eqtls$pvalue)==0)
+		return();
+	ypvs = -log10(m$eqtls$pvalue);
+	xpvs = -log10(1:length(ypvs) / ntst);
+	if(length(ypvs) > 1000) {
+		# need to filter a bit, make the plotting faster
+		levels = as.integer( xpvs/xpvs[1] * 1e3);
+		keep = c(TRUE, diff(levels)!=0);
+		levels = as.integer( ypvs/ypvs[1] * 1e3);
+		keep = keep | c(TRUE, diff(levels)!=0);
+		ypvs = ypvs[keep];
+		xpvs = xpvs[keep];
+		rm(keep)
+	}
+	points(xpvs, ypvs, col = lcol, pch = pch, cex = cex, ...);
+}
+
+plot.MatrixEQTL = function(x, cex = 0.5, pch = 19, xlim = NULL, ylim = NULL, ...) {
+	if( x$param$pvalue.hist == FALSE ) {
+		warning("Cannot plot p-value distribution: the information was not recorded.\nUse pvalue.hist!=FALSE.");
+		return(invisible());
+	}
+	if( x$param$pvalue.hist == "qqplot" ) {
+		xmin = 1/max(x$cis$ntests, x$all$ntests);
+		ymax = NULL;
+		if(!is.null(ylim)) {
+			ymax = ylim[2];
+		} else {
+			ymax = -log10(min( 
+					x$cis$eqtls$pvalue[1],   x$cis$hist.bins[  c(FALSE,x$cis$hist.counts>0)][1],
+					x$all$eqtls$pvalue[1],   x$all$hist.bins[  c(FALSE,x$all$hist.counts>0)][1],
+					x$trans$eqtls$pvalue[1], x$trans$hist.bins[c(FALSE,x$trans$hist.counts>0)][1],
+					na.rm = TRUE))+0.1;
+		}
+		if(ymax == 0) {
+			ymax = -log10(.Machine$double.xmin)
+		}
+		if(!is.null(ymax))
+			ylim = c(0,ymax);
+		
+		if(is.null(xlim))
+			xlim =  c(0, -log10(xmin/1.5));
+		
+		plot(numeric(),numeric(), xlab = "-Log10(p-value), theoretical",
+			ylab = "-Log10(p-value), observed",
+			xlim = c(0, -log10(xmin/1.5)),
+			ylim = ylim,
+			xaxs="i", yaxs="i", ...);
+		lines(c(0,1e3), c(0,1e3), col = "gray");
+		if((x$param$pvOutputThreshold > 0) && (x$param$pvOutputThreshold.cis > 0)) {
+			.qqme( x$cis, "red", cex, pch, ...);
+			.qqme( x$trans, "blue", cex, pch, ...);
+			title(paste("QQ-plot for",
+				formatC(x$cis$ntests, big.mark=",", format = "f", digits = 0),
+				"local and",
+				formatC(x$trans$ntests, big.mark=",", format = "f", digits = 0),
+				"distant gene-SNP p-values"));
+			lset = c(1,2,4);
+		} else
+		if(x$param$pvOutputThreshold.cis > 0) {
+			.qqme(x$cis, "red", cex, pch, ...);
+			title(paste("QQ-plot for",
+				formatC(x$cis$ntests, big.mark=",", format = "f", digits = 0),
+				"local gene-SNP p-values"));
+			lset = c(1,4);
+		} else {
+			.qqme(x$all, "blue", cex, pch, ...);
+			title(paste("QQ-plot for all",
+				formatC(x$all$ntests, big.mark=",", format = "f", digits = 0),
+				"gene-SNP p-values"));
+			lset = c(3,4);
+		}
+		legend("topleft",
+			c("Local p-values","Distant p-values","All p-values","diagonal")[lset],
+			col =      c("red","blue","blue","gray")[lset],
+			text.col = c("red","blue","blue","gray")[lset],
+			pch = 20, lwd = 1, pt.cex = c(1,1,1,0)[lset])
+	} else {
+		if((x$param$pvOutputThreshold > 0) && (x$param$pvOutputThreshold.cis > 0)) {
+			par(mfrow=c(2,1));
+			.histme(x$cis, "", " local", ...);
+			tran = list(hist.counts = x$all$hist.counts - x$cis$hist.counts,
+					hist.bins = x$all$hist.bins,
+					ntests =  x$all$ntests - x$cis$ntests);
+			.histme(x$trans,""," distant", ...);
+			par(mfrow=c(1,1));
+		} else
+		if(x$param$pvOutputThreshold.cis > 0) {
+			.histme(x$cis, "", " local", ...);
+		} else {
+			.histme(x$all, "all ", ""  , ...);
+		}
+	}
+	return(invisible());
+}
+
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/data/Covariates.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/data/Covariates.txt	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,3 @@
+id	Sam_01	Sam_02	Sam_03	Sam_04	Sam_05	Sam_06	Sam_07	Sam_08	Sam_09	Sam_10	Sam_11	Sam_12	Sam_13	Sam_14	Sam_15	Sam_16
+gender	0	0	0	0	0	0	0	0	1	1	1	1	1	1	1	1
+age	36	40	46	65	69	43	40	54	44	70	24	34	55	62	48	40
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/data/GE.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/data/GE.txt	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,11 @@
+geneid	Sam_01	Sam_02	Sam_03	Sam_04	Sam_05	Sam_06	Sam_07	Sam_08	Sam_09	Sam_10	Sam_11	Sam_12	Sam_13	Sam_14	Sam_15	Sam_16
+Gene_01	4.91	4.63	5.18	5.07	5.74	5.09	5.31	5.29	4.73	5.72	4.75	4.54	5.01	5.03	4.84	4.44
+Gene_02	13.78	13.14	13.18	13.04	12.85	13.07	13.09	12.83	14.94	13.86	15.26	14.73	14.08	14.33	14.72	14.97
+Gene_03	12.06	12.29	13.07	13.65	13.86	12.84	12.29	13.03	13.13	14.93	12.40	13.38	13.70	14.49	14.14	13.35
+Gene_04	11.63	11.88	12.74	12.66	13.16	11.99	11.97	12.81	11.74	13.29	10.88	11.37	12.09	12.50	11.40	11.45
+Gene_05	14.72	14.66	14.63	15.91	15.46	14.74	15.17	15.01	14.05	14.90	12.96	13.56	14.06	14.44	14.12	13.76
+Gene_06	12.29	12.23	12.47	13.21	12.63	12.18	12.44	12.45	13.22	12.70	12.93	12.84	13.20	13.19	12.64	12.80
+Gene_07	12.56	12.71	12.49	13.41	13.60	12.35	12.27	13.42	14.73	15.47	13.85	14.31	15.25	15.55	14.90	14.04
+Gene_08	12.27	12.58	12.61	13.02	12.86	12.32	12.30	13.19	15.21	14.60	14.72	14.56	14.98	14.92	14.53	14.72
+Gene_09	9.82	9.29	8.95	8.18	8.11	9.43	9.17	9.25	10.95	9.82	12.60	11.99	11.20	10.31	11.13	11.69
+Gene_10	14.24	14.52	14.62	13.65	13.46	14.04	13.97	13.73	12.29	12.01	13.47	13.30	12.34	12.23	12.80	12.47
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/data/SNP.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/data/SNP.txt	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,16 @@
+snpid	Sam_01	Sam_02	Sam_03	Sam_04	Sam_05	Sam_06	Sam_07	Sam_08	Sam_09	Sam_10	Sam_11	Sam_12	Sam_13	Sam_14	Sam_15	Sam_16
+Snp_01	2	0	2	0	2	1	2	1	1	1	2	2	1	2	2	1
+Snp_02	0	1	1	2	2	1	0	0	0	1	1	1	1	0	1	1
+Snp_03	1	0	1	0	1	1	1	1	0	1	1	0	1	1	1	2
+Snp_04	0	1	2	2	2	1	1	0	0	0	1	2	1	1	1	0
+Snp_05	1	1	2	1	1	2	1	1	0	1	1	2	0	1	2	1
+Snp_06	2	2	2	1	1	0	1	0	2	1	1	1	2	0	2	1
+Snp_07	1	1	2	2	0	1	1	1	1	0	2	2	0	1	1	1
+Snp_08	1	0	1	0	1	0	0	1	1	1	0	2	0	1	1	1
+Snp_09	2	1	2	2	0	1	1	0	2	1	1	0	1	1	0	0
+Snp_10	1	1	0	0	0	2	2	1	1	2	1	1	1	1	1	0
+Snp_11	2	2	2	0	2	1	1	2	1	2	0	1	0	1	1	2
+Snp_12	1	1	2	2	2	1	1	1	1	0	2	0	1	1	0	2
+Snp_13	0	1	1	1	1	1	2	1	2	2	0	0	0	1	1	1
+Snp_14	0	0	1	0	1	2	2	2	1	1	1	0	1	0	1	0
+Snp_15	1	2	1	2	2	1	2	1	1	2	1	1	1	2	0	2
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/data/geneloc.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/data/geneloc.txt	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,11 @@
+geneid	chr	left	right
+Gene_01	chr1	721289	731289
+Gene_02	chr1	752565	762565
+Gene_03	chr1	777121	787121
+Gene_04	chr1	785988	795988
+Gene_05	chr1	792479	802479
+Gene_06	chr1	798958	808958
+Gene_07	chr1	888658	898658
+Gene_08	chr1	918572	928572
+Gene_09	chr1	926430	936430
+Gene_10	chr1	1000000	1010000
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/data/snpsloc.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/data/snpsloc.txt	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,16 @@
+snpid	chr	pos
+Snp_01	chr1	721289
+Snp_02	chr1	752565
+Snp_03	chr1	777121
+Snp_04	chr1	785988
+Snp_05	chr1	792479
+Snp_06	chr1	798958
+Snp_07	chr1	888658
+Snp_08	chr1	918572
+Snp_09	chr1	926430
+Snp_10	chr1	947033
+Snp_11	chr2	721289
+Snp_12	chr2	752565
+Snp_13	chr2	777121
+Snp_14	chr2	785988
+Snp_15	chr2	792479
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/demo/00Index
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/demo/00Index	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,10 @@
+a.nocvrt         Test that Matrix eQTL gives correct estimates for additive linear model. Homoscedastic case without covariates.
+b.cvrt           Test that Matrix eQTL gives correct estimates for additive linear model. Homoscedastic case with covariates.
+c.weights        Test that Matrix eQTL gives correct estimates for additive linear model. Heteroscedastic case with covariates.
+d.ANOVA          Test that Matrix eQTL gives correct estimates for ANOVA linear model with 3 groups. Heteroscedastic case with covariates.
+d.ANOVA5         Test that Matrix eQTL gives correct estimates for ANOVA linear model with 5 groups. Heteroscedastic case with covariates.
+e.interaction    Test that Matrix eQTL gives correct estimates for linear model with genotype-covariate interaction. Heteroscedastic case with covariates.
+p.hist           Sample code for plotting histogram of p-values
+q.qqplot         Sample code for plotting QQ-plot of p-values
+sample.all       Sample code for eQTL analysis of the toy data set.
+sample.cis       Sample code for eQTL analysis of the toy data set with separate p-value thresholds and FDR estimation for cis- and trans-eQTLs.
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/demo/a.nocvrt.r
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/demo/a.nocvrt.r	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,56 @@
+library("MatrixEQTL");
+
+# Number of columns (samples)
+n = 100;
+
+
+
+
+
+
+
+
+
+
+# Generate the vectors with genotype and expression variables
+snps.mat = rnorm(n);
+gene.mat = rnorm(n) + 0.5 * snps.mat;
+
+# Create 3 SlicedData objects for the analysis
+snps1 = SlicedData$new( matrix( snps.mat, nrow = 1 ) );
+gene1 = SlicedData$new( matrix( gene.mat, nrow = 1 ) );
+cvrt1 = SlicedData$new();
+
+# Produce no output files
+filename = NULL; # tempfile()
+
+# Call the main analysis function
+me = Matrix_eQTL_main(
+	snps = snps1, 
+	gene = gene1, 
+	cvrt = cvrt1, 
+	output_file_name = filename, 
+	pvOutputThreshold = 1, 
+	useModel = modelLINEAR, 
+	errorCovariance = numeric(), 
+	verbose = TRUE,
+	pvalue.hist = FALSE );
+
+# Pull Matrix eQTL results - t-statistic and p-value
+beta = me$all$eqtls$beta;
+tstat = me$all$eqtls$statistic;
+pvalue = me$all$eqtls$pvalue;
+rez = c(beta = beta, tstat = tstat, pvalue = pvalue);
+# And compare to those from the linear regression in R
+{
+	cat("\n\n Matrix eQTL: \n"); 
+	print(rez);
+	cat("\n R summary(lm()) output: \n");
+	lmdl = lm( gene.mat ~ snps.mat );
+	
+	lmout = summary(lmdl)$coefficients[2,c("Estimate","t value","Pr(>|t|)")];
+	print( lmout );
+}
+
+# Results from Matrix eQTL and "lm" must agree
+stopifnot(all.equal(lmout, rez, check.attributes=FALSE));
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/demo/b.cvrt.r
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/demo/b.cvrt.r	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,56 @@
+library("MatrixEQTL");
+
+# Number of columns (samples)
+n = 100;
+
+# Number of covariates
+nc = 10;
+
+
+
+
+# Generate the covariates
+cvrt.mat = 2 + matrix(rnorm(n*nc), ncol = nc);
+
+# Generate the vectors with genotype and expression variables
+snps.mat = cvrt.mat %*% rnorm(nc) + rnorm(n);
+gene.mat = cvrt.mat %*% rnorm(nc) + rnorm(n) + 0.5 * snps.mat + 1;
+
+# Create 3 SlicedData objects for the analysis
+snps1 = SlicedData$new( matrix( snps.mat, nrow = 1 ) );
+gene1 = SlicedData$new( matrix( gene.mat, nrow = 1 ) );
+cvrt1 = SlicedData$new( t(cvrt.mat) );
+
+# Produce no output files
+filename = NULL; # tempfile()
+
+# Call the main analysis function
+me = Matrix_eQTL_main(
+	snps = snps1, 
+	gene = gene1, 
+	cvrt = cvrt1, 
+	output_file_name = filename, 
+	pvOutputThreshold = 1, 
+	useModel = modelLINEAR, 
+	errorCovariance = numeric(), 
+	verbose = TRUE,
+	pvalue.hist = FALSE );
+
+# Pull Matrix eQTL results - t-statistic and p-value
+beta = me$all$eqtls$beta;
+tstat = me$all$eqtls$statistic;
+pvalue = me$all$eqtls$pvalue;
+rez = c(beta = beta, tstat = tstat, pvalue = pvalue);
+# And compare to those from the linear regression in R
+{
+	cat("\n\n Matrix eQTL: \n"); 
+	print(rez);
+	cat("\n R summary(lm()) output: \n");
+	lmdl = lm( gene.mat ~ snps.mat + cvrt.mat );
+	
+	lmout = summary(lmdl)$coefficients[2,c("Estimate","t value","Pr(>|t|)")];
+	print( lmout );
+}
+
+# Results from Matrix eQTL and "lm" must agree
+stopifnot(all.equal(lmout, rez, check.attributes=FALSE));
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/demo/c.weights.r
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/demo/c.weights.r	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,56 @@
+library("MatrixEQTL");
+
+# Number of columns (samples)
+n = 100;
+
+# Number of covariates
+nc = 10;
+
+# Generate the standard deviation of the noise
+noise.std = 0.1 + rnorm(n)^2;
+
+# Generate the covariates
+cvrt.mat = 2 + matrix(rnorm(n*nc), ncol = nc);
+
+# Generate the vectors with genotype and expression variables
+snps.mat = cvrt.mat %*% rnorm(nc) + rnorm(n);
+gene.mat = cvrt.mat %*% rnorm(nc) + rnorm(n) * noise.std + 0.5 * snps.mat + 1;
+
+# Create 3 SlicedData objects for the analysis
+snps1 = SlicedData$new( matrix( snps.mat, nrow = 1 ) );
+gene1 = SlicedData$new( matrix( gene.mat, nrow = 1 ) );
+cvrt1 = SlicedData$new( t(cvrt.mat) );
+
+# Produce no output files
+filename = NULL; # tempfile()
+
+# Call the main analysis function
+me = Matrix_eQTL_main(
+	snps = snps1, 
+	gene = gene1, 
+	cvrt = cvrt1, 
+	output_file_name = filename, 
+	pvOutputThreshold = 1, 
+	useModel = modelLINEAR, 
+	errorCovariance = diag(noise.std^2), 
+	verbose = TRUE,
+	pvalue.hist = FALSE );
+
+# Pull Matrix eQTL results - t-statistic and p-value
+beta = me$all$eqtls$beta;
+tstat = me$all$eqtls$statistic;
+pvalue = me$all$eqtls$pvalue;
+rez = c(beta = beta, tstat = tstat, pvalue = pvalue);
+# And compare to those from the linear regression in R
+{
+	cat("\n\n Matrix eQTL: \n");
+	print(rez);
+	cat("\n R summary(lm()) output: \n");
+	lmdl = lm( gene.mat ~ snps.mat + cvrt.mat,
+						 weights = 1/noise.std^2 );
+	lmout = summary(lmdl)$coefficients[2,c("Estimate","t value","Pr(>|t|)")];
+	print( lmout );
+}
+
+# Results from Matrix eQTL and "lm" must agree
+stopifnot(all.equal(lmout, rez, check.attributes=FALSE));
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/demo/d.ANOVA.r
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/demo/d.ANOVA.r	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,56 @@
+library("MatrixEQTL");
+
+# Number of columns (samples)
+n = 100;
+
+# Number of covariates
+nc = 10;
+
+# Generate the standard deviation of the noise
+noise.std = 0.1 + rnorm(n)^2;
+
+# Generate the covariates
+cvrt.mat = 2 + matrix(rnorm(n*nc), ncol = nc);
+
+# Generate the vectors with single genotype and expression variables
+snps.mat = floor(runif(n, min = 0, max = 3));
+gene.mat = 1 + (snps.mat==1) + cvrt.mat %*% rnorm(nc) + rnorm(n) * noise.std;
+
+# Create 3 SlicedData objects for the analysis
+snps1 = SlicedData$new( matrix( snps.mat, nrow = 1 ) );
+gene1 = SlicedData$new( matrix( gene.mat, nrow = 1 ) );
+cvrt1 = SlicedData$new( t(cvrt.mat) );
+
+# Produce no output files
+filename = NULL; # tempfile()
+
+# Call the main analysis function
+me = Matrix_eQTL_main(
+	snps = snps1,
+	gene = gene1,
+	cvrt = cvrt1,
+	output_file_name = filename,
+	pvOutputThreshold = 1,
+	useModel = modelANOVA,
+	errorCovariance = diag(noise.std^2),
+	verbose = TRUE,
+	pvalue.hist = FALSE );
+
+# Pull Matrix eQTL results - t-statistic and p-value
+
+fstat = me$all$eqtls$statistic;
+pvalue = me$all$eqtls$pvalue;
+rez = c( Fstat = fstat, pvalue = pvalue);
+# And compare to those from ANOVA in R
+{
+	cat("\n\n Matrix eQTL: \n");
+	print(rez);
+	cat("\n R anova(lm()) output: \n");
+	lmdl = lm( gene.mat ~ cvrt.mat + factor(snps.mat),
+						 weights = 1/noise.std^2 );
+	lmout = anova(lmdl)[2, c("F value","Pr(>F)")];
+	print( lmout );
+}
+
+# Results from Matrix eQTL and "lm" must agree
+stopifnot(all.equal(lmout, rez, check.attributes=FALSE));
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/demo/d.ANOVA5.r
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/demo/d.ANOVA5.r	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,56 @@
+library("MatrixEQTL");
+			
+anova.groups = 5;
+options(MatrixEQTL.ANOVA.categories = anova.groups);
+
+# Number of columns (samples)
+n = 100;
+# Number of covariates
+nc = 10;
+# Generate the standard deviation of the noise
+noise.std = 0.1 + rnorm(n)^2;
+# Generate the covariates
+cvrt.mat = 2 + matrix(rnorm(n*nc), ncol = nc);
+
+# Generate the vectors with single genotype and expression variables
+snps.mat = floor(runif(n, min = 0, max = anova.groups));
+gene.mat = 1 + (snps.mat==1) + cvrt.mat %*% rnorm(nc) + rnorm(n) * noise.std;
+
+# Create 3 SlicedData objects for the analysis
+snps1 = SlicedData$new( matrix( snps.mat, nrow = 1 ) );
+gene1 = SlicedData$new( matrix( gene.mat, nrow = 1 ) );
+cvrt1 = SlicedData$new( t(cvrt.mat) );
+
+# Produce no output files
+filename = NULL; # tempfile()
+
+# Call the main analysis function
+me = Matrix_eQTL_main(
+  snps = snps1, 
+  gene = gene1, 
+  cvrt = cvrt1, 
+  output_file_name = filename, 
+  pvOutputThreshold = 1, 
+  useModel = modelANOVA, 
+  errorCovariance = diag(noise.std^2), 
+  verbose = TRUE,
+  pvalue.hist = FALSE );
+
+# Pull Matrix eQTL results - t-statistic and p-value
+
+fstat = me$all$eqtls$statistic;
+pvalue = me$all$eqtls$pvalue;
+rez = c( Fstat = fstat, pvalue = pvalue);
+# And compare to those from ANOVA in R
+{
+  cat("\n\n Matrix eQTL: \n"); 
+  print(rez);
+  cat("\n R anova(lm()) output: \n");
+	lmdl = lm( gene.mat ~ cvrt.mat + factor(snps.mat), 
+						 weights = 1/noise.std^2 );
+	lmout = anova(lmdl)[2, c("F value","Pr(>F)")];
+  print( lmout );
+}
+
+# Results from Matrix eQTL and "lm" must agree
+stopifnot(all.equal(lmout, rez, check.attributes=FALSE));
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/demo/e.interaction.r
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/demo/e.interaction.r	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,56 @@
+library("MatrixEQTL");
+
+# Number of columns (samples)
+n = 25;
+
+# Number of covariates
+nc = 10;
+
+# Generate the standard deviation of the noise
+noise.std = 0.1 + rnorm(n)^2;
+
+# Generate the covariates
+cvrt.mat = 2 + matrix(rnorm(n*nc), ncol = nc);
+
+# Generate the vectors with single genotype and expression variables
+snps.mat = cvrt.mat %*% rnorm(nc) + rnorm(n);
+gene.mat = cvrt.mat %*% rnorm(nc) + rnorm(n) * noise.std + 
+						1 + 0.5 * snps.mat + snps.mat * cvrt.mat[,nc];
+# Create 3 SlicedData objects for the analysis
+snps1 = SlicedData$new( matrix( snps.mat, nrow = 1 ) );
+gene1 = SlicedData$new( matrix( gene.mat, nrow = 1 ) );
+cvrt1 = SlicedData$new( t(cvrt.mat) );
+
+# Produce no output files
+filename = NULL; # tempfile()
+
+# Call the main analysis function
+me = Matrix_eQTL_main(
+	snps = snps1, 
+	gene = gene1, 
+	cvrt = cvrt1, 
+	output_file_name = filename, 
+	pvOutputThreshold = 1, 
+	useModel = modelLINEAR_CROSS, 
+	errorCovariance = diag(noise.std^2), 
+	verbose = TRUE,
+	pvalue.hist = FALSE );
+
+# Pull Matrix eQTL results - t-statistic and p-value
+beta = me$all$eqtls$beta;
+tstat = me$all$eqtls$statistic;
+pvalue = me$all$eqtls$pvalue;
+rez = c(beta = beta, tstat = tstat, pvalue = pvalue);
+# And compare to those from the linear regression in R
+{
+	cat("\n\n Matrix eQTL: \n"); 
+	print(rez);
+	cat("\n R summary(lm()) output: \n");
+	lmdl = lm( gene.mat ~ snps.mat + cvrt.mat + snps.mat*cvrt.mat[,nc],
+					   weights = 1/noise.std^2 );
+	lmout = tail(summary(lmdl)$coefficients,1)[,c(1,3,4)];
+	print( tail(lmout) );
+}
+
+# Results from Matrix eQTL and "lm" must agree
+stopifnot(all.equal(lmout, rez, check.attributes=FALSE));
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/demo/p.hist.r
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/demo/p.hist.r	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,43 @@
+library("MatrixEQTL")
+
+# Number of samples
+n = 100;
+
+# Number of variables
+ngs = 2000;
+
+# Common signal in all variables (population stratification)
+pop = 0.2 * rnorm(n);
+
+# data matrices
+snps.mat = matrix(rnorm(n*ngs), ncol = ngs) + pop;
+gene.mat = matrix(rnorm(n*ngs), ncol = ngs) + pop + snps.mat*((1:ngs)/ngs)^9/2;
+
+# data objects for Matrix eQTL engine
+snps1 = SlicedData$new( t( snps.mat ) );
+gene1 = SlicedData$new( t( gene.mat ) );
+cvrt1 = SlicedData$new();
+rm(snps.mat, gene.mat);
+
+# Slice data in blocks of 500 variables
+snps1$ResliceCombined(500);
+gene1$ResliceCombined(500);
+
+# Produce no output files
+filename = NULL; # tempfile()
+
+# Perform analysis recording information for a histogram
+me = Matrix_eQTL_main(
+	snps = snps1,
+	gene = gene1,
+	cvrt = cvrt1,
+	output_file_name = filename, 
+	pvOutputThreshold = 1e-100, 
+	useModel = modelLINEAR, 
+	errorCovariance = numeric(), 
+	verbose = TRUE,
+	pvalue.hist = 100);
+
+# png(filename = "histogram.png", width = 650, height = 650);
+plot(me, col="grey");
+# dev.off();
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/demo/q.qqplot.r
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/demo/q.qqplot.r	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,43 @@
+library("MatrixEQTL")
+
+# Number of samples
+n = 100;
+
+# Number of variables
+ngs = 2000;
+
+# Common signal in all variables (population stratification)
+pop = 0.2 * rnorm(n);
+
+# data matrices
+snps.mat = matrix(rnorm(n*ngs), ncol = ngs) + pop;
+gene.mat = matrix(rnorm(n*ngs), ncol = ngs) + pop + snps.mat*((1:ngs)/ngs)^9/2;
+
+# data objects for Matrix eQTL engine
+snps1 = SlicedData$new( t( snps.mat ) );
+gene1 = SlicedData$new( t( gene.mat ) );
+cvrt1 = SlicedData$new();
+rm(snps.mat, gene.mat);
+
+# Slice data in blocks of 500 variables
+snps1$ResliceCombined(500);
+gene1$ResliceCombined(500);
+
+# Produce no output files
+filename = NULL; # tempfile()
+
+# Perform analysis recording information for a Q-Q plot
+me = Matrix_eQTL_main(
+	snps = snps1, 
+	gene = gene1, 
+	cvrt = cvrt1, 
+	output_file_name = filename,
+	pvOutputThreshold = 1e-6, 
+	useModel = modelLINEAR, 
+	errorCovariance = numeric(), 
+	verbose = TRUE,
+	pvalue.hist = "qqplot");
+
+# png(filename = "QQplot.png", width = 650, height = 650);
+plot(me, pch = 16, cex = 0.7, ylim = c(0,11));
+# dev.off();
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/demo/sample.all.r
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/demo/sample.all.r	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,96 @@
+# Matrix eQTL by Andrey A. Shabalin
+# http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/
+# 
+# Be sure to use an up to date version of R and Matrix eQTL.
+
+# source("Matrix_eQTL_R/Matrix_eQTL_engine.r");
+library(MatrixEQTL)
+
+## Location of the package with the data files.
+base.dir = find.package('MatrixEQTL');
+# base.dir = '.';
+
+## Settings
+
+# Linear model to use, modelANOVA, modelLINEAR, or modelLINEAR_CROSS
+useModel = modelLINEAR; # modelANOVA, modelLINEAR, or modelLINEAR_CROSS
+
+# Genotype file name
+SNP_file_name = paste(base.dir, "/data/SNP.txt", sep="");
+
+# Gene expression file name
+expression_file_name = paste(base.dir, "/data/GE.txt", sep="");
+
+# Covariates file name
+# Set to character() for no covariates
+covariates_file_name = paste(base.dir, "/data/Covariates.txt", sep="");
+
+# Output file name
+output_file_name = tempfile();
+
+# Only associations significant at this level will be saved
+pvOutputThreshold = 1e-2;
+
+# Error covariance matrix
+# Set to numeric() for identity.
+errorCovariance = numeric();
+# errorCovariance = read.table("Sample_Data/errorCovariance.txt");
+
+
+## Load genotype data
+
+snps = SlicedData$new();
+snps$fileDelimiter = "\t";      # the TAB character
+snps$fileOmitCharacters = "NA"; # denote missing values;
+snps$fileSkipRows = 1;          # one row of column labels
+snps$fileSkipColumns = 1;       # one column of row labels
+snps$fileSliceSize = 2000;      # read file in slices of 2,000 rows
+snps$LoadFile(SNP_file_name);
+
+## Load gene expression data
+
+gene = SlicedData$new();
+gene$fileDelimiter = "\t";      # the TAB character
+gene$fileOmitCharacters = "NA"; # denote missing values;
+gene$fileSkipRows = 1;          # one row of column labels
+gene$fileSkipColumns = 1;       # one column of row labels
+gene$fileSliceSize = 2000;      # read file in slices of 2,000 rows
+gene$LoadFile(expression_file_name);
+
+## Load covariates
+
+cvrt = SlicedData$new();
+cvrt$fileDelimiter = "\t";      # the TAB character
+cvrt$fileOmitCharacters = "NA"; # denote missing values;
+cvrt$fileSkipRows = 1;          # one row of column labels
+cvrt$fileSkipColumns = 1;       # one column of row labels
+if(length(covariates_file_name)>0) {
+	cvrt$LoadFile(covariates_file_name);
+}
+
+## Run the analysis
+
+me = Matrix_eQTL_engine(
+		snps = snps,
+		gene = gene,
+		cvrt = cvrt,
+		output_file_name = output_file_name,
+		pvOutputThreshold = pvOutputThreshold,
+		useModel = useModel, 
+		errorCovariance = errorCovariance, 
+		verbose = TRUE,
+		pvalue.hist = TRUE,
+		min.pv.by.genesnp = FALSE,
+		noFDRsaveMemory = FALSE);
+
+unlink(output_file_name);
+
+## Results:
+
+cat('Analysis done in: ', me$time.in.sec, ' seconds', '\n');
+cat('Detected eQTLs:', '\n');
+show(me$all$eqtls)
+
+## Plot the histogram of all p-values
+
+plot(me)
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/demo/sample.cis.r
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/demo/sample.cis.r	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,112 @@
+# Matrix eQTL by Andrey A. Shabalin
+# http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/
+# 
+# Be sure to use an up to date version of R and Matrix eQTL.
+
+# source("Matrix_eQTL_R/Matrix_eQTL_engine.r");
+library(MatrixEQTL)
+
+## Location of the package with the data files.
+base.dir = find.package('MatrixEQTL');
+# base.dir = '.';
+
+## Settings
+
+# Linear model to use, modelANOVA, modelLINEAR, or modelLINEAR_CROSS
+useModel = modelLINEAR; # modelANOVA, modelLINEAR, or modelLINEAR_CROSS
+
+# Genotype file name
+SNP_file_name = paste(base.dir, "/data/SNP.txt", sep="");
+snps_location_file_name = paste(base.dir, "/data/snpsloc.txt", sep="");
+
+# Gene expression file name
+expression_file_name = paste(base.dir, "/data/GE.txt", sep="");
+gene_location_file_name = paste(base.dir, "/data/geneloc.txt", sep="");
+
+# Covariates file name
+# Set to character() for no covariates
+covariates_file_name = paste(base.dir, "/data/Covariates.txt", sep="");
+
+# Output file name
+output_file_name_cis = tempfile();
+output_file_name_tra = tempfile();
+
+# Only associations significant at this level will be saved
+pvOutputThreshold_cis = 2e-2;
+pvOutputThreshold_tra = 1e-2;
+
+# Error covariance matrix
+# Set to numeric() for identity.
+errorCovariance = numeric();
+# errorCovariance = read.table("Sample_Data/errorCovariance.txt");
+
+# Distance for local gene-SNP pairs
+cisDist = 1e6;
+
+## Load genotype data
+
+snps = SlicedData$new();
+snps$fileDelimiter = "\t";      # the TAB character
+snps$fileOmitCharacters = "NA"; # denote missing values;
+snps$fileSkipRows = 1;          # one row of column labels
+snps$fileSkipColumns = 1;       # one column of row labels
+snps$fileSliceSize = 2000;      # read file in slices of 2,000 rows
+snps$LoadFile(SNP_file_name);
+
+## Load gene expression data
+
+gene = SlicedData$new();
+gene$fileDelimiter = "\t";      # the TAB character
+gene$fileOmitCharacters = "NA"; # denote missing values;
+gene$fileSkipRows = 1;          # one row of column labels
+gene$fileSkipColumns = 1;       # one column of row labels
+gene$fileSliceSize = 2000;      # read file in slices of 2,000 rows
+gene$LoadFile(expression_file_name);
+
+## Load covariates
+
+cvrt = SlicedData$new();
+cvrt$fileDelimiter = "\t";      # the TAB character
+cvrt$fileOmitCharacters = "NA"; # denote missing values;
+cvrt$fileSkipRows = 1;          # one row of column labels
+cvrt$fileSkipColumns = 1;       # one column of row labels
+if(length(covariates_file_name)>0) {
+	cvrt$LoadFile(covariates_file_name);
+}
+
+## Run the analysis
+snpspos = read.table(snps_location_file_name, header = TRUE, stringsAsFactors = FALSE);
+genepos = read.table(gene_location_file_name, header = TRUE, stringsAsFactors = FALSE);
+
+me = Matrix_eQTL_main(
+		snps = snps, 
+		gene = gene, 
+		cvrt = cvrt,
+		output_file_name     = output_file_name_tra,
+		pvOutputThreshold     = pvOutputThreshold_tra,
+		useModel = useModel, 
+		errorCovariance = errorCovariance, 
+		verbose = TRUE, 
+		output_file_name.cis = output_file_name_cis,
+		pvOutputThreshold.cis = pvOutputThreshold_cis,
+		snpspos = snpspos, 
+		genepos = genepos,
+		cisDist = cisDist,
+		pvalue.hist = TRUE,
+		min.pv.by.genesnp = TRUE,
+		noFDRsaveMemory = FALSE);
+
+unlink(output_file_name_tra);
+unlink(output_file_name_cis);
+
+## Results:
+
+cat('Analysis done in: ', me$time.in.sec, ' seconds', '\n');
+cat('Detected local eQTLs:', '\n');
+show(me$cis$eqtls)
+cat('Detected distant eQTLs:', '\n');
+show(me$trans$eqtls)
+
+## Plot the histogram of local and distant p-values
+
+plot(me)
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/inst/CITATION
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/inst/CITATION	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,15 @@
+citHeader("To cite the 'MatrixEQTL' package in publications use:")
+
+citEntry(entry="article",
+         title = "Matrix eQTL: ultra fast eQTL analysis via large matrix operations",
+         author = personList(as.person("A.A. Shabalin")),
+         address  = "University of North Carolina at Chapel Hill",
+         year     = "2012",
+         url      = "URL http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/",
+         publisher= "Oxford Univ Press",
+         journal  = "Bioinformatics",
+         volume   = "28",
+         number   = "10",
+         pages    = "1353--1358",
+         textVersion = "Shabalin, Andrey A. Matrix eQTL: ultra fast eQTL analysis via large matrix operations. Bioinformatics 28, no. 10 (2012): 1353-1358."
+)
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/man/Covariates.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/man/Covariates.Rd	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,25 @@
+\name{Covariates}
+\alias{Covariates}
+\docType{data}
+\title{
+	Artificial data for Matrix eQTL sample code: Covariates.
+}
+\description{
+	Artificial data set with 2 covariates across 15 samples. 
+	Columns of the file must match to those of the expression and genotype data sets.
+}
+\format{
+	\tabular{llll}{
+		id     \tab Sam_01   \tab Sam_02 \tab \ldots \cr
+		gender \tab 0        \tab 0      \tab \ldots \cr
+		age    \tab 40        \tab 46    \tab \ldots \cr
+		
+	}
+}
+\references{
+	The package website: \url{http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/}
+}
+\seealso{
+	See \code{\link{Matrix_eQTL_engine}} for reference and sample code.
+}
+\keyword{datasets}
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/man/GE.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/man/GE.Rd	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,25 @@
+\name{GE}
+\alias{GE}
+\docType{data}
+\title{
+	Artificial data for Matrix eQTL sample code: Gene expression.
+}
+\description{
+	Artificial data set with expression of 10 genes across 15 samples. 
+	Columns of the file must match to those of the genotype and covariates data sets.
+}
+\format{
+	\tabular{llll}{
+		geneid  \tab Sam_01   \tab Sam_02 \tab \ldots \cr
+		Gene_01 \tab 4.91     \tab 4.63   \tab \ldots \cr
+		Gene_02 \tab 13.78    \tab 13.14  \tab \ldots \cr
+		\ldots  \tab \ldots   \tab \ldots \tab \ldots \cr
+	}
+}
+\references{
+	The package website: \url{http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/}
+}
+\seealso{
+	See \code{\link{Matrix_eQTL_engine}} for reference and sample code.
+}
+\keyword{datasets}
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/man/MatrixEQTL-package.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/man/MatrixEQTL-package.Rd	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,40 @@
+\name{MatrixEQTL-package}
+\alias{MatrixEQTL-package}
+\alias{MatrixEQTL}
+\docType{package}
+\title{
+	Matrix eQTL: Ultra fast eQTL analysis via large matrix operations
+}
+\description{
+	Matrix eQTL is designed for fast eQTL analysis of large datasets.
+	Matrix eQTL can test for association between genotype and gene expression using linear regression 
+	with either additive or ANOVA (additive and dominant) genotype effects.
+	The models can include covariates to account for such factors as population stratification, gender, clinical variables, and surrogate variables.
+       Matrix eQTL also supports models with heteroscedastic and/or correlated errors,
+	false discovery rate estimation and separate treatment of local (cis) and distant (trans) eQTLs.
+}
+\details{
+	\tabular{ll}{
+		Package: \tab MatrixEQTL\cr
+		Type: \tab Package\cr
+		Version: \tab 2.1.1\cr
+		Date: \tab 2014-02-24\cr
+		License: \tab LGPL-3 \cr
+		LazyLoad: \tab yes\cr
+		Depends: \tab methods\cr
+	}
+}
+\author{
+	Andrey Shabalin \email{ashabalin@vcu.edu}
+	
+	Maintainer: Andrey Shabalin <ashabalin@vcu.edu>
+}
+\references{
+	The package website: \url{http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/}
+}
+\keyword{ package }
+\keyword{ MatrixEQTL }
+\keyword{ Matrix eQTL }
+\seealso{
+	See \code{\link{Matrix_eQTL_engine}} for reference and sample code.
+}
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/man/MatrixEQTL_cis_code.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/man/MatrixEQTL_cis_code.Rd	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,129 @@
+\name{MatrixEQTL_cis_code}
+\alias{MatrixEQTL_cis_code}
+
+\title{Sample code for cis/trans-eQTL analysis with Matrix eQTL}
+\description{
+  The following code is the best starting point for those who want to perform cis-/trans-eQTL analysis with Matrix eQTL.
+}
+\references{
+  The package website: \url{http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/}
+}
+\seealso{
+  See \code{\link{Matrix_eQTL_engine}} for reference and other sample code.
+}
+\author{
+  Andrey Shabalin \email{ashabalin@vcu.edu}
+}
+\examples{
+# Matrix eQTL by Andrey A. Shabalin
+# http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/
+# 
+# Be sure to use an up to date version of R and Matrix eQTL.
+
+# source("Matrix_eQTL_R/Matrix_eQTL_engine.r");
+library(MatrixEQTL)
+
+## Location of the package with the data files.
+base.dir = find.package('MatrixEQTL');
+
+## Settings
+
+# Linear model to use, modelANOVA, modelLINEAR, or modelLINEAR_CROSS
+useModel = modelLINEAR; # modelANOVA, modelLINEAR, or modelLINEAR_CROSS
+
+# Genotype file name
+SNP_file_name = paste(base.dir, "/data/SNP.txt", sep="");
+snps_location_file_name = paste(base.dir, "/data/snpsloc.txt", sep="");
+
+# Gene expression file name
+expression_file_name = paste(base.dir, "/data/GE.txt", sep="");
+gene_location_file_name = paste(base.dir, "/data/geneloc.txt", sep="");
+
+# Covariates file name
+# Set to character() for no covariates
+covariates_file_name = paste(base.dir, "/data/Covariates.txt", sep="");
+
+# Output file name
+output_file_name_cis = tempfile();
+output_file_name_tra = tempfile();
+
+# Only associations significant at this level will be saved
+pvOutputThreshold_cis = 2e-2;
+pvOutputThreshold_tra = 1e-2;
+
+# Error covariance matrix
+# Set to numeric() for identity.
+errorCovariance = numeric();
+# errorCovariance = read.table("Sample_Data/errorCovariance.txt");
+
+# Distance for local gene-SNP pairs
+cisDist = 1e6;
+
+## Load genotype data
+
+snps = SlicedData$new();
+snps$fileDelimiter = "\t";      # the TAB character
+snps$fileOmitCharacters = "NA"; # denote missing values;
+snps$fileSkipRows = 1;          # one row of column labels
+snps$fileSkipColumns = 1;       # one column of row labels
+snps$fileSliceSize = 2000;      # read file in slices of 2,000 rows
+snps$LoadFile(SNP_file_name);
+
+## Load gene expression data
+
+gene = SlicedData$new();
+gene$fileDelimiter = "\t";      # the TAB character
+gene$fileOmitCharacters = "NA"; # denote missing values;
+gene$fileSkipRows = 1;          # one row of column labels
+gene$fileSkipColumns = 1;       # one column of row labels
+gene$fileSliceSize = 2000;      # read file in slices of 2,000 rows
+gene$LoadFile(expression_file_name);
+
+## Load covariates
+
+cvrt = SlicedData$new();
+cvrt$fileDelimiter = "\t";      # the TAB character
+cvrt$fileOmitCharacters = "NA"; # denote missing values;
+cvrt$fileSkipRows = 1;          # one row of column labels
+cvrt$fileSkipColumns = 1;       # one column of row labels
+if(length(covariates_file_name)>0) {
+  cvrt$LoadFile(covariates_file_name);
+}
+
+## Run the analysis
+snpspos = read.table(snps_location_file_name, header = TRUE, stringsAsFactors = FALSE);
+genepos = read.table(gene_location_file_name, header = TRUE, stringsAsFactors = FALSE);
+
+me = Matrix_eQTL_main(
+    snps = snps, 
+    gene = gene, 
+    cvrt = cvrt,
+    output_file_name      = output_file_name_tra,
+    pvOutputThreshold     = pvOutputThreshold_tra,
+    useModel = useModel, 
+    errorCovariance = errorCovariance, 
+    verbose = TRUE, 
+    output_file_name.cis  = output_file_name_cis,
+    pvOutputThreshold.cis = pvOutputThreshold_cis,
+    snpspos = snpspos, 
+    genepos = genepos,
+    cisDist = cisDist,
+    pvalue.hist = TRUE,
+    min.pv.by.genesnp = FALSE,
+    noFDRsaveMemory = FALSE);
+
+unlink(output_file_name_tra);
+unlink(output_file_name_cis);
+
+## Results:
+
+cat('Analysis done in: ', me$time.in.sec, ' seconds', '\n');
+cat('Detected local eQTLs:', '\n');
+show(me$cis$eqtls)
+cat('Detected distant eQTLs:', '\n');
+show(me$trans$eqtls)
+
+## Make the histogram of local and distant p-values
+
+plot(me)
+}
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/man/Matrix_eQTL_main.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/man/Matrix_eQTL_main.Rd	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,273 @@
+\name{Matrix_eQTL_main}
+\alias{Matrix_eQTL_main}
+\alias{Matrix_eQTL_engine}
+\title{
+	Main function for fast eQTL analysis in MatrixEQTL package
+}
+\description{
+	\code{Matrix_eQTL_engine} function tests association of every row of the \code{snps} dataset with every row of the \code{gene} dataset using a linear regression model defined by the \code{useModel} parameter (see below).
+
+	The testing procedure accounts for extra covariates in \code{cvrt} parameter.
+	
+	The \code{errorCovariance} parameter can be set to the error variance-covariance matrix to account for heteroskedastic and/or correlated errors.
+	
+	Associations significant at \code{pvOutputThreshold} (\code{pvOutputThreshold.cis}) levels are saved to \code{output_file_name} (\code{output_file_name.cis}), with corresponding estimates of effect size (slope coefficient), test statistics, p-values, and q-values (false discovery rate).
+
+	Matrix eQTL can perform separate analysis for local (cis) and distant (trans) eQTLs.
+	For such analysis one has to set the cis-analysis specific parameters \code{pvOutputThreshold.cis > 0}, \code{cisDist}, \code{snpspos} and {genepos} in the call of \code{Matrix_eQTL_main} function.
+	A gene-SNP pair is considered local if the distance between them is less or equal to \code{cisDist}.
+	The genomic location of genes and SNPs is defined by data frames \code{snpspos} and {genepos}.
+	Depending on p-value thresholds \code{pvOutputThreshold} and \code{pvOutputThreshold.cis} Matrix eQTL runs in one of three different modes:
+	\itemize{
+		\item Set \code{pvOutputThreshold > 0} and \code{pvOutputThreshold.cis = 0} (or use \code{Matrix_eQTL_engine}) to perform eQTL analysis without using gene/SNP locations. Associations significant at the \code{pvOutputThreshold} level are be recorded in \code{output_file_name} and in the returned object.
+		\item Set \code{pvOutputThreshold = 0} and \code{pvOutputThreshold.cis > 0} to perform eQTL analysis for local gene-SNP pairs only. Local associations significant at \code{pvOutputThreshold.cis} level will be recorded in \code{output_file_name.cis} and in the returned object.
+		\item Set \code{pvOutputThreshold > 0} and \code{pvOutputThreshold.cis > 0} to perform eQTL analysis with separate p-value thresholds for local and distant eQTLs. Distant and local associations significant at corresponding thresholds are recorded in \code{output_file_name} and \code{output_file_name.cis} respectively and in the returned object. In this case the false discovery rate is calculated separately for these two sets of eQTLs.
+	}
+	\code{Matrix_eQTL_engine} is a wrapper for \code{Matrix_eQTL_main} for eQTL analysis without regard to gene/SNP location and provided for compatibility with the previous versions of the package.
+
+	The parameter \code{pvalue.hist} allows to record information sufficient to create a histogram or QQ-plot of all the p-values (see \code{\link[=plot.MatrixEQTL]{plot}}).
+}
+\usage{
+Matrix_eQTL_main( snps, 
+                  gene, 
+                  cvrt = SlicedData$new(), 
+                  output_file_name = "", 
+                  pvOutputThreshold = 1e-5,
+                  useModel = modelLINEAR, 
+                  errorCovariance = numeric(), 
+                  verbose = TRUE, 
+                  output_file_name.cis = "", 
+                  pvOutputThreshold.cis = 0,
+                  snpspos = NULL, 
+                  genepos = NULL,
+                  cisDist = 1e6,
+                  pvalue.hist = FALSE,
+                  min.pv.by.genesnp = FALSE,
+                  noFDRsaveMemory = FALSE)
+
+Matrix_eQTL_engine(snps, 
+                   gene, 
+                   cvrt = SlicedData$new(), 
+                   output_file_name, 
+                   pvOutputThreshold = 1e-5, 
+                   useModel = modelLINEAR, 
+                   errorCovariance = numeric(), 
+                   verbose = TRUE,
+                   pvalue.hist = FALSE,
+                   min.pv.by.genesnp = FALSE,
+                   noFDRsaveMemory = FALSE)
+}
+\arguments{
+	\item{snps}{
+		\code{\linkS4class{SlicedData}} object with genotype information. 
+		Can be real-valued for linear models and must take at most 3 distinct values for ANOVA unless the number of ANOVA categories is set to a higher number (see \code{useModel} parameter).
+	}
+	\item{gene}{
+		\code{\linkS4class{SlicedData}} object with gene expression information. 
+		Must have columns matching those of \code{snps}.
+	}
+	\item{cvrt}{
+		\code{\linkS4class{SlicedData}} object with additional covariates. 
+		Can be an empty \code{SlicedData} object in case of no covariates.
+		The constant is always included in the model and would cause an error if included in \code{cvrt}.
+		The order of columns must match those in \code{snps} and \code{gene}.
+	}
+	\item{output_file_name}{
+		\code{character}, \code{connection}, or \code{NULL}. 
+		If not \code{NULL}, significant associations are saved to this file (all significant associations if \code{pvOutputThreshold=0} or only distant if \code{pvOutputThreshold>0}).
+		If the file with this name exists, it is overwritten.
+	}
+	\item{output_file_name.cis}{
+		\code{character}, \code{connection}, or \code{NULL}.
+		If not \code{NULL}, significant local associations are saved to this file.
+		If the file with this name exists, it is overwritten.
+	}
+	\item{pvOutputThreshold}{
+		\code{numeric}. Significance threshold for all/distant tests.
+	}
+	\item{pvOutputThreshold.cis}{ 
+		\code{numeric}. Same as \code{pvOutputThreshold}, but for local eQTLs.
+	}
+	\item{useModel}{
+		\code{integer}. Eigher \code{modelLINEAR}, \code{modelANOVA}, or \code{modelLINEAR_CROSS}.
+		\enumerate{
+			\item Set \code{useModel = \link{modelLINEAR}} to model the effect of the genotype as additive linear and test for its significance using t-statistic.
+			\item Set \code{useModel = \link{modelANOVA}} to treat genotype as a categorical variables and use ANOVA model and test for its significance using F-test. The default number of ANOVA categories is 3. Set otherwise like this: \code{options(MatrixEQTL.ANOVA.categories=4)}.
+			\item Set \code{useModel = \link{modelLINEAR_CROSS}} to add a new term to the model
+			equal to the product of genotype and the last covariate; the significance of this term is then tested using t-statistic.
+		}
+
+	}
+	\item{errorCovariance}{
+		\code{numeric}. The error covariance matrix. Use \code{numeric()} for homoskedastic independent errors. 
+	}
+	\item{verbose}{
+		\code{logical}. Set to \code{TRUE} to display more detailed report about the progress.
+	}
+	\item{snpspos}{
+		\code{data.frame} object with information about SNP locations, must have 3 columns - SNP name, chromosome, and position, like this:
+		\tabular{ccc}{
+			snpid   \tab chr   		\tab pos 	   \cr
+			Snp_01  \tab 1     \tab 721289  \cr
+			Snp_02  \tab 1     \tab 752565  \cr
+			\ldots  \tab \ldots   \tab \ldots  \cr
+		}
+	}
+	\item{genepos}{
+		\code{data.frame} with information about transcript locations, must have 4 columns - the name, chromosome, and positions of the left and right ends, like this:
+		\tabular{cccc}{
+			geneid  \tab chr   		\tab left 	\tab right \cr
+			Gene_01 \tab 1     \tab 721289 \tab 731289 \cr
+			Gene_02 \tab   1     \tab 752565 \tab 762565 \cr
+			\ldots  \tab \ldots   \tab \ldots \tab \ldots \cr
+		}
+	}
+	\item{cisDist}{
+		\code{numeric}. SNP-gene pairs within this distance are considered local. The distance is measured from the nearest end of the gene. SNPs within a gene are always considered local.
+	}
+	\item{pvalue.hist}{
+		\code{logical}, \code{numerical}, or \code{"qqplot"}.
+		Defines whether and how the distribution of p-values is recorded in the returned object.
+		If \code{pvalue.hist = FALSE}, the information is not recorded and the analysis is performed faster. 
+		Alternatively, set \code{pvalue.hist = "qqplot"} to record information sufficient to create a QQ-plot of the p-values (use \code{\link[=plot.MatrixEQTL]{plot}} on the returned object to create the plot).
+		To record information for a histogram set \code{pvalue.hist} to the desired number of bins of equal size.	Finally, \code{pvalue.hist} can also be set to a custom set of bin edges.
+	}
+	\item{min.pv.by.genesnp}{
+		\code{logical}. Set \code{min.pv.by.genesnp = TRUE} to record the minimum p-value for each SNP and each gene in the returned object. The minimum p-values are recorded even if if they are above the corresponding thresholds of \code{pvOutputThreshold} and \code{pvOutputThreshold.cis}. The analysis runs faster when the parameter is set to \code{FALSE}. 
+	}
+	\item{noFDRsaveMemory}{
+		\code{logical}. Set \code{noFDRsaveMemory = TRUE} to save significant gene-SNP pairs directly to the output files, reduce memory footprint and skip FDR calculation. The eQTLs are not recorded in the returned object if \code{noFDRsaveMemory = TRUE}.
+	}
+}
+\details{
+	Note that the columns of \code{gene}, \code{snps}, and \code{cvrt} must match.
+	If they do not match in the input files, use \code{ColumnSubsample} method to subset and/or reorder them.
+}
+\value{
+	The detected eQTLs are saved in \code{output_file_name} and/or \code{output_file_name.cis} if they are not \code{NULL}.
+	The method also returns a list with a summary of the performed analysis.
+	\item{param}{Keeps all input parameters and also records the number of degrees of freedom for the full model.}
+	\item{time.in.sec}{Time difference between the start and the end of the analysis (in seconds).}
+	\item{all}{Information about all detected eQTLs.}
+	\item{cis}{Information about detected local eQTLs.}
+	\item{trans}{Information about detected distant eQTLs.}
+	The elements \code{all}, \code{cis}, and \code{trans} may contain the following components
+	\describe{
+		\item{\code{ntests}}{Total number of tests performed. This is used for FDR calculation.}
+		\item{\code{eqtls}}{Data frame with recorded significant associations. Not available if \code{noFDRsaveMemory=FALSE}}
+		\item{\code{neqtls}}{Number of significant associations recorded.}
+		\item{\code{hist.bins}}{Histogram bins used for recording p-value distribution. See \code{pvalue.hist} parameter.}
+		\item{\code{hist.counts}}{Number of p-value that fell in each histogram bin. See \code{pvalue.hist} parameter.}
+		\item{\code{min.pv.snps}}{Vector with the best p-value for each SNP. See \code{min.pv.by.genesnp} parameter.}
+		\item{\code{min.pv.gene}}{Vector with the best p-value for each gene. See \code{min.pv.by.genesnp} parameter.}
+	}
+}
+\references{
+	The package website: \url{http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/}
+}
+\author{
+	Andrey Shabalin \email{ashabalin@vcu.edu}
+}
+\seealso{
+	The code below is the sample code for eQTL analysis NOT using gene/SNP locations.	
+	
+	See \code{\link{MatrixEQTL_cis_code}} for sample code for eQTL analysis that separates local and distant tests.
+}
+\examples{
+# Matrix eQTL by Andrey A. Shabalin
+# http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/
+# 
+# Be sure to use an up to date version of R and Matrix eQTL.
+
+# source("Matrix_eQTL_R/Matrix_eQTL_engine.r");
+library(MatrixEQTL)
+
+## Location of the package with the data files.
+base.dir = find.package('MatrixEQTL');
+
+## Settings
+
+# Linear model to use, modelANOVA, modelLINEAR, or modelLINEAR_CROSS
+useModel = modelLINEAR; # modelANOVA, modelLINEAR, or modelLINEAR_CROSS
+
+# Genotype file name
+SNP_file_name = paste(base.dir, "/data/SNP.txt", sep="");
+
+# Gene expression file name
+expression_file_name = paste(base.dir, "/data/GE.txt", sep="");
+
+# Covariates file name
+# Set to character() for no covariates
+covariates_file_name = paste(base.dir, "/data/Covariates.txt", sep="");
+
+# Output file name
+output_file_name = tempfile();
+
+# Only associations significant at this level will be saved
+pvOutputThreshold = 1e-2;
+
+# Error covariance matrix
+# Set to numeric() for identity.
+errorCovariance = numeric();
+# errorCovariance = read.table("Sample_Data/errorCovariance.txt");
+
+
+## Load genotype data
+
+snps = SlicedData$new();
+snps$fileDelimiter = "\t";      # the TAB character
+snps$fileOmitCharacters = "NA"; # denote missing values;
+snps$fileSkipRows = 1;          # one row of column labels
+snps$fileSkipColumns = 1;       # one column of row labels
+snps$fileSliceSize = 2000;      # read file in slices of 2,000 rows
+snps$LoadFile(SNP_file_name);
+
+## Load gene expression data
+
+gene = SlicedData$new();
+gene$fileDelimiter = "\t";      # the TAB character
+gene$fileOmitCharacters = "NA"; # denote missing values;
+gene$fileSkipRows = 1;          # one row of column labels
+gene$fileSkipColumns = 1;       # one column of row labels
+gene$fileSliceSize = 2000;      # read file in slices of 2,000 rows
+gene$LoadFile(expression_file_name);
+
+## Load covariates
+
+cvrt = SlicedData$new();
+cvrt$fileDelimiter = "\t";      # the TAB character
+cvrt$fileOmitCharacters = "NA"; # denote missing values;
+cvrt$fileSkipRows = 1;          # one row of column labels
+cvrt$fileSkipColumns = 1;       # one column of row labels
+if(length(covariates_file_name)>0) {
+  cvrt$LoadFile(covariates_file_name);
+}
+
+## Run the analysis
+
+me = Matrix_eQTL_engine(
+    snps = snps,
+    gene = gene,
+    cvrt = cvrt,
+    output_file_name = output_file_name,
+    pvOutputThreshold = pvOutputThreshold,
+    useModel = useModel, 
+    errorCovariance = errorCovariance, 
+    verbose = TRUE,
+    pvalue.hist = TRUE,
+    min.pv.by.genesnp = FALSE,
+    noFDRsaveMemory = FALSE);
+		
+unlink(output_file_name);
+
+## Results:
+
+cat('Analysis done in: ', me$time.in.sec, ' seconds', '\n');
+cat('Detected eQTLs:', '\n');
+show(me$all$eqtls)
+
+## Plot the histogram of all p-values
+
+plot(me)
+
+}
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/man/SNP.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/man/SNP.Rd	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,25 @@
+\name{SNP}
+\alias{SNP}
+\docType{data}
+\title{
+	Artificial data for Matrix eQTL sample code: Genotype.
+}
+\description{
+	Artificial data set with genotype information for 15 markers across 15 samples.
+	Columns of the file must match to those of the gene expression and covariates data sets.
+}
+\format{
+	\tabular{llll}{
+		snpnid  \tab Sam_01   \tab Sam_02 \tab \ldots \cr
+		Snp_01  \tab 2        \tab 0      \tab \ldots \cr
+		Snp_02  \tab 0        \tab 1      \tab \ldots \cr
+		\ldots  \tab \ldots   \tab \ldots \tab \ldots \cr
+	}
+}
+\references{
+	The package website: \url{http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/}
+}
+\seealso{
+	See \code{\link{Matrix_eQTL_engine}} for reference and sample code.
+}
+\keyword{datasets}
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/man/SlicedData-class.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/man/SlicedData-class.Rd	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,189 @@
+\name{SlicedData-class}
+\Rdversion{1.1}
+\docType{class}
+\alias{SlicedData-class}
+\alias{SlicedData}
+\alias{SlicedData-class}
+\alias{[[,SlicedData-method}
+\alias{[[<-,SlicedData-method}
+\alias{colnames,SlicedData-method}
+\alias{colnames<-,SlicedData-method}
+\alias{dim,SlicedData-method}
+\alias{length,SlicedData-method}
+\alias{ncol,SlicedData-method}
+\alias{NCOL,SlicedData-method}
+\alias{nrow,SlicedData-method}
+\alias{NROW,SlicedData-method}
+\alias{rownames,SlicedData-method}
+\alias{rownames<-,SlicedData-method}
+\alias{show,SlicedData-method}
+\alias{as.matrix,SlicedData-method}
+
+\alias{rowMeans,SlicedData-method}
+\alias{rowSums,SlicedData-method}
+\alias{colMeans,SlicedData-method}
+\alias{colSums,SlicedData-method}
+\alias{summary.SlicedData}
+
+\title{Class \code{SlicedData} for storing large matrices}
+\description{
+	This class is created for fast and memory efficient manipulations with large datasets presented in matrix form.
+	It is used to load, store, and manipulate large datasets, e.g. genotype and gene expression matrices.
+	When a dataset is loaded, it is sliced in blocks of 1,000 rows (default size).
+	This allows imputing, standardizing, and performing other operations with the data with minimal memory overhead.
+}
+\section{Extends}{
+	\code{SlicedData} is a reference classes (\code{\linkS4class{envRefClass}}).
+	Its methods can change the values of the fields of the class.
+}
+\references{
+	The package website: \url{http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/}
+}
+\author{
+	Andrey Shabalin \email{ashabalin@vcu.edu}
+}
+\seealso{
+	This class is used to load data for eQTL analysis by \code{\link{Matrix_eQTL_engine}}.
+}
+\keyword{classes}
+\section{Fields}{
+	\describe{
+		\item{\code{dataEnv}:}{\code{environment}. Stores the slices of the data matrix. The slices should be accessed via \code{getSlice()} and \code{setSlice()} methods. }
+		\item{\code{nSlices1}:}{\code{numeric}. Number of slices. For internal use. The value should be access via \code{nSlices()} method. }
+		\item{\code{rowNameSlices}:}{\code{list}. Slices of row names. }
+		\item{\code{columnNames}:}{\code{character}. Column names. }
+		\item{\code{fileDelimiter}:}{\code{character}. Delimiter separating values in the input file. }
+		\item{\code{fileSkipColumns}:}{\code{numeric}. Number of columns with row labels in the input file. }
+		\item{\code{fileSkipRows}:}{\code{numeric}. Number of rows with column labels in the input file. }
+		\item{\code{fileSliceSize}:}{\code{numeric}. Maximum number of rows in a slice. }
+		\item{\code{fileOmitCharacters}:}{\code{character}. Missing value (NaN) representation in the input file. }
+	}
+}
+\section{Methods}{
+	\describe{
+		\item{\code{initialize(mat)}:}{ Create the object from a matrix. }
+		\item{\code{nSlices()}:}{ Returns the number of slices. }
+		\item{\code{nCols()}:}{ Returns the number of columns in the matrix. }
+		\item{\code{nRows()}:}{ Returns the number of rows in the matrix. }
+		\item{\code{Clear()}:}{ Clears the object. Removes the data slices and row and column names. }
+		\item{\code{Clone()}:}{ Makes a copy of the object. Changes to the copy do not affect the source object. }
+		\item{\code{CreateFromMatrix(mat)}:}{ Creates \code{SlicedData} object from a \code{\link[base]{matrix}}.}
+		\item{\code{LoadFile(filename, skipRows = NULL, skipColumns = NULL,} \cr 
+			\code{sliceSize = NULL, omitCharacters = NULL, delimiter = NULL, rowNamesColumn = 1)}:}{ Loads data matrix from a file. \code{filename} should be a character string. The remaining parameters specify the file format and have the same meaning as \code{file*} fields. Additional \code{rowNamesColumn} parameter specifies which of the columns of row labels to use as row names.}
+		\item{\code{SaveFile(filename)}:}{ Saves the data to a file. \code{filename} should be a character string.}
+		\item{\code{getSlice(sl)}:}{ Retrieves \code{sl}-th slice of the matrix. }
+		\item{\code{setSlice(sl, value)}:}{ Set \code{sl}-th slice of the matrix. }
+		\item{\code{ColumnSubsample(subset)}:}{ Reorders/subsets the columns according to \code{subset}. \cr
+Acts as \code{M = M[ ,subset]} for a matrix \code{M}. }
+		\item{\code{RowReorder(ordr)}:}{ Reorders rows according to \code{ordr}. \cr
+Acts as \code{M = M[ordr, ]} for a matrix \code{M}. }
+		\item{\code{RowMatrixMultiply(multiplier)}:}{ Multiply each row by the \code{multiplier}. \cr
+Acts as \code{M = M \%*\% multiplier} for a matrix \code{M}. }
+		\item{\code{CombineInOneSlice()}:}{ Combines all slices into one. The whole matrix can then be obtained via \code{$getSlice(1)}. }
+		\item{\code{IsCombined()}:}{ Returns \code{TRUE} if the number of slices is 1 or 0. }
+		\item{\code{ResliceCombined(sliceSize = -1)}:}{ Cuts the data into slices of \code{sliceSize} rows. If \code{sliceSize} is not defined, the value of \code{fileSliceSize} field is used.}
+		\item{\code{GetAllRowNames()}:}{ Returns all row names in one vector. }	
+		\item{\code{RowStandardizeCentered()}:}{ Set the mean of each row to zero and the sum of squares to one. }
+		\item{\code{SetNanRowMean()}:}{ Impute rows with row mean. Rows full of NaN values are imputed with zeros. }
+		\item{\code{RowRemoveZeroEps()}:}{ Removes rows of zeros and those that are nearly zero. }
+		\item{\code{FindRow(rowname)}:}{ Finds row by name. Returns a pair of slice number an row number within the slice. If no row is found, the function returns \code{NULL}. }
+		\item{\code{rowMeans(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of row means. Works as \link[base]{rowMeans} but requires \code{dims} to be equal to \code{1L}.}
+		\item{\code{rowSums(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of row sums. Works as \link[base]{rowSums} but requires \code{dims} to be equal to \code{1L}.}
+		\item{\code{colMeans(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of column means. Works as \link[base]{colMeans} but requires \code{dims} to be equal to \code{1L}.}
+		\item{\code{colSums(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of column sums. Works as \link[base]{colSums} but requires \code{dims} to be equal to \code{1L}.}
+	}
+}
+
+\usage{
+# x[[i]] indexing allows easy access to individual slices.
+# It is equivalent to x$GetSlice(i) and x$SetSlice(i,value)
+\S4method{[[}{SlicedData}(x, i)
+\S4method{[[}{SlicedData}(x, i) <- value
+
+# The following commands work as if x was a simple matrix object
+\S4method{nrow}{SlicedData}(x)
+\S4method{ncol}{SlicedData}(x)
+\S4method{dim}{SlicedData}(x)
+\S4method{rownames}{SlicedData}(x)
+\S4method{colnames}{SlicedData}(x)
+\S4method{rownames}{SlicedData}(x) <- value
+\S4method{colnames}{SlicedData}(x) <- value
+
+# SlicedData object can be easily transformed into a matrix
+# preserving row and column names
+\S4method{as.matrix}{SlicedData}(x)
+
+# length(x) can be used in place of x$nSlices()
+# to get the number of slices in the object
+\S4method{length}{SlicedData}(x)
+}
+\arguments{
+	\item{x}{
+		\code{\linkS4class{SlicedData}} object.
+	}
+	\item{i}{
+		Number of a slice.
+	}
+	\item{value}{
+		New content for the slice / new row or column names.
+	}
+}
+
+
+\examples{
+
+# Create a SlicedData variable
+sd = SlicedData$new();
+
+# Show the details of the empty object
+show(sd);
+
+# Create a matrix of values and assign to sd
+mat = matrix(1:12, 3, 4);
+rownames(mat) = c("row1","row2","row3");
+colnames(mat) = c("col1","col2","col3","col4");
+sd$CreateFromMatrix( mat );
+
+# Show the detail of the object (one slice)
+show(sd);
+
+# Slice it in pieces of 2 rows
+sd$ResliceCombined(sliceSize = 2L);
+
+# Show the number of slices (equivalent function calls)
+sd$nSlices()
+length(sd)
+
+# Is it all in one slice? (No)
+sd$IsCombined()
+
+# Show the column names (equivalent function calls)
+sd$columnNames
+colnames(sd)
+
+# Show row name slices
+sd$rowNameSlices
+
+# Show all row names (equivalent function calls)
+sd$GetAllRowNames()
+rownames(sd)
+
+# Print the second slice
+print( sd[[2]] )
+
+# Reorder and subset columns
+sd$ColumnSubsample( c(1,3,4) );
+
+# Reorder and subset rows
+sd$RowReorder( c(3,1) );
+
+# Show the detail of the object (one slice again)
+show(sd);
+
+# Is it all in one slice? (Yes)
+sd$IsCombined()
+
+# Find the row with name "row1" (it is second in the first slice)
+sd$FindRow("row1");
+
+}
\ No newline at end of file
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/man/figures/QQplot.png
Binary file MatrixEQTL/man/figures/QQplot.png has changed
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/man/figures/histogram.png
Binary file MatrixEQTL/man/figures/histogram.png has changed
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/man/geneloc.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/man/geneloc.Rd	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,31 @@
+\name{geneloc}
+\alias{geneloc}
+\docType{data}
+\title{
+	Artificial data for Matrix eQTL sample code: Gene location file.
+}
+\description{
+	Artificial Gene location file for 10 genes. 
+	\tabular{llll}{
+		geneid  \tab chr   		\tab left 	\tab right \cr
+		Gene_01 \tab chr1     \tab 721289 \tab 731289 \cr
+		Gene_02 \tab chr1     \tab 752565 \tab 762565 \cr
+		\ldots  \tab \ldots   \tab \ldots \tab \ldots \cr
+	}
+}
+\format{
+  A data frame with 4 columns.
+  \describe{
+    \item{\code{geneid}}{A column with gene names. The order does not have to match the gene expression data set.}
+    \item{\code{chr}}{Chromosome number, i.e. chr1.}
+    \item{\code{left}}{Lower (smaller) coordinate of the gene begining/end.}
+    \item{\code{right}}{Upper (larger) coordinate of the gene begining/end.}
+  }
+}
+\references{
+	The package website: \url{http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/}
+}
+\seealso{
+	See \code{\link{Matrix_eQTL_engine}} for reference and sample code.
+}
+\keyword{datasets}
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/man/modelANOVA.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/man/modelANOVA.Rd	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,88 @@
+\name{modelANOVA}
+\alias{modelANOVA}
+\docType{data}
+\title{
+	Constant for \code{\link{Matrix_eQTL_engine}}.
+}
+\description{
+	Set parameter \code{useModel = modelANOVA} in the call of \code{\link{Matrix_eQTL_main}} to indicate that the genotype should be treated as a categorical variable.
+}
+\note{
+By default, the number of ANOVA categories is fixed to be 3.
+
+To set it to a different number (say, 4) use the following command:
+
+\code{options(MatrixEQTL.ANOVA.categories=4)}
+
+To check the current settings run:
+
+\code{getOption('MatrixEQTL.ANOVA.categories', 3);}
+}
+\examples{
+library('MatrixEQTL')	
+			
+# Number of columns (samples)
+n = 100;
+
+# Number of covariates
+nc = 10;
+
+# Generate the standard deviation of the noise
+noise.std = 0.1 + rnorm(n)^2;
+
+# Generate the covariates
+cvrt.mat = 2 + matrix(rnorm(n*nc), ncol = nc);
+
+# Generate the vectors with single genotype and expression variables
+snps.mat = floor(runif(n, min = 0, max = 3));
+gene.mat = 1 + (snps.mat==1) + cvrt.mat \%*\% rnorm(nc) + rnorm(n) * noise.std;
+
+# Create 3 SlicedData objects for the analysis
+snps1 = SlicedData$new( matrix( snps.mat, nrow = 1 ) );
+gene1 = SlicedData$new( matrix( gene.mat, nrow = 1 ) );
+cvrt1 = SlicedData$new( t(cvrt.mat) );
+
+# name of temporary output file
+filename = tempfile();
+
+snps1
+gene1
+
+# Call the main analysis function
+me = Matrix_eQTL_main(
+  snps = snps1, 
+  gene = gene1, 
+  cvrt = cvrt1, 
+  output_file_name = filename, 
+  pvOutputThreshold = 1, 
+  useModel = modelANOVA, 
+  errorCovariance = diag(noise.std^2), 
+  verbose = TRUE,
+  pvalue.hist = FALSE );
+# remove the output file
+unlink( filename );
+
+# Pull Matrix eQTL results - t-statistic and p-value
+
+fstat = me$all$eqtls$statistic;
+pvalue = me$all$eqtls$pvalue;
+rez = c( Fstat = fstat, pvalue = pvalue)
+# And compare to those from ANOVA in R
+{
+  cat('\n\n Matrix eQTL: \n'); 
+  print(rez);
+  cat('\n R anova(lm()) output: \n')
+  lmodel = lm( gene.mat ~ cvrt.mat + factor(snps.mat), weights = 1/noise.std^2 );
+  lmout = anova( lmodel )[2, 4:5];
+  print( lmout )
+}
+
+# Results from Matrix eQTL and 'lm' must agree
+stopifnot(all.equal(lmout, rez, check.attributes=FALSE))
+}
+\references{
+	The package website: \url{http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/}
+}
+\seealso{
+	See \code{\link{Matrix_eQTL_engine}} for reference and sample code.
+}
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/man/modelLINEAR.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/man/modelLINEAR.Rd	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,75 @@
+\name{modelLINEAR}
+\alias{modelLINEAR}
+\docType{data}
+\title{
+	Constant for \code{\link{Matrix_eQTL_engine}}.
+}
+\description{
+	Set parameter \code{useModel = modelLINEAR} in the call of \code{\link{Matrix_eQTL_main}} to indicate that the effect of genotype on expression should be assumed to be additive linear.
+}
+
+\examples{
+library('MatrixEQTL')	
+
+# Number of columns (samples)
+n = 100;
+
+# Number of covariates
+nc = 10;
+
+# Generate the standard deviation of the noise
+noise.std = 0.1 + rnorm(n)^2;
+
+# Generate the covariates
+cvrt.mat = 2 + matrix(rnorm(n*nc), ncol = nc);
+
+# Generate the vectors with genotype and expression variables
+snps.mat = cvrt.mat \%*\% rnorm(nc) + rnorm(n);
+gene.mat = cvrt.mat \%*\% rnorm(nc) + rnorm(n) * noise.std + 0.5 * snps.mat + 1;
+
+# Create 3 SlicedData objects for the analysis
+snps1 = SlicedData$new( matrix( snps.mat, nrow = 1 ) );
+gene1 = SlicedData$new( matrix( gene.mat, nrow = 1 ) );
+cvrt1 = SlicedData$new( t(cvrt.mat) );
+
+# name of temporary output file
+filename = tempfile();
+
+# Call the main analysis function
+me = Matrix_eQTL_main(
+  snps = snps1, 
+  gene = gene1, 
+  cvrt = cvrt1, 
+  output_file_name = filename, 
+  pvOutputThreshold = 1, 
+  useModel = modelLINEAR, 
+  errorCovariance = diag(noise.std^2), 
+  verbose = TRUE,
+  pvalue.hist = FALSE );
+# remove the output file
+unlink( filename );
+
+# Pull Matrix eQTL results - t-statistic and p-value
+beta = me$all$eqtls$beta;
+tstat = me$all$eqtls$statistic;
+pvalue = me$all$eqtls$pvalue;
+rez = c(beta = beta, tstat = tstat, pvalue = pvalue)
+# And compare to those from the linear regression in R
+{
+  cat('\n\n Matrix eQTL: \n'); 
+  print(rez);
+  cat('\n R summary(lm()) output: \n');
+  lmodel = lm( gene.mat ~ snps.mat + cvrt.mat, weights = 1/noise.std^2 );
+  lmout = summary( lmodel )$coefficients[2, c(1,3,4)];
+  print( lmout )
+}
+
+# Results from Matrix eQTL and 'lm' must agree
+stopifnot(all.equal(lmout, rez, check.attributes=FALSE))
+}
+\references{
+	The package website: \url{http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/}
+}
+\seealso{
+	See \code{\link{Matrix_eQTL_engine}} for reference and sample code.
+}
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/man/modelLINEAR_CROSS.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/man/modelLINEAR_CROSS.Rd	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,76 @@
+\name{modelLINEAR_CROSS}
+\alias{modelLINEAR_CROSS}
+\docType{data}
+\title{
+	Constant for \code{\link{Matrix_eQTL_engine}}.
+}
+\description{
+	Set parameter \code{useModel = modelLINEAR_CROSS} in the call of \code{\link{Matrix_eQTL_main}} to indicate that Matrix eQTL should include the interaction of SNP and last covariate in the model and test for its significance.
+}
+\examples{
+library('MatrixEQTL')	
+
+# Number of columns (samples)
+n = 25;
+
+# Number of covariates
+nc = 10;
+
+# Generate the standard deviation of the noise
+noise.std = 0.1 + rnorm(n)^2;
+
+# Generate the covariates
+cvrt.mat = 2 + matrix(rnorm(n*nc), ncol = nc);
+
+# Generate the vectors with single genotype and expression variables
+snps.mat = cvrt.mat \%*\% rnorm(nc) + rnorm(n);
+gene.mat = cvrt.mat \%*\% rnorm(nc) + rnorm(n) * noise.std + 
+           1 + 0.5 * snps.mat + snps.mat * cvrt.mat[,nc];
+
+# Create 3 SlicedData objects for the analysis
+snps1 = SlicedData$new( matrix( snps.mat, nrow = 1 ) );
+gene1 = SlicedData$new( matrix( gene.mat, nrow = 1 ) );
+cvrt1 = SlicedData$new( t(cvrt.mat) );
+
+# name of temporary output file
+filename = tempfile();
+
+# Call the main analysis function
+me = Matrix_eQTL_main(
+  snps = snps1, 
+  gene = gene1, 
+  cvrt = cvrt1, 
+  output_file_name = filename, 
+  pvOutputThreshold = 1, 
+  useModel = modelLINEAR_CROSS, 
+  errorCovariance = diag(noise.std^2), 
+  verbose = TRUE,
+  pvalue.hist = FALSE );
+# remove the output file
+unlink( filename );
+
+# Pull Matrix eQTL results - t-statistic and p-value
+beta = me$all$eqtls$beta;
+tstat = me$all$eqtls$statistic;
+pvalue = me$all$eqtls$pvalue;
+rez = c(beta = beta, tstat = tstat, pvalue = pvalue)
+# And compare to those from the linear regression in R
+{
+  cat('\n\n Matrix eQTL: \n'); 
+  print(rez);
+  cat('\n R summary(lm()) output: \n')
+  lmodel = lm( gene.mat ~ snps.mat + cvrt.mat + snps.mat*cvrt.mat[,nc], 
+               weights = 1/noise.std^2 );
+  lmout = tail(summary( lmodel )$coefficients,1)[,c(1,3,4)];
+  print( tail(lmout) );
+}
+
+# Results from Matrix eQTL and 'lm' must agree
+stopifnot(all.equal(lmout, rez, check.attributes=FALSE))
+}
+\references{
+	The package website: \url{http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/}
+}
+\seealso{
+	See \code{\link{Matrix_eQTL_engine}} for reference and sample code.
+}
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/man/plot.MatrixEQTL.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/man/plot.MatrixEQTL.Rd	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,112 @@
+\name{plot.MatrixEQTL}
+\alias{plot.MatrixEQTL}
+\title{Plot histogram or QQ-plot of all p-values
+}
+\description{
+This method plots a histogram or QQ-plot of p-values
+for all tests performed by \code{\link{Matrix_eQTL_engine}}.
+}
+\usage{
+\method{plot}{MatrixEQTL}(x, cex = 0.5, pch = 19, xlim = NULL, ylim = NULL,...)
+}
+\arguments{
+	\item{x}{
+		An object returned by \code{\link{Matrix_eQTL_engine}}.
+	}
+	\item{cex}{
+		A numerical value giving the amount by which plotting text and symbols should be magnified relative to the default.
+	}
+	\item{pch}{
+		Plotting "character", i.e., symbol to use. See \code{\link[graphics]{points}}.
+	}
+	\item{xlim}{
+		Set the range of the horisontal axis.
+	}
+	\item{ylim}{
+		Set the range of the vertical axis.
+	}
+	\item{\dots}{
+		further graphical parameters passed to \code{\link[graphics]{lines}} and \code{\link[graphics]{points}}.
+	}
+}
+\details{
+	The plot type (histogram vs. QQ-plot) is determined by the \code{pvalue.hist} parameter in the call of \code{\link{Matrix_eQTL_engine}} function.
+}
+\value{
+	The method does not return any value.
+}
+\references{
+	The package website: \url{http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/}
+}
+\seealso{
+	See \code{\link{Matrix_eQTL_engine}} for reference and sample code.
+}
+\note{
+	The sample code below produces figures like these:
+	
+	Histogram: \figure{histogram.png}
+		
+	QQ-plot: \figure{QQplot.png}
+}
+\author{
+	Andrey Shabalin \email{ashabalin@vcu.edu}
+}
+\examples{
+library(MatrixEQTL)
+
+# Number of samples
+n = 100; 
+
+# Number of variables
+ngs = 2000;
+
+# Common signal in all variables
+pop = 0.2*rnorm(n);
+
+# data matrices
+snps.mat = matrix(rnorm(n*ngs), ncol = ngs) + pop;
+gene.mat = matrix(rnorm(n*ngs), ncol = ngs) + pop + snps.mat*((1:ngs)/ngs)^9/2;
+
+# data objects for Matrix eQTL engine
+snps1 = SlicedData$new( t( snps.mat ) );
+gene1 = SlicedData$new( t( gene.mat ) );
+cvrt1 = SlicedData$new( );
+rm(snps.mat, gene.mat)
+
+# Slice data in blocks of 500 variables
+snps1$ResliceCombined(500);
+gene1$ResliceCombined(500);
+
+# Produce no output files
+filename = NULL; # tempfile()
+
+# Perform analysis recording information for a histogram
+meh = Matrix_eQTL_engine(
+    snps = snps1, 
+    gene = gene1, 
+    cvrt = cvrt1, 
+    output_file_name = filename, 
+    pvOutputThreshold = 1e-100, 
+    useModel = modelLINEAR, 
+    errorCovariance = numeric(), 
+    verbose = TRUE,
+    pvalue.hist = 100);
+    
+plot(meh, col="grey")
+
+# Perform analysis recording information for a QQ-plot
+meq = Matrix_eQTL_engine(
+    snps = snps1, 
+    gene = gene1, 
+    cvrt = cvrt1, 
+    output_file_name = filename,
+    pvOutputThreshold = 1e-6, 
+    useModel = modelLINEAR, 
+    errorCovariance = numeric(), 
+    verbose = TRUE,
+    pvalue.hist = "qqplot");
+    
+plot(meq)
+}
+\keyword{ QQ-plot }
+\keyword{ histogram }
diff -r 5fafba5359eb -r ae74f8fb3aef MatrixEQTL/man/snpsloc.Rd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/man/snpsloc.Rd	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,30 @@
+\name{snpsloc}
+\alias{snpsloc}
+\docType{data}
+\title{
+	Artificial data for Matrix eQTL sample code: SNP location file.
+}
+\description{
+	Artificial SNP location file for 15 markers. 
+	\tabular{llll}{
+		snpid   \tab chr   		\tab pos 	   \cr
+		Snp_01  \tab chr1     \tab 721289  \cr
+		Snp_02  \tab chr1     \tab 752565  \cr
+		\ldots  \tab \ldots   \tab \ldots  \cr
+	}
+}
+\format{
+  A data frame with 3 columns.
+  \describe{
+    \item{\code{snpid}}{A column with SNP names. The order does not have to match the genotype data set.}
+    \item{\code{chr}}{Chromosome number, i.e. chr1.}
+    \item{\code{pos}}{Coordinate of the SNP.}
+  }
+}
+\references{
+	The package website: \url{http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/}
+}
+\seealso{
+	See \code{\link{Matrix_eQTL_engine}} for reference and sample code.
+}
+\keyword{datasets}