diff MatrixEQTL/man/SlicedData-class.Rd @ 3:ae74f8fb3aef draft

Uploaded
author jasonxu
date Fri, 12 Mar 2021 08:20:57 +0000
parents cd4c8e4a4b5b
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/MatrixEQTL/man/SlicedData-class.Rd	Fri Mar 12 08:20:57 2021 +0000
@@ -0,0 +1,189 @@
+\name{SlicedData-class}
+\Rdversion{1.1}
+\docType{class}
+\alias{SlicedData-class}
+\alias{SlicedData}
+\alias{SlicedData-class}
+\alias{[[,SlicedData-method}
+\alias{[[<-,SlicedData-method}
+\alias{colnames,SlicedData-method}
+\alias{colnames<-,SlicedData-method}
+\alias{dim,SlicedData-method}
+\alias{length,SlicedData-method}
+\alias{ncol,SlicedData-method}
+\alias{NCOL,SlicedData-method}
+\alias{nrow,SlicedData-method}
+\alias{NROW,SlicedData-method}
+\alias{rownames,SlicedData-method}
+\alias{rownames<-,SlicedData-method}
+\alias{show,SlicedData-method}
+\alias{as.matrix,SlicedData-method}
+
+\alias{rowMeans,SlicedData-method}
+\alias{rowSums,SlicedData-method}
+\alias{colMeans,SlicedData-method}
+\alias{colSums,SlicedData-method}
+\alias{summary.SlicedData}
+
+\title{Class \code{SlicedData} for storing large matrices}
+\description{
+	This class is designed for fast and memory-efficient manipulation of large datasets presented in matrix form.
+	It is used to load, store, and manipulate large datasets, e.g. genotype and gene expression matrices.
+	When a dataset is loaded, it is sliced into blocks of 1,000 rows (default size).
+	This allows imputing, standardizing, and performing other operations with the data with minimal memory overhead.
+}
+\section{Extends}{
+	\code{SlicedData} is a reference class (\code{\linkS4class{envRefClass}}).
+	Its methods can change the values of the fields of the class.
+}
+\references{
+	The package website: \url{http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/}
+}
+\author{
+	Andrey Shabalin \email{ashabalin@vcu.edu}
+}
+\seealso{
+	This class is used to load data for eQTL analysis by \code{\link{Matrix_eQTL_engine}}.
+}
+\keyword{classes}
+\section{Fields}{
+	\describe{
+		\item{\code{dataEnv}:}{\code{environment}. Stores the slices of the data matrix. The slices should be accessed via \code{getSlice()} and \code{setSlice()} methods. }
+		\item{\code{nSlices1}:}{\code{numeric}. Number of slices. For internal use. The value should be accessed via \code{nSlices()} method. }
+		\item{\code{rowNameSlices}:}{\code{list}. Slices of row names. }
+		\item{\code{columnNames}:}{\code{character}. Column names. }
+		\item{\code{fileDelimiter}:}{\code{character}. Delimiter separating values in the input file. }
+		\item{\code{fileSkipColumns}:}{\code{numeric}. Number of columns with row labels in the input file. }
+		\item{\code{fileSkipRows}:}{\code{numeric}. Number of rows with column labels in the input file. }
+		\item{\code{fileSliceSize}:}{\code{numeric}. Maximum number of rows in a slice. }
+		\item{\code{fileOmitCharacters}:}{\code{character}. Missing value (NaN) representation in the input file. }
+	}
+}
+\section{Methods}{
+	\describe{
+		\item{\code{initialize(mat)}:}{ Create the object from a matrix. }
+		\item{\code{nSlices()}:}{ Returns the number of slices. }
+		\item{\code{nCols()}:}{ Returns the number of columns in the matrix. }
+		\item{\code{nRows()}:}{ Returns the number of rows in the matrix. }
+		\item{\code{Clear()}:}{ Clears the object. Removes the data slices and row and column names. }
+		\item{\code{Clone()}:}{ Makes a copy of the object. Changes to the copy do not affect the source object. }
+		\item{\code{CreateFromMatrix(mat)}:}{ Creates \code{SlicedData} object from a \code{\link[base]{matrix}}.}
+		\item{\code{LoadFile(filename, skipRows = NULL, skipColumns = NULL,} \cr 
+			\code{sliceSize = NULL, omitCharacters = NULL, delimiter = NULL, rowNamesColumn = 1)}:}{ Loads data matrix from a file. \code{filename} should be a character string. The remaining parameters specify the file format and have the same meaning as \code{file*} fields. Additional \code{rowNamesColumn} parameter specifies which of the columns of row labels to use as row names.}
+		\item{\code{SaveFile(filename)}:}{ Saves the data to a file. \code{filename} should be a character string.}
+		\item{\code{getSlice(sl)}:}{ Retrieves \code{sl}-th slice of the matrix. }
+		\item{\code{setSlice(sl, value)}:}{ Set \code{sl}-th slice of the matrix. }
+		\item{\code{ColumnSubsample(subset)}:}{ Reorders/subsets the columns according to \code{subset}. \cr
+Acts as \code{M = M[ ,subset]} for a matrix \code{M}. }
+		\item{\code{RowReorder(ordr)}:}{ Reorders rows according to \code{ordr}. \cr
+Acts as \code{M = M[ordr, ]} for a matrix \code{M}. }
+		\item{\code{RowMatrixMultiply(multiplier)}:}{ Multiply each row by the \code{multiplier}. \cr
+Acts as \code{M = M \%*\% multiplier} for a matrix \code{M}. }
+		\item{\code{CombineInOneSlice()}:}{ Combines all slices into one. The whole matrix can then be obtained via \code{$getSlice(1)}. }
+		\item{\code{IsCombined()}:}{ Returns \code{TRUE} if the number of slices is 1 or 0. }
+		\item{\code{ResliceCombined(sliceSize = -1)}:}{ Cuts the data into slices of \code{sliceSize} rows. If \code{sliceSize} is not defined, the value of \code{fileSliceSize} field is used.}
+		\item{\code{GetAllRowNames()}:}{ Returns all row names in one vector. }	
+		\item{\code{RowStandardizeCentered()}:}{ Set the mean of each row to zero and the sum of squares to one. }
+		\item{\code{SetNanRowMean()}:}{ Imputes missing (NaN) values in each row with the row mean. Rows full of NaN values are imputed with zeros. }
+		\item{\code{RowRemoveZeroEps()}:}{ Removes rows of zeros and those that are nearly zero. }
+		\item{\code{FindRow(rowname)}:}{ Finds row by name. Returns a pair of slice number and row number within the slice. If no row is found, the function returns \code{NULL}. }
+		\item{\code{rowMeans(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of row means. Works as \link[base]{rowMeans} but requires \code{dims} to be equal to \code{1L}.}
+		\item{\code{rowSums(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of row sums. Works as \link[base]{rowSums} but requires \code{dims} to be equal to \code{1L}.}
+		\item{\code{colMeans(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of column means. Works as \link[base]{colMeans} but requires \code{dims} to be equal to \code{1L}.}
+		\item{\code{colSums(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of column sums. Works as \link[base]{colSums} but requires \code{dims} to be equal to \code{1L}.}
+	}
+}
+
+\usage{
+# x[[i]] indexing allows easy access to individual slices.
+# It is equivalent to x$getSlice(i) and x$setSlice(i, value)
+\S4method{[[}{SlicedData}(x, i)
+\S4method{[[}{SlicedData}(x, i) <- value
+
+# The following commands work as if x was a simple matrix object
+\S4method{nrow}{SlicedData}(x)
+\S4method{ncol}{SlicedData}(x)
+\S4method{dim}{SlicedData}(x)
+\S4method{rownames}{SlicedData}(x)
+\S4method{colnames}{SlicedData}(x)
+\S4method{rownames}{SlicedData}(x) <- value
+\S4method{colnames}{SlicedData}(x) <- value
+
+# SlicedData object can be easily transformed into a matrix
+# preserving row and column names
+\S4method{as.matrix}{SlicedData}(x)
+
+# length(x) can be used in place of x$nSlices()
+# to get the number of slices in the object
+\S4method{length}{SlicedData}(x)
+}
+\arguments{
+	\item{x}{
+		\code{\linkS4class{SlicedData}} object.
+	}
+	\item{i}{
+		Number of a slice.
+	}
+	\item{value}{
+		New content for the slice / new row or column names.
+	}
+}
+
+
+\examples{
+
+# Create a SlicedData variable
+sd = SlicedData$new();
+
+# Show the details of the empty object
+show(sd);
+
+# Create a matrix of values and assign to sd
+mat = matrix(1:12, 3, 4);
+rownames(mat) = c("row1","row2","row3");
+colnames(mat) = c("col1","col2","col3","col4");
+sd$CreateFromMatrix( mat );
+
+# Show the detail of the object (one slice)
+show(sd);
+
+# Slice it in pieces of 2 rows
+sd$ResliceCombined(sliceSize = 2L);
+
+# Show the number of slices (equivalent function calls)
+sd$nSlices()
+length(sd)
+
+# Is it all in one slice? (No)
+sd$IsCombined()
+
+# Show the column names (equivalent function calls)
+sd$columnNames
+colnames(sd)
+
+# Show row name slices
+sd$rowNameSlices
+
+# Show all row names (equivalent function calls)
+sd$GetAllRowNames()
+rownames(sd)
+
+# Print the second slice
+print( sd[[2]] )
+
+# Reorder and subset columns
+sd$ColumnSubsample( c(1,3,4) );
+
+# Reorder and subset rows
+sd$RowReorder( c(3,1) );
+
+# Show the detail of the object (one slice again)
+show(sd);
+
+# Is it all in one slice? (Yes)
+sd$IsCombined()
+
+# Find the row with name "row1" (it is second in the first slice)
+sd$FindRow("row1");
+
+}
\ No newline at end of file