Mercurial > repos > jasonxu > matrixeqtl
diff MatrixEQTL/man/SlicedData-class.Rd @ 0:cd4c8e4a4b5b draft
Uploaded
author | jasonxu |
---|---|
date | Fri, 12 Mar 2021 08:12:46 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MatrixEQTL/man/SlicedData-class.Rd Fri Mar 12 08:12:46 2021 +0000 @@ -0,0 +1,189 @@ +\name{SlicedData-class} +\Rdversion{1.1} +\docType{class} +\alias{SlicedData-class} +\alias{SlicedData} +\alias{SlicedData-class} +\alias{[[,SlicedData-method} +\alias{[[<-,SlicedData-method} +\alias{colnames,SlicedData-method} +\alias{colnames<-,SlicedData-method} +\alias{dim,SlicedData-method} +\alias{length,SlicedData-method} +\alias{ncol,SlicedData-method} +\alias{NCOL,SlicedData-method} +\alias{nrow,SlicedData-method} +\alias{NROW,SlicedData-method} +\alias{rownames,SlicedData-method} +\alias{rownames<-,SlicedData-method} +\alias{show,SlicedData-method} +\alias{as.matrix,SlicedData-method} + +\alias{rowMeans,SlicedData-method} +\alias{rowSums,SlicedData-method} +\alias{colMeans,SlicedData-method} +\alias{colSums,SlicedData-method} +\alias{summary.SlicedData} + +\title{Class \code{SlicedData} for storing large matrices} +\description{ + This class is created for fast and memory efficient manipulations with large datasets presented in matrix form. + It is used to load, store, and manipulate large datasets, e.g. genotype and gene expression matrices. + When a dataset is loaded, it is sliced in blocks of 1,000 rows (default size). + This allows imputing, standardizing, and performing other operations with the data with minimal memory overhead. +} +\section{Extends}{ + \code{SlicedData} is a reference class (\code{\linkS4class{envRefClass}}). + Its methods can change the values of the fields of the class. +} +\references{ + The package website: \url{http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/} +} +\author{ + Andrey Shabalin \email{ashabalin@vcu.edu} +} +\seealso{ + This class is used to load data for eQTL analysis by \code{\link{Matrix_eQTL_engine}}. +} +\keyword{classes} +\section{Fields}{ + \describe{ + \item{\code{dataEnv}:}{\code{environment}. Stores the slices of the data matrix. 
The slices should be accessed via \code{getSlice()} and \code{setSlice()} methods. } + \item{\code{nSlices1}:}{\code{numeric}. Number of slices. For internal use. The value should be accessed via \code{nSlices()} method. } + \item{\code{rowNameSlices}:}{\code{list}. Slices of row names. } + \item{\code{columnNames}:}{\code{character}. Column names. } + \item{\code{fileDelimiter}:}{\code{character}. Delimiter separating values in the input file. } + \item{\code{fileSkipColumns}:}{\code{numeric}. Number of columns with row labels in the input file. } + \item{\code{fileSkipRows}:}{\code{numeric}. Number of rows with column labels in the input file. } + \item{\code{fileSliceSize}:}{\code{numeric}. Maximum number of rows in a slice. } + \item{\code{fileOmitCharacters}:}{\code{character}. Missing value (NaN) representation in the input file. } + } +} +\section{Methods}{ + \describe{ + \item{\code{initialize(mat)}:}{ Create the object from a matrix. } + \item{\code{nSlices()}:}{ Returns the number of slices. } + \item{\code{nCols()}:}{ Returns the number of columns in the matrix. } + \item{\code{nRows()}:}{ Returns the number of rows in the matrix. } + \item{\code{Clear()}:}{ Clears the object. Removes the data slices and row and column names. } + \item{\code{Clone()}:}{ Makes a copy of the object. Changes to the copy do not affect the source object. } + \item{\code{CreateFromMatrix(mat)}:}{ Creates \code{SlicedData} object from a \code{\link[base]{matrix}}.} + \item{\code{LoadFile(filename, skipRows = NULL, skipColumns = NULL,} \cr + \code{sliceSize = NULL, omitCharacters = NULL, delimiter = NULL, rowNamesColumn = 1)}:}{ Loads data matrix from a file. \code{filename} should be a character string. The remaining parameters specify the file format and have the same meaning as \code{file*} fields. Additional \code{rowNamesColumn} parameter specifies which of the columns of row labels to use as row names.} + \item{\code{SaveFile(filename)}:}{ Saves the data to a file. 
\code{filename} should be a character string.} + \item{\code{getSlice(sl)}:}{ Retrieves \code{sl}-th slice of the matrix. } + \item{\code{setSlice(sl, value)}:}{ Set \code{sl}-th slice of the matrix. } + \item{\code{ColumnSubsample(subset)}:}{ Reorders/subsets the columns according to \code{subset}. \cr +Acts as \code{M = M[ ,subset]} for a matrix \code{M}. } + \item{\code{RowReorder(ordr)}:}{ Reorders rows according to \code{ordr}. \cr +Acts as \code{M = M[ordr, ]} for a matrix \code{M}. } + \item{\code{RowMatrixMultiply(multiplier)}:}{ Multiply each row by the \code{multiplier}. \cr +Acts as \code{M = M \%*\% multiplier} for a matrix \code{M}. } + \item{\code{CombineInOneSlice()}:}{ Combines all slices into one. The whole matrix can then be obtained via \code{$getSlice(1)}. } + \item{\code{IsCombined()}:}{ Returns \code{TRUE} if the number of slices is 1 or 0. } + \item{\code{ResliceCombined(sliceSize = -1)}:}{ Cuts the data into slices of \code{sliceSize} rows. If \code{sliceSize} is not defined, the value of \code{fileSliceSize} field is used.} + \item{\code{GetAllRowNames()}:}{ Returns all row names in one vector. } + \item{\code{RowStandardizeCentered()}:}{ Set the mean of each row to zero and the sum of squares to one. } + \item{\code{SetNanRowMean()}:}{ Impute rows with row mean. Rows full of NaN values are imputed with zeros. } + \item{\code{RowRemoveZeroEps()}:}{ Removes rows of zeros and those that are nearly zero. } + \item{\code{FindRow(rowname)}:}{ Finds row by name. Returns a pair of slice number and row number within the slice. If no row is found, the function returns \code{NULL}. } + \item{\code{rowMeans(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of row means. Works as \link[base]{rowMeans} but requires \code{dims} to be equal to \code{1L}.} + \item{\code{rowSums(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of row sums. 
Works as \link[base]{rowSums} but requires \code{dims} to be equal to \code{1L}.} + \item{\code{colMeans(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of column means. Works as \link[base]{colMeans} but requires \code{dims} to be equal to \code{1L}.} + \item{\code{colSums(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of column sums. Works as \link[base]{colSums} but requires \code{dims} to be equal to \code{1L}.} + } +} + +\usage{ +# x[[i]] indexing allows easy access to individual slices. +# It is equivalent to x$getSlice(i) and x$setSlice(i,value) +\S4method{[[}{SlicedData}(x, i) +\S4method{[[}{SlicedData}(x, i) <- value + +# The following commands work as if x was a simple matrix object +\S4method{nrow}{SlicedData}(x) +\S4method{ncol}{SlicedData}(x) +\S4method{dim}{SlicedData}(x) +\S4method{rownames}{SlicedData}(x) +\S4method{colnames}{SlicedData}(x) +\S4method{rownames}{SlicedData}(x) <- value +\S4method{colnames}{SlicedData}(x) <- value + +# SlicedData object can be easily transformed into a matrix +# preserving row and column names +\S4method{as.matrix}{SlicedData}(x) + +# length(x) can be used in place of x$nSlices() +# to get the number of slices in the object +\S4method{length}{SlicedData}(x) +} +\arguments{ + \item{x}{ + \code{\linkS4class{SlicedData}} object. + } + \item{i}{ + Number of a slice. + } + \item{value}{ + New content for the slice / new row or column names. + } +} + + +\examples{ + +# Create a SlicedData variable +sd = SlicedData$new(); + +# Show the details of the empty object +show(sd); + +# Create a matrix of values and assign to sd +mat = matrix(1:12, 3, 4); +rownames(mat) = c("row1","row2","row3"); +colnames(mat) = c("col1","col2","col3","col4"); +sd$CreateFromMatrix( mat ); + +# Show the detail of the object (one slice) +show(sd); + +# Slice it in pieces of 2 rows +sd$ResliceCombined(sliceSize = 2L); + +# Show the number of slices (equivalent function calls) +sd$nSlices() +length(sd) + +# Is it all in one slice? 
(No) +sd$IsCombined() + +# Show the column names (equivalent function calls) +sd$columnNames +colnames(sd) + +# Show row name slices +sd$rowNameSlices + +# Show all row names (equivalent function calls) +sd$GetAllRowNames() +rownames(sd) + +# Print the second slice +print( sd[[2]] ) + +# Reorder and subset columns +sd$ColumnSubsample( c(1,3,4) ); + +# Reorder and subset rows +sd$RowReorder( c(3,1) ); + +# Show the detail of the object (one slice again) +show(sd); + +# Is it all in one slice? (Yes) +sd$IsCombined() + +# Find the row with name "row1" (it is second in the first slice) +sd$FindRow("row1"); + +} \ No newline at end of file