comparison MatrixEQTL/man/SlicedData-class.Rd @ 0:cd4c8e4a4b5b draft

Uploaded
author jasonxu
date Fri, 12 Mar 2021 08:12:46 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:cd4c8e4a4b5b
1 \name{SlicedData-class}
2 \Rdversion{1.1}
3 \docType{class}
4 \alias{SlicedData-class}
5 \alias{SlicedData}
6 \alias{SlicedData-class}
7 \alias{[[,SlicedData-method}
8 \alias{[[<-,SlicedData-method}
9 \alias{colnames,SlicedData-method}
10 \alias{colnames<-,SlicedData-method}
11 \alias{dim,SlicedData-method}
12 \alias{length,SlicedData-method}
13 \alias{ncol,SlicedData-method}
14 \alias{NCOL,SlicedData-method}
15 \alias{nrow,SlicedData-method}
16 \alias{NROW,SlicedData-method}
17 \alias{rownames,SlicedData-method}
18 \alias{rownames<-,SlicedData-method}
19 \alias{show,SlicedData-method}
20 \alias{as.matrix,SlicedData-method}
21
22 \alias{rowMeans,SlicedData-method}
23 \alias{rowSums,SlicedData-method}
24 \alias{colMeans,SlicedData-method}
25 \alias{colSums,SlicedData-method}
26 \alias{summary.SlicedData}
27
28 \title{Class \code{SlicedData} for storing large matrices}
29 \description{
30 This class is created for fast and memory efficient manipulations with large datasets presented in matrix form.
31 It is used to load, store, and manipulate large datasets, e.g. genotype and gene expression matrices.
32 When a dataset is loaded, it is sliced into blocks of 1,000 rows (default size).
33 This allows imputing, standardizing, and performing other operations with the data with minimal memory overhead.
34 }
35 \section{Extends}{
36 \code{SlicedData} is a reference class (\code{\linkS4class{envRefClass}}).
37 Its methods can change the values of the fields of the class.
38 }
39 \references{
40 The package website: \url{http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/}
41 }
42 \author{
43 Andrey Shabalin \email{ashabalin@vcu.edu}
44 }
45 \seealso{
46 This class is used to load data for eQTL analysis by \code{\link{Matrix_eQTL_engine}}.
47 }
48 \keyword{classes}
49 \section{Fields}{
50 \describe{
51 \item{\code{dataEnv}:}{\code{environment}. Stores the slices of the data matrix. The slices should be accessed via \code{getSlice()} and \code{setSlice()} methods. }
52 \item{\code{nSlices1}:}{\code{numeric}. Number of slices. For internal use. The value should be accessed via the \code{nSlices()} method. }
53 \item{\code{rowNameSlices}:}{\code{list}. Slices of row names. }
54 \item{\code{columnNames}:}{\code{character}. Column names. }
55 \item{\code{fileDelimiter}:}{\code{character}. Delimiter separating values in the input file. }
56 \item{\code{fileSkipColumns}:}{\code{numeric}. Number of columns with row labels in the input file. }
57 \item{\code{fileSkipRows}:}{\code{numeric}. Number of rows with column labels in the input file. }
58 \item{\code{fileSliceSize}:}{\code{numeric}. Maximum number of rows in a slice. }
59 \item{\code{fileOmitCharacters}:}{\code{character}. Missing value (NaN) representation in the input file. }
60 }
61 }
62 \section{Methods}{
63 \describe{
64 \item{\code{initialize(mat)}:}{ Create the object from a matrix. }
65 \item{\code{nSlices()}:}{ Returns the number of slices. }
66 \item{\code{nCols()}:}{ Returns the number of columns in the matrix. }
67 \item{\code{nRows()}:}{ Returns the number of rows in the matrix. }
68 \item{\code{Clear()}:}{ Clears the object. Removes the data slices and row and column names. }
69 \item{\code{Clone()}:}{ Makes a copy of the object. Changes to the copy do not affect the source object. }
70 \item{\code{CreateFromMatrix(mat)}:}{ Creates \code{SlicedData} object from a \code{\link[base]{matrix}}.}
71 \item{\code{LoadFile(filename, skipRows = NULL, skipColumns = NULL,} \cr
72 \code{sliceSize = NULL, omitCharacters = NULL, delimiter = NULL, rowNamesColumn = 1)}:}{ Loads data matrix from a file. \code{filename} should be a character string. The remaining parameters specify the file format and have the same meaning as \code{file*} fields. Additional \code{rowNamesColumn} parameter specifies which of the columns of row labels to use as row names.}
73 \item{\code{SaveFile(filename)}:}{ Saves the data to a file. \code{filename} should be a character string.}
74 \item{\code{getSlice(sl)}:}{ Retrieves \code{sl}-th slice of the matrix. }
75 \item{\code{setSlice(sl, value)}:}{ Set \code{sl}-th slice of the matrix. }
76 \item{\code{ColumnSubsample(subset)}:}{ Reorders/subsets the columns according to \code{subset}. \cr
77 Acts as \code{M = M[ ,subset]} for a matrix \code{M}. }
78 \item{\code{RowReorder(ordr)}:}{ Reorders rows according to \code{ordr}. \cr
79 Acts as \code{M = M[ordr, ]} for a matrix \code{M}. }
80 \item{\code{RowMatrixMultiply(multiplier)}:}{ Multiply each row by the \code{multiplier}. \cr
81 Acts as \code{M = M \%*\% multiplier} for a matrix \code{M}. }
82 \item{\code{CombineInOneSlice()}:}{ Combines all slices into one. The whole matrix can then be obtained via \code{$getSlice(1)}. }
83 \item{\code{IsCombined()}:}{ Returns \code{TRUE} if the number of slices is 1 or 0. }
84 \item{\code{ResliceCombined(sliceSize = -1)}:}{ Cuts the data into slices of \code{sliceSize} rows. If \code{sliceSize} is not defined, the value of \code{fileSliceSize} field is used.}
85 \item{\code{GetAllRowNames()}:}{ Returns all row names in one vector. }
86 \item{\code{RowStandardizeCentered()}:}{ Set the mean of each row to zero and the sum of squares to one. }
87 \item{\code{SetNanRowMean()}:}{ Imputes missing (NaN) values in each row with the row mean. Rows full of NaN values are imputed with zeros. }
88 \item{\code{RowRemoveZeroEps()}:}{ Removes rows of zeros and those that are nearly zero. }
89 \item{\code{FindRow(rowname)}:}{ Finds a row by name. Returns a pair of the slice number and the row number within the slice. If no row is found, the function returns \code{NULL}. }
90 \item{\code{rowMeans(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of row means. Works as \link[base]{rowMeans} but requires \code{dims} to be equal to \code{1L}.}
91 \item{\code{rowSums(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of row sums. Works as \link[base]{rowSums} but requires \code{dims} to be equal to \code{1L}.}
92 \item{\code{colMeans(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of column means. Works as \link[base]{colMeans} but requires \code{dims} to be equal to \code{1L}.}
93 \item{\code{colSums(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of column sums. Works as \link[base]{colSums} but requires \code{dims} to be equal to \code{1L}.}
94 }
95 }
96
97 \usage{
98 # x[[i]] indexing allows easy access to individual slices.
99 # It is equivalent to x$getSlice(i) and x$setSlice(i, value)
100 \S4method{[[}{SlicedData}(x, i)
101 \S4method{[[}{SlicedData}(x, i) <- value
102
103 # The following commands work as if x was a simple matrix object
104 \S4method{nrow}{SlicedData}(x)
105 \S4method{ncol}{SlicedData}(x)
106 \S4method{dim}{SlicedData}(x)
107 \S4method{rownames}{SlicedData}(x)
108 \S4method{colnames}{SlicedData}(x)
109 \S4method{rownames}{SlicedData}(x) <- value
110 \S4method{colnames}{SlicedData}(x) <- value
111
112 # SlicedData object can be easily transformed into a matrix
113 # preserving row and column names
114 \S4method{as.matrix}{SlicedData}(x)
115
116 # length(x) can be used in place of x$nSlices()
117 # to get the number of slices in the object
118 \S4method{length}{SlicedData}(x)
119 }
120 \arguments{
121 \item{x}{
122 \code{\linkS4class{SlicedData}} object.
123 }
124 \item{i}{
125 Number of a slice.
126 }
127 \item{value}{
128 New content for the slice / new row or column names.
129 }
130 }
131
132
133 \examples{
134
135 # Create a SlicedData variable
136 sd = SlicedData$new();
137
138 # Show the details of the empty object
139 show(sd);
140
141 # Create a matrix of values and assign to sd
142 mat = matrix(1:12, 3, 4);
143 rownames(mat) = c("row1","row2","row3");
144 colnames(mat) = c("col1","col2","col3","col4");
145 sd$CreateFromMatrix( mat );
146
147 # Show the detail of the object (one slice)
148 show(sd);
149
150 # Slice it in pieces of 2 rows
151 sd$ResliceCombined(sliceSize = 2L);
152
153 # Show the number of slices (equivalent function calls)
154 sd$nSlices()
155 length(sd)
156
157 # Is it all in one slice? (No)
158 sd$IsCombined()
159
160 # Show the column names (equivalent function calls)
161 sd$columnNames
162 colnames(sd)
163
164 # Show row name slices
165 sd$rowNameSlices
166
167 # Show all row names (equivalent function calls)
168 sd$GetAllRowNames()
169 rownames(sd)
170
171 # Print the second slice
172 print( sd[[2]] )
173
174 # Reorder and subset columns
175 sd$ColumnSubsample( c(1,3,4) );
176
177 # Reorder and subset rows
178 sd$RowReorder( c(3,1) );
179
180 # Show the detail of the object (one slice again)
181 show(sd);
182
183 # Is it all in one slice? (Yes)
184 sd$IsCombined()
185
186 # Find the row with name "row1" (it is second in the first slice)
187 sd$FindRow("row1");
188
189 }