0
|
1 \name{SlicedData-class}
|
|
2 \Rdversion{1.1}
|
|
3 \docType{class}
|
|
4 \alias{SlicedData-class}
|
|
5 \alias{SlicedData}
|
|
6 \alias{SlicedData-class}
|
|
7 \alias{[[,SlicedData-method}
|
|
8 \alias{[[<-,SlicedData-method}
|
|
9 \alias{colnames,SlicedData-method}
|
|
10 \alias{colnames<-,SlicedData-method}
|
|
11 \alias{dim,SlicedData-method}
|
|
12 \alias{length,SlicedData-method}
|
|
13 \alias{ncol,SlicedData-method}
|
|
14 \alias{NCOL,SlicedData-method}
|
|
15 \alias{nrow,SlicedData-method}
|
|
16 \alias{NROW,SlicedData-method}
|
|
17 \alias{rownames,SlicedData-method}
|
|
18 \alias{rownames<-,SlicedData-method}
|
|
19 \alias{show,SlicedData-method}
|
|
20 \alias{as.matrix,SlicedData-method}
|
|
21
|
|
22 \alias{rowMeans,SlicedData-method}
|
|
23 \alias{rowSums,SlicedData-method}
|
|
24 \alias{colMeans,SlicedData-method}
|
|
25 \alias{colSums,SlicedData-method}
|
|
26 \alias{summary.SlicedData}
|
|
27
|
|
28 \title{Class \code{SlicedData} for storing large matrices}
|
|
29 \description{
|
|
30 This class is designed for fast and memory-efficient manipulation of large datasets presented in matrix form.
|
|
31 It is used to load, store, and manipulate large datasets, e.g. genotype and gene expression matrices.
|
|
32 When a dataset is loaded, it is sliced into blocks of 1,000 rows (default size).
|
|
33 This allows imputing, standardizing, and performing other operations with the data with minimal memory overhead.
|
|
34 }
|
|
35 \section{Extends}{
|
|
36 \code{SlicedData} is a reference class (\code{\linkS4class{envRefClass}}).
|
|
37 Its methods can change the values of the fields of the class.
|
|
38 }
|
|
39 \references{
|
|
40 The package website: \url{http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/}
|
|
41 }
|
|
42 \author{
|
|
43 Andrey Shabalin \email{ashabalin@vcu.edu}
|
|
44 }
|
|
45 \seealso{
|
|
46 This class is used to load data for eQTL analysis by \code{\link{Matrix_eQTL_engine}}.
|
|
47 }
|
|
48 \keyword{classes}
|
|
49 \section{Fields}{
|
|
50 \describe{
|
|
51 \item{\code{dataEnv}:}{\code{environment}. Stores the slices of the data matrix. The slices should be accessed via \code{getSlice()} and \code{setSlice()} methods. }
|
|
52 \item{\code{nSlices1}:}{\code{numeric}. Number of slices. For internal use. The value should be accessed via \code{nSlices()} method. }
|
|
53 \item{\code{rowNameSlices}:}{\code{list}. Slices of row names. }
|
|
54 \item{\code{columnNames}:}{\code{character}. Column names. }
|
|
55 \item{\code{fileDelimiter}:}{\code{character}. Delimiter separating values in the input file. }
|
|
56 \item{\code{fileSkipColumns}:}{\code{numeric}. Number of columns with row labels in the input file. }
|
|
57 \item{\code{fileSkipRows}:}{\code{numeric}. Number of rows with column labels in the input file. }
|
|
58 \item{\code{fileSliceSize}:}{\code{numeric}. Maximum number of rows in a slice. }
|
|
59 \item{\code{fileOmitCharacters}:}{\code{character}. Missing value (NaN) representation in the input file. }
|
|
60 }
|
|
61 }
|
|
62 \section{Methods}{
|
|
63 \describe{
|
|
64 \item{\code{initialize(mat)}:}{ Create the object from a matrix. }
|
|
65 \item{\code{nSlices()}:}{ Returns the number of slices. }
|
|
66 \item{\code{nCols()}:}{ Returns the number of columns in the matrix. }
|
|
67 \item{\code{nRows()}:}{ Returns the number of rows in the matrix. }
|
|
68 \item{\code{Clear()}:}{ Clears the object. Removes the data slices and row and column names. }
|
|
69 \item{\code{Clone()}:}{ Makes a copy of the object. Changes to the copy do not affect the source object. }
|
|
70 \item{\code{CreateFromMatrix(mat)}:}{ Creates \code{SlicedData} object from a \code{\link[base]{matrix}}.}
|
|
71 \item{\code{LoadFile(filename, skipRows = NULL, skipColumns = NULL,} \cr
|
|
72 \code{sliceSize = NULL, omitCharacters = NULL, delimiter = NULL, rowNamesColumn = 1)}:}{ Loads data matrix from a file. \code{filename} should be a character string. The remaining parameters specify the file format and have the same meaning as \code{file*} fields. Additional \code{rowNamesColumn} parameter specifies which of the columns of row labels to use as row names.}
|
|
73 \item{\code{SaveFile(filename)}:}{ Saves the data to a file. \code{filename} should be a character string.}
|
|
74 \item{\code{getSlice(sl)}:}{ Retrieves \code{sl}-th slice of the matrix. }
|
|
75 \item{\code{setSlice(sl, value)}:}{ Sets the \code{sl}-th slice of the matrix to \code{value}. }
|
|
76 \item{\code{ColumnSubsample(subset)}:}{ Reorders/subsets the columns according to \code{subset}. \cr
|
|
77 Acts as \code{M = M[ ,subset]} for a matrix \code{M}. }
|
|
78 \item{\code{RowReorder(ordr)}:}{ Reorders rows according to \code{ordr}. \cr
|
|
79 Acts as \code{M = M[ordr, ]} for a matrix \code{M}. }
|
|
80 \item{\code{RowMatrixMultiply(multiplier)}:}{ Multiply each row by the \code{multiplier}. \cr
|
|
81 Acts as \code{M = M \%*\% multiplier} for a matrix \code{M}. }
|
|
82 \item{\code{CombineInOneSlice()}:}{ Combines all slices into one. The whole matrix can then be obtained via \code{$getSlice(1)}. }
|
|
83 \item{\code{IsCombined()}:}{ Returns \code{TRUE} if the number of slices is 1 or 0. }
|
|
84 \item{\code{ResliceCombined(sliceSize = -1)}:}{ Cuts the data into slices of \code{sliceSize} rows. If \code{sliceSize} is not defined, the value of \code{fileSliceSize} field is used.}
|
|
85 \item{\code{GetAllRowNames()}:}{ Returns all row names in one vector. }
|
|
86 \item{\code{RowStandardizeCentered()}:}{ Set the mean of each row to zero and the sum of squares to one. }
|
|
87 \item{\code{SetNanRowMean()}:}{ Imputes missing (NaN) values in each row with the row mean. Rows full of NaN values are imputed with zeros. }
|
|
88 \item{\code{RowRemoveZeroEps()}:}{ Removes rows of zeros and those that are nearly zero. }
|
|
89 \item{\code{FindRow(rowname)}:}{ Finds row by name. Returns a pair of slice number and row number within the slice. If no row is found, the function returns \code{NULL}. }
|
|
90 \item{\code{rowMeans(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of row means. Works as \link[base]{rowMeans} but requires \code{dims} to be equal to \code{1L}.}
|
|
91 \item{\code{rowSums(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of row sums. Works as \link[base]{rowSums} but requires \code{dims} to be equal to \code{1L}.}
|
|
92 \item{\code{colMeans(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of column means. Works as \link[base]{colMeans} but requires \code{dims} to be equal to \code{1L}.}
|
|
93 \item{\code{colSums(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of column sums. Works as \link[base]{colSums} but requires \code{dims} to be equal to \code{1L}.}
|
|
94 }
|
|
95 }
|
|
96
|
|
97 \usage{
|
|
98 # x[[i]] indexing allows easy access to individual slices.
|
|
99 # It is equivalent to x$getSlice(i) and x$setSlice(i, value)
|
|
100 \S4method{[[}{SlicedData}(x, i)
|
|
101 \S4method{[[}{SlicedData}(x, i) <- value
|
|
102
|
|
103 # The following commands work as if x was a simple matrix object
|
|
104 \S4method{nrow}{SlicedData}(x)
|
|
105 \S4method{ncol}{SlicedData}(x)
|
|
106 \S4method{dim}{SlicedData}(x)
|
|
107 \S4method{rownames}{SlicedData}(x)
|
|
108 \S4method{colnames}{SlicedData}(x)
|
|
109 \S4method{rownames}{SlicedData}(x) <- value
|
|
110 \S4method{colnames}{SlicedData}(x) <- value
|
|
111
|
|
112 # SlicedData object can be easily transformed into a matrix
|
|
113 # preserving row and column names
|
|
114 \S4method{as.matrix}{SlicedData}(x)
|
|
115
|
|
116 # length(x) can be used in place of x$nSlices()
|
|
117 # to get the number of slices in the object
|
|
118 \S4method{length}{SlicedData}(x)
|
|
119 }
|
|
120 \arguments{
|
|
121 \item{x}{
|
|
122 \code{\linkS4class{SlicedData}} object.
|
|
123 }
|
|
124 \item{i}{
|
|
125 Number of a slice.
|
|
126 }
|
|
127 \item{value}{
|
|
128 New content for the slice / new row or column names.
|
|
129 }
|
|
130 }
|
|
131
|
|
132
|
|
133 \examples{
|
|
134
|
|
135 # Create a SlicedData variable
|
|
136 sd = SlicedData$new();
|
|
137
|
|
138 # Show the details of the empty object
|
|
139 show(sd);
|
|
140
|
|
141 # Create a matrix of values and assign to sd
|
|
142 mat = matrix(1:12, 3, 4);
|
|
143 rownames(mat) = c("row1","row2","row3");
|
|
144 colnames(mat) = c("col1","col2","col3","col4");
|
|
145 sd$CreateFromMatrix( mat );
|
|
146
|
|
147 # Show the details of the object (one slice)
|
|
148 show(sd);
|
|
149
|
|
150 # Slice it in pieces of 2 rows
|
|
151 sd$ResliceCombined(sliceSize = 2L);
|
|
152
|
|
153 # Show the number of slices (equivalent function calls)
|
|
154 sd$nSlices()
|
|
155 length(sd)
|
|
156
|
|
157 # Is it all in one slice? (No)
|
|
158 sd$IsCombined()
|
|
159
|
|
160 # Show the column names (equivalent function calls)
|
|
161 sd$columnNames
|
|
162 colnames(sd)
|
|
163
|
|
164 # Show row name slices
|
|
165 sd$rowNameSlices
|
|
166
|
|
167 # Show all row names (equivalent function calls)
|
|
168 sd$GetAllRowNames()
|
|
169 rownames(sd)
|
|
170
|
|
171 # Print the second slice
|
|
172 print( sd[[2]] )
|
|
173
|
|
174 # Reorder and subset columns
|
|
175 sd$ColumnSubsample( c(1,3,4) );
|
|
176
|
|
177 # Reorder and subset rows
|
|
178 sd$RowReorder( c(3,1) );
|
|
179
|
|
180 # Show the details of the object (one slice again)
|
|
181 show(sd);
|
|
182
|
|
183 # Is it all in one slice? (Yes)
|
|
184 sd$IsCombined()
|
|
185
|
|
186 # Find the row with name "row1" (it is second in the first slice)
|
|
187 sd$FindRow("row1");
|
|
188
|
|
189 } |