Mercurial > repos > jasonxu > matrixeqtl
comparison MatrixEQTL/man/SlicedData-class.Rd @ 0:cd4c8e4a4b5b draft
Uploaded
author | jasonxu |
---|---|
date | Fri, 12 Mar 2021 08:12:46 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:cd4c8e4a4b5b |
---|---|
1 \name{SlicedData-class} | |
2 \Rdversion{1.1} | |
3 \docType{class} | |
4 \alias{SlicedData-class} | |
5 \alias{SlicedData} | |
6 \alias{SlicedData-class} | |
7 \alias{[[,SlicedData-method} | |
8 \alias{[[<-,SlicedData-method} | |
9 \alias{colnames,SlicedData-method} | |
10 \alias{colnames<-,SlicedData-method} | |
11 \alias{dim,SlicedData-method} | |
12 \alias{length,SlicedData-method} | |
13 \alias{ncol,SlicedData-method} | |
14 \alias{NCOL,SlicedData-method} | |
15 \alias{nrow,SlicedData-method} | |
16 \alias{NROW,SlicedData-method} | |
17 \alias{rownames,SlicedData-method} | |
18 \alias{rownames<-,SlicedData-method} | |
19 \alias{show,SlicedData-method} | |
20 \alias{as.matrix,SlicedData-method} | |
21 | |
22 \alias{rowMeans,SlicedData-method} | |
23 \alias{rowSums,SlicedData-method} | |
24 \alias{colMeans,SlicedData-method} | |
25 \alias{colSums,SlicedData-method} | |
26 \alias{summary.SlicedData} | |
27 | |
28 \title{Class \code{SlicedData} for storing large matrices} | |
29 \description{ | |
30 This class is created for fast and memory efficient manipulations with large datasets presented in matrix form. | |
31 It is used to load, store, and manipulate large datasets, e.g. genotype and gene expression matrices. | |
32 When a dataset is loaded, it is sliced into blocks of 1,000 rows (default size). | |
33 This allows imputing, standardizing, and performing other operations with the data with minimal memory overhead. | |
34 } | |
35 \section{Extends}{ | |
36 \code{SlicedData} is a reference class (\code{\linkS4class{envRefClass}}). | |
37 Its methods can change the values of the fields of the class. | |
38 } | |
39 \references{ | |
40 The package website: \url{http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/} | |
41 } | |
42 \author{ | |
43 Andrey Shabalin \email{ashabalin@vcu.edu} | |
44 } | |
45 \seealso{ | |
46 This class is used to load data for eQTL analysis by \code{\link{Matrix_eQTL_engine}}. | |
47 } | |
48 \keyword{classes} | |
49 \section{Fields}{ | |
50 \describe{ | |
51 \item{\code{dataEnv}:}{\code{environment}. Stores the slices of the data matrix. The slices should be accessed via \code{getSlice()} and \code{setSlice()} methods. } | |
52 \item{\code{nSlices1}:}{\code{numeric}. Number of slices. For internal use. The value should be accessed via \code{nSlices()} method. } | |
53 \item{\code{rowNameSlices}:}{\code{list}. Slices of row names. } | |
54 \item{\code{columnNames}:}{\code{character}. Column names. } | |
55 \item{\code{fileDelimiter}:}{\code{character}. Delimiter separating values in the input file. } | |
56 \item{\code{fileSkipColumns}:}{\code{numeric}. Number of columns with row labels in the input file. } | |
57 \item{\code{fileSkipRows}:}{\code{numeric}. Number of rows with column labels in the input file. } | |
58 \item{\code{fileSliceSize}:}{\code{numeric}. Maximum number of rows in a slice. } | |
59 \item{\code{fileOmitCharacters}:}{\code{character}. Missing value (NaN) representation in the input file. } | |
60 } | |
61 } | |
62 \section{Methods}{ | |
63 \describe{ | |
64 \item{\code{initialize(mat)}:}{ Create the object from a matrix. } | |
65 \item{\code{nSlices()}:}{ Returns the number of slices. } | |
66 \item{\code{nCols()}:}{ Returns the number of columns in the matrix. } | |
67 \item{\code{nRows()}:}{ Returns the number of rows in the matrix. } | |
68 \item{\code{Clear()}:}{ Clears the object. Removes the data slices and row and column names. } | |
69 \item{\code{Clone()}:}{ Makes a copy of the object. Changes to the copy do not affect the source object. } | |
70 \item{\code{CreateFromMatrix(mat)}:}{ Creates \code{SlicedData} object from a \code{\link[base]{matrix}}.} | |
71 \item{\code{LoadFile(filename, skipRows = NULL, skipColumns = NULL,} \cr | |
72 \code{sliceSize = NULL, omitCharacters = NULL, delimiter = NULL, rowNamesColumn = 1)}:}{ Loads data matrix from a file. \code{filename} should be a character string. The remaining parameters specify the file format and have the same meaning as \code{file*} fields. Additional \code{rowNamesColumn} parameter specifies which of the columns of row labels to use as row names.} | |
73 \item{\code{SaveFile(filename)}:}{ Saves the data to a file. \code{filename} should be a character string.} | |
74 \item{\code{getSlice(sl)}:}{ Retrieves \code{sl}-th slice of the matrix. } | |
75 \item{\code{setSlice(sl, value)}:}{ Set \code{sl}-th slice of the matrix. } | |
76 \item{\code{ColumnSubsample(subset)}:}{ Reorders/subsets the columns according to \code{subset}. \cr | |
77 Acts as \code{M = M[ ,subset]} for a matrix \code{M}. } | |
78 \item{\code{RowReorder(ordr)}:}{ Reorders rows according to \code{ordr}. \cr | |
79 Acts as \code{M = M[ordr, ]} for a matrix \code{M}. } | |
80 \item{\code{RowMatrixMultiply(multiplier)}:}{ Multiply each row by the \code{multiplier}. \cr | |
81 Acts as \code{M = M \%*\% multiplier} for a matrix \code{M}. } | |
82 \item{\code{CombineInOneSlice()}:}{ Combines all slices into one. The whole matrix can then be obtained via \code{$getSlice(1)}. } | |
83 \item{\code{IsCombined()}:}{ Returns \code{TRUE} if the number of slices is 1 or 0. } | |
84 \item{\code{ResliceCombined(sliceSize = -1)}:}{ Cuts the data into slices of \code{sliceSize} rows. If \code{sliceSize} is not defined, the value of \code{fileSliceSize} field is used.} | |
85 \item{\code{GetAllRowNames()}:}{ Returns all row names in one vector. } | |
86 \item{\code{RowStandardizeCentered()}:}{ Set the mean of each row to zero and the sum of squares to one. } | |
87 \item{\code{SetNanRowMean()}:}{ Imputes missing (NaN) values with the mean of their row. Rows consisting entirely of NaN values are imputed with zeros. } | |
88 \item{\code{RowRemoveZeroEps()}:}{ Removes rows of zeros and those that are nearly zero. } | |
89 \item{\code{FindRow(rowname)}:}{ Finds row by name. Returns a pair of slice number and row number within the slice. If no row is found, the function returns \code{NULL}. } | |
90 \item{\code{rowMeans(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of row means. Works as \link[base]{rowMeans} but requires \code{dims} to be equal to \code{1L}.} | |
91 \item{\code{rowSums(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of row sums. Works as \link[base]{rowSums} but requires \code{dims} to be equal to \code{1L}.} | |
92 \item{\code{colMeans(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of column means. Works as \link[base]{colMeans} but requires \code{dims} to be equal to \code{1L}.} | |
93 \item{\code{colSums(x, na.rm = FALSE, dims = 1L)}:}{Returns a vector of column sums. Works as \link[base]{colSums} but requires \code{dims} to be equal to \code{1L}.} | |
94 } | |
95 } | |
96 | |
97 \usage{ | |
98 # x[[i]] indexing allows easy access to individual slices. | |
99 # It is equivalent to x$getSlice(i) and x$setSlice(i, value) | |
100 \S4method{[[}{SlicedData}(x, i) | |
101 \S4method{[[}{SlicedData}(x, i) <- value | |
102 | |
103 # The following commands work as if x was a simple matrix object | |
104 \S4method{nrow}{SlicedData}(x) | |
105 \S4method{ncol}{SlicedData}(x) | |
106 \S4method{dim}{SlicedData}(x) | |
107 \S4method{rownames}{SlicedData}(x) | |
108 \S4method{colnames}{SlicedData}(x) | |
109 \S4method{rownames}{SlicedData}(x) <- value | |
110 \S4method{colnames}{SlicedData}(x) <- value | |
111 | |
112 # SlicedData object can be easily transformed into a matrix | |
113 # preserving row and column names | |
114 \S4method{as.matrix}{SlicedData}(x) | |
115 | |
116 # length(x) can be used in place of x$nSlices() | |
117 # to get the number of slices in the object | |
118 \S4method{length}{SlicedData}(x) | |
119 } | |
120 \arguments{ | |
121 \item{x}{ | |
122 \code{\linkS4class{SlicedData}} object. | |
123 } | |
124 \item{i}{ | |
125 Number of a slice. | |
126 } | |
127 \item{value}{ | |
128 New content for the slice / new row or column names. | |
129 } | |
130 } | |
131 | |
132 | |
133 \examples{ | |
134 | |
135 # Create a SlicedData variable | |
136 sd = SlicedData$new(); | |
137 | |
138 # Show the details of the empty object | |
139 show(sd); | |
140 | |
141 # Create a matrix of values and assign to sd | |
142 mat = matrix(1:12, 3, 4); | |
143 rownames(mat) = c("row1","row2","row3"); | |
144 colnames(mat) = c("col1","col2","col3","col4"); | |
145 sd$CreateFromMatrix( mat ); | |
146 | |
147 # Show the detail of the object (one slice) | |
148 show(sd); | |
149 | |
150 # Slice it in pieces of 2 rows | |
151 sd$ResliceCombined(sliceSize = 2L); | |
152 | |
153 # Show the number of slices (equivalent function calls) | |
154 sd$nSlices() | |
155 length(sd) | |
156 | |
157 # Is it all in one slice? (No) | |
158 sd$IsCombined() | |
159 | |
160 # Show the column names (equivalent function calls) | |
161 sd$columnNames | |
162 colnames(sd) | |
163 | |
164 # Show row name slices | |
165 sd$rowNameSlices | |
166 | |
167 # Show all row names (equivalent function calls) | |
168 sd$GetAllRowNames() | |
169 rownames(sd) | |
170 | |
171 # Print the second slice | |
172 print( sd[[2]] ) | |
173 | |
174 # Reorder and subset columns | |
175 sd$ColumnSubsample( c(1,3,4) ); | |
176 | |
177 # Reorder and subset rows | |
178 sd$RowReorder( c(3,1) ); | |
179 | |
180 # Show the detail of the object (one slice again) | |
181 show(sd); | |
182 | |
183 # Is it all in one slice? (Yes) | |
184 sd$IsCombined() | |
185 | |
186 # Find the row with name "row1" (it is second in the first slice) | |
187 sd$FindRow("row1"); | |
188 | |
189 } |