annotate MatrixEQTL/R/Matrix_eQTL_engine.R @ 0:cd4c8e4a4b5b draft

Uploaded
author jasonxu
date Fri, 12 Mar 2021 08:12:46 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1 # Matrix eQTL by Andrey A. Shabalin
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
2 # http://www.bios.unc.edu/research/genomic_software/Matrix_eQTL/
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
3
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
4 # http://cran.r-project.org/web/packages/policies.html
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
5
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
6 library(methods)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
7
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
8 modelLINEAR = 117348L;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
9 modelANOVA = 47074L;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
10 modelLINEAR_CROSS = 1113461L;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
11
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
12 .seq = function(a,b){if(a<=b){a:b}else{integer(0)}};
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
13
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
14 SlicedData <- setRefClass( "SlicedData",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
15 fields = list(
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
16 dataEnv = "environment",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
17 nSlices1 = "numeric",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
18 rowNameSlices = "list",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
19 columnNames = "character",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
20 fileDelimiter = "character",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
21 fileSkipColumns = "numeric",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
22 fileSkipRows = "numeric",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
23 fileSliceSize = "numeric",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
24 fileOmitCharacters = "character"
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
25 ),
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
26 methods = list(
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
27 initialize = function( mat = NULL ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
28 dataEnv <<- new.env(hash = TRUE, size = 29L);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
29 nSlices1 <<- 0L;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
30 if(!is.null(mat)) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
31 CreateFromMatrix(mat);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
32 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
33 fileSliceSize <<- 1000;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
34 fileDelimiter <<- "\t";
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
35 fileSkipColumns <<- 1L;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
36 fileSkipRows <<- 1L;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
37 fileOmitCharacters <<- "NA"
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
38 return(invisible(.self));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
39 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
40 CreateFromMatrix = function( mat ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
41 stopifnot( class(mat) == "matrix" );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
42 setSliceRaw( 1L ,mat );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
43 rns = rownames( mat, do.NULL = FALSE);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
44 #if( is.null(rns) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
45 # rns = paste( "Row_",(1:nrow(mat)), sep="" );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
46 #}
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
47 rowNameSlices <<- list(rns);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
48 cns = colnames( mat, do.NULL = FALSE );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
49 #if( is.null(cns) ){
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
50 # cns = paste( "Col_",(1:ncol(mat)), sep="" );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
51 #}
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
52 columnNames <<- cns;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
53 return(invisible(.self));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
54 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
55 getSlice = function(sl) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
56 value = get(paste(sl), dataEnv);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
57 if( is.raw(value) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
58 storage.mode(value) = "double";
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
59 value[value == 255] = NA;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
60 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
61 return( value )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
62 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
63 getSliceRaw = function(sl) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
64 return( get(paste(sl), dataEnv) )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
65 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
66 setSliceRaw = function(sl, value) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
67 assign( paste(sl), value, dataEnv )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
68 if( nSlices1 < sl ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
69 nSlices1 <<- sl;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
70 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
71 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
72 setSlice = function(sl, value) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
73 if( length(value) > 0 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
74 if( all(as.integer(value) == value, na.rm = TRUE) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
75 if( (min(value, na.rm = TRUE) >= 0 ) &&
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
76 (max(value, na.rm = TRUE) < 255) )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
77 {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
78 nv = value;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
79 suppressWarnings({storage.mode(nv) = "raw"});
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
80 nv[ is.na(value)] = as.raw(255);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
81 value = nv;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
82 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
83 storage.mode(value) = "integer";
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
84 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
85 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
86 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
87 setSliceRaw(sl, value);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
88 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
89 nSlices = function() {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
90 return( nSlices1 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
91 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
92 LoadFile = function(filename, skipRows = NULL, skipColumns = NULL, sliceSize = NULL, omitCharacters = NULL, delimiter = NULL, rowNamesColumn = 1) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
93 if( !is.null(skipRows) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
94 fileSkipRows <<- skipRows;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
95 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
96 if( !is.null(skipColumns) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
97 fileSkipColumns <<- skipColumns;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
98 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
99 if( !is.null(omitCharacters) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
100 fileOmitCharacters <<- omitCharacters;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
101 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
102 if( !is.null(sliceSize) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
103 fileSliceSize <<- sliceSize;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
104 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
105 if( !is.null(delimiter) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
106 fileDelimiter <<- delimiter;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
107 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
108 stopifnot( (fileSkipColumns == 0) || (rowNamesColumn <= fileSkipColumns) )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
109 stopifnot( (fileSkipColumns == 0) || (rowNamesColumn >= 1) )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
110
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
111 fid = file(description = filename, open = "rt", blocking = FALSE, raw = FALSE)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
112 # clean object if file is open
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
113 Clear();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
114 lines = readLines(con = fid, n = max(fileSkipRows,1L), ok = TRUE, warn = TRUE)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
115 line1 = tail(lines,1);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
116 splt = strsplit(line1, split = fileDelimiter, fixed = TRUE);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
117 if( fileSkipRows > 0L ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
118 columnNames <<- splt[[1]]; # [ -(1:fileSkipColumns) ];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
119 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
120 seek(fid, 0)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
121 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
122
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
123 rm( lines, line1, splt );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
124
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
125 rowNameSlices <<- vector("list", 15);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
126
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
127 curSliceId = 0L;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
128 repeat
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
129 {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
130 # preallocate names and data
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
131 if(length(rowNameSlices) < curSliceId) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
132 rowNameSlices[[2L*curSliceId]] <<- NULL;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
133 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
134 curSliceId = curSliceId + 1L;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
135
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
136 # read sliceSize rows
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
137 rowtag = vector("character",fileSliceSize);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
138 rowvals = vector("list",fileSliceSize);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
139 for(i in 1:fileSliceSize) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
140 temp = "";
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
141 if( fileSkipColumns > 0L ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
142 temp = scan(file = fid, what = character(), n = fileSkipColumns, quiet = TRUE,sep = fileDelimiter);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
143 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
144 rowtag[i] = temp[rowNamesColumn];#paste(temp,collapse=" ");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
145 rowvals[[i]] = scan(file = fid, what = double(), nlines = 1, quiet = TRUE, sep = fileDelimiter, na.strings = fileOmitCharacters);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
146 if( length(rowvals[[i]]) == 0L ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
147 if(i==1L) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
148 rowtag = matrix(0, 0, 0);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
149 rowvals = character(0);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
150 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
151 rowtag = rowtag[ 1:(i-1) ];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
152 rowvals = rowvals[ 1:(i-1) ];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
153 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
154 break;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
155 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
156 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
157 if( length(rowtag) == 0L ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
158 curSliceId = curSliceId - 1L;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
159 break;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
160 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
161 rowNameSlices[[curSliceId]] <<- rowtag;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
162 data = c(rowvals, recursive = TRUE);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
163 dim(data) = c(length(rowvals[[1]]), length(rowvals));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
164 data = t(data);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
165 setSlice(curSliceId, data);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
166 if( length(rowtag) < fileSliceSize ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
167 break;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
168 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
169 numtxt = formatC(curSliceId*fileSliceSize, big.mark=",", format = "f", digits = 0)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
170 cat( "Rows read: ", numtxt, "\n");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
171 flush.console()
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
172 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
173 close(fid)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
174 if( fileSkipRows == 0 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
175 columnNames <<- paste("Col_", (1:nCols()), sep="");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
176 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
177 columnNames <<- tail(columnNames, ncol(getSliceRaw(1)));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
178 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
179 if( fileSkipColumns == 0 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
180 cnt = 0L;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
181 for( sl in 1:nSlices() ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
182 nr = length(getSliceRaw(sl));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
183 rowNameSlices[[sl]] <<- paste("Row_",cnt + (1:nr),sep="");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
184 cnt = cnt + nr;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
185 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
186 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
187 rowNameSlices <<- rowNameSlices[1:curSliceId];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
188 cat("Rows read: ", nRows(), " done.\n");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
189 return(invisible(.self));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
190 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
191 SaveFile = function(filename) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
192 if( nSlices() == 0 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
193 cat("No data to save");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
194 return();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
195 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
196 fid = file(filename,"wt");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
197 for( sl in 1:nSlices() ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
198 z = getSlice(sl);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
199 rownames(z) = rowNameSlices[[sl]];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
200 colnames(z) = columnNames;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
201 write.table(z, file = fid, sep = "\t",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
202 col.names = (if(sl == 1){NA}else{FALSE}));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
203 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
204 close(fid);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
205 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
206 nRows = function() {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
207 s = 0L;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
208 for(sl in .seq(1,nSlices())) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
209 s = s + nrow(getSliceRaw(sl));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
210 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
211 return( s )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
212 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
213 nCols = function() {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
214 if( nSlices() == 0L ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
215 return(0L);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
216 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
217 return( ncol(getSliceRaw(1L)) )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
218 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
219 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
220 Clear = function() {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
221 for( sl in .seq(1,nSlices()) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
222 rm(list = paste(sl), envir = dataEnv)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
223 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
224 nSlices1 <<- 0L;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
225 rowNameSlices <<- list();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
226 columnNames <<- character();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
227 return(invisible(.self));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
228 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
229 IsCombined = function() {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
230 return( nSlices() <= 1L );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
231 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
232 GetAllRowNames = function() {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
233 return( c(rowNameSlices, recursive=TRUE) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
234 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
235 SetNanRowMean = function() {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
236 if( (nCols() == 0L) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
237 return(invisible(.self));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
238 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
239 for( sl in .seq(1,nSlices()) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
240 slice = getSlice(sl);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
241 if( any(is.na(slice)) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
242 rowmean = rowMeans(slice, na.rm = TRUE);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
243 rowmean[is.na(rowmean)] = 0L;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
244 for( j in which(!complete.cases(slice)) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
245 where1 = is.na(slice[j, ]);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
246 slice[j, where1] = rowmean[j];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
247 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
248 setSlice(sl, slice);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
249 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
250 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
251 return(invisible(.self));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
252 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
253 RowStandardizeCentered = function() {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
254 for(sl in .seq(1,nSlices()) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
255 slice = getSlice(sl);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
256 div = sqrt( rowSums(slice^2) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
257 div[ div == 0 ] = 1;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
258 setSlice(sl, slice/div);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
259 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
260 return(invisible(.self));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
261 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
262 CombineInOneSlice = function() {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
263 if( nSlices() <= 1L ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
264 return(invisible(.self));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
265 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
266 nc = nCols();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
267 nr = nRows();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
268 datatypes = c("raw","integer","double");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
269 datafuns = c(as.raw, as.integer, as.double);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
270 datatype = character(nSlices());
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
271 for(sl in 1:nSlices()) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
272 datatype[sl] = typeof(getSliceRaw(sl));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
273 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
274 mch = max(match(datatype,datatypes,nomatch = length(datatypes)));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
275 datafun = datafuns[[mch]];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
276 newData = matrix(datafun(0), nrow = nr, ncol = nc);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
277 offset = 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
278 for(sl in 1:nSlices()) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
279 if(mch==1) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
280 slice = getSliceRaw(sl);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
281 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
282 slice = getSlice(sl);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
283 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
284 newData[ offset + (1:nrow(slice)),] = datafun(slice);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
285 setSlice(sl, numeric());
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
286 offset = offset + nrow(slice);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
287 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
288
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
289 nSlices1 <<- 1L;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
290 setSliceRaw(1L, newData);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
291 rm(newData);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
292
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
293 newrowNameSlices = GetAllRowNames();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
294 rowNameSlices <<- list(newrowNameSlices)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
295 return(invisible(.self));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
296 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
297 ResliceCombined = function(sliceSize = -1) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
298 if( sliceSize > 0L ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
299 fileSliceSize <<- sliceSize;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
300 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
301 if( fileSliceSize <= 0 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
302 fileSliceSize <<- 1000;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
303 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
304 if( IsCombined() ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
305 nRows1 = nRows();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
306 if(nRows1 == 0L) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
307 return(invisible(.self));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
308 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
309 newNSlices = floor( (nRows1 + fileSliceSize - 1)/fileSliceSize );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
310 oldData = getSliceRaw(1L);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
311 #oldNames = rowNameSlices[[1]];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
312 newNameslices = vector("list",newNSlices)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
313 for( sl in 1:newNSlices ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
314 range = (1+(sl-1)*fileSliceSize) : (min(nRows1,sl*fileSliceSize));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
315 newpart = oldData[range, ,drop = FALSE];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
316 if( is.raw(oldData) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
317 setSliceRaw( sl, newpart);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
318 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
319 setSlice( sl, newpart);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
320 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
321 newNameslices[[sl]] = rowNameSlices[[1]][range];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
322 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
323 rowNameSlices <<- newNameslices ;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
324 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
325 stop("Reslice of a sliced matrix is not supported yet. Use CombineInOneSlice first.");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
326 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
327 return(invisible(.self));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
328 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
329 Clone = function() {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
330 clone = SlicedData$new();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
331 for(sl in .seq(1,nSlices()) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
332 clone$setSliceRaw(sl,getSliceRaw(sl));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
333 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
334 clone$rowNameSlices = rowNameSlices;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
335 clone$columnNames = columnNames;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
336 clone$fileDelimiter = fileDelimiter;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
337 clone$fileSkipColumns = fileSkipColumns;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
338 clone$fileSkipRows = fileSkipRows;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
339 clone$fileSliceSize = fileSliceSize;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
340 clone$fileOmitCharacters = fileOmitCharacters;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
341 return( clone );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
342 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
343 RowMatrixMultiply = function(multiplier) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
344 for(sl in .seq(1,nSlices()) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
345 setSlice(sl, getSlice(sl) %*% multiplier);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
346 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
347 return(invisible(.self));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
348 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
349 ColumnSubsample = function(subset) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
350 for(sl in .seq(1,nSlices()) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
351 setSliceRaw(sl, getSliceRaw(sl)[ ,subset, drop = FALSE]);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
352 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
353 columnNames <<- columnNames[subset];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
354 return(invisible(.self));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
355 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
356 RowReorderSimple = function(ordr) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
357 # had to use an inefficient and dirty method
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
358 # due to horrible memory management in R
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
359 if( (typeof(ordr) == "logical") && all(ordr) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
360 return(invisible(.self));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
361 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
362 if( (length(ordr) == nRows()) && all(ordr == (1:length(ordr))) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
363 return(invisible(.self));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
364 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
365 CombineInOneSlice();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
366 gc();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
367 setSliceRaw( 1L, getSliceRaw(1L)[ordr, ] );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
368 rowNameSlices[[1]] <<- rowNameSlices[[1]][ordr];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
369 gc();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
370 ResliceCombined();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
371 gc();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
372 return(invisible(.self));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
373 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
374 RowReorder = function(ordr) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
375 # transform logical into indices
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
376 if( typeof(ordr) == "logical" ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
377 if( length(ordr) == nRows() ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
378 ordr = which(ordr);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
379 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
380 stop("Parameter \"ordr\" has wrong length")
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
381 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
382 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
383 ## first, check that anything has to be done at all
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
384 if( (length(ordr) == nRows()) && all(ordr == (1:length(ordr))) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
385 return(invisible(.self));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
386 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
387 ## check bounds
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
388 #if( (min(ordr) < 1) || (max(ordr) > nRows()) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
389 # stop("Parameter \"ordr\" is out of bounds");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
390 #}
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
391 ## slice the data into individual rows
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
392 all_rows = vector("list", nSlices())
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
393 for( i in 1:nSlices() ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
394 slice = getSliceRaw(i)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
395 all_rows[[i]] = split(slice, 1:nrow(slice))
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
396 setSliceRaw(i,numeric())
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
397 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
398 gc();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
399 all_rows = unlist(all_rows, recursive=FALSE, use.names = FALSE);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
400 ## Reorder the rows
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
401 all_rows = all_rows[ordr];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
402 ## get row names
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
403 all_names = GetAllRowNames();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
404 ## erase the set
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
405 rowNameSlices <<- list();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
406 ## sort names
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
407 all_names = all_names[ordr];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
408 ##
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
409 ## Make slices back
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
410 nrows = length(all_rows);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
411 nSlices1 <<- as.integer((nrows+fileSliceSize-1)/fileSliceSize);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
412 ##cat(nrows, " ", nSlices1);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
413 rowNameSlices1 = vector("list", nSlices1);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
414 for( i in 1:nSlices1 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
415 fr = 1 + fileSliceSize*(i-1);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
416 to = min( fileSliceSize*i, nrows);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
417
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
418 subset = all_rows[fr:to];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
419 types = unlist(lapply(subset,typeof));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
420 israw = (types == "raw")
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
421 if(!all(israw == israw[1])) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
422 # some raw and some are not
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
423 subset = lapply(subset, function(x){if(is.raw(x)){x=as.integer(x);x[x==255] = NA;return(x)}else{return(x)}});
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
424 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
425 subset = unlist(subset);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
426 dim(subset) = c( length(all_rows[[fr]]) , to - fr + 1)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
427 #subset = matrix(subset, ncol = (to-fr+1));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
428 if(is.raw(subset)) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
429 setSliceRaw(i, t(subset));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
430 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
431 setSlice(i, t(subset));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
432 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
433 rowNameSlices1[[i]] = all_names[fr:to];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
434 all_rows[fr:to] = 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
435 all_names[fr:to] = 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
436 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
437 rowNameSlices <<- rowNameSlices1;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
438 gc();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
439 return(invisible(.self));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
440 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
441 RowRemoveZeroEps = function(){
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
442 for(sl in .seq(1,nSlices()) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
443 slice = getSlice(sl);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
444 amean = rowMeans(abs(slice));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
445 remove = (amean < .Machine$double.eps*nCols());
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
446 if(any(remove)) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
447 rowNameSlices[[sl]] <<- rowNameSlices[[sl]][!remove];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
448 setSlice(sl, slice[!remove, , drop = FALSE]);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
449 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
450 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
451 return(invisible(.self));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
452 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
453 FindRow = function(rowname) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
454 for(sl in .seq(1,nSlices()) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
455 mch = match(rowname,rowNameSlices[[sl]], nomatch = 0);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
456 if( mch > 0 )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
457 {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
458 row = getSlice(sl)[mch[1], , drop=FALSE];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
459 rownames(row) = rowname;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
460 colnames(row) = columnNames;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
461 return( list(slice = sl, item = mch, row = row) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
462 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
463 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
464 return( NULL );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
465 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
466 show = function() {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
467 cat("SlicedData object. For more information type: ?SlicedData\n");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
468 cat("Number of columns:", nCols(), "\n");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
469 cat("Number of rows:", nRows(), "\n");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
470 cat("Data is stored in", nSlices(), "slices\n");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
471 if(nCols()>0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
472 z = getSlice(1L);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
473 if(nrow(z)>0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
474 z = z[1:min(nrow(z),10L), 1:min(ncol(z),10L), drop = FALSE];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
475 rownames(z) = rowNameSlices[[1]][1:nrow(z)];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
476 colnames(z) = columnNames[1:ncol(z)];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
477 cat("Top left corner of the first slice (up to 10x10):\n");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
478 methods:::show(z)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
479 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
480 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
481 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
482 ))
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
483
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
484 setGeneric("nrow")
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
485 setMethod("nrow", "SlicedData", function(x) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
486 return( x$nRows() );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
487 })
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
488 setGeneric("NROW")
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
489 setMethod("NROW", "SlicedData", function(x) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
490 return( x$nRows() );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
491 })
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
492 setGeneric("ncol")
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
493 setMethod("ncol", "SlicedData", function(x) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
494 return( x$nCols() );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
495 })
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
496 setGeneric("NCOL")
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
497 setMethod("NCOL", "SlicedData", function(x) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
498 return( x$nCols() );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
499 })
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
500 setGeneric("dim")
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
501 setMethod("dim", "SlicedData", function(x) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
502 return( c(x$nRows(),x$nCols()) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
503 })
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
504 setGeneric("colnames")
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
505 setMethod("colnames", "SlicedData", function(x) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
506 return( x$columnNames );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
507 })
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
508 setGeneric("rownames")
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
509 setMethod("rownames", "SlicedData", function(x) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
510 return( x$GetAllRowNames() );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
511 })
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
512 setMethod("[[", "SlicedData", function(x,i) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
513 return( x$getSlice(i) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
514 })
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
515 setGeneric("length")
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
516 setMethod("length", "SlicedData", function(x) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
517 return( x$nSlices() );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
518 })
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
519 setMethod("[[<-", "SlicedData", function(x,i,value) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
520 x$setSlice(i, value);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
521 return(x);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
522 })
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
523 summary.SlicedData = function(object, ...) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
524 z = c(nCols = object$nCols(), nRows = object$nRows(), nSlices = object$nSlices());
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
525 return(z);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
526 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
527
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
528 ##### setGeneric("summary") #####
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
529 #setMethod("summary", "SlicedData", function(object, ...) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
530 # z = c(nCols = object$nCols(), nRows = object$nRows(), nSlices = object$nSlices());
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
531 # return(z);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
532 # })
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
533 #setGeneric("show", standardGeneric("show"))
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
534 # setMethod("show", "SlicedData", function(object) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
535 # cat("SlicedData object. For more information type: ?SlicedData\n");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
536 # cat("Number of columns:", object$nCols(), "\n");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
537 # cat("Number of rows:", object$nRows(), "\n");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
538 # cat("Data is stored in", object$nSlices(), "slices\n");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
539 # if(object$nSlices()>0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
540 # z = object$getSlice(1);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
541 # if(nrow(z)>0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
542 # z = z[1:min(nrow(z),10), 1:min(ncol(z),10), drop = FALSE];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
543 # rownames(z) = object$rowNameSlices[[1]][1:nrow(z)];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
544 # colnames(z) = object$columnNames[1:ncol(z)];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
545 # cat("Top left corner of the first slice (up to 10x10):\n");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
546 # show(z)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
547 # }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
548 # }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
549 # })
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
550
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
551 setGeneric("as.matrix")
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
552 setMethod("as.matrix", "SlicedData", function(x) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
553 if(x$nSlices() == 0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
554 return( matrix(0,0,0) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
555 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
556 if(x$nSlices() > 1) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
557 copy = x$Clone();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
558 copy$CombineInOneSlice();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
559 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
560 copy = x;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
561 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
562 mat = copy$getSlice(1L);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
563 rownames(mat) = rownames(copy);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
564 colnames(mat) = colnames(copy);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
565 return( mat );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
566 })
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
567 setGeneric("colnames<-")
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
568 setMethod("colnames<-", "SlicedData", function(x,value) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
569 stopifnot( class(value) == "character" );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
570 stopifnot( length(value) == x$nCols() );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
571 x$columnNames = value;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
572 return(x);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
573 })
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
574 setGeneric("rownames<-")
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
575 setMethod("rownames<-", "SlicedData", function(x,value) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
576 stopifnot( class(value) == "character" );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
577 stopifnot( length(value) == x$nRows() );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
578 start = 1;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
579 newNameSlices = vector("list", x$nSlices());
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
580 for( i in .seq(1,x$nSlices()) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
581 nr = nrow(x$getSliceRaw(i));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
582 newNameSlices[[i]] = value[ start:(start+nr-1) ];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
583 start = start + nr;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
584 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
585 x$rowNameSlices = newNameSlices;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
586 return(x);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
587 })
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
588 setGeneric("rowSums")
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
589 setMethod("rowSums", "SlicedData", function(x, na.rm = FALSE, dims = 1L) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
590 if(x$nSlices() == 0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
591 return( numeric() );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
592 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
593 stopifnot( dims == 1 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
594 thesum = vector("list", x$nSlices());
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
595 for( i in 1:x$nSlices() ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
596 thesum[[i]] = rowSums(x$getSlice(i), na.rm)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
597 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
598 return(unlist(thesum, recursive = FALSE, use.names = FALSE));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
599 })
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
600 setGeneric("rowMeans")
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
601 setMethod("rowMeans", "SlicedData", function(x, na.rm = FALSE, dims = 1L) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
602 if(x$nSlices() == 0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
603 return( numeric() );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
604 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
605 stopifnot( dims == 1 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
606 thesum = vector("list", x$nSlices());
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
607 for( i in 1:x$nSlices() ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
608 thesum[[i]] = rowMeans(x$getSlice(i), na.rm)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
609 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
610 return(unlist(thesum, recursive = FALSE, use.names = FALSE));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
611 })
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
612 setGeneric("colSums")
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
613 setMethod("colSums", "SlicedData", function(x, na.rm = FALSE, dims = 1L) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
614 if(x$nCols() == 0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
615 return( numeric() );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
616 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
617 stopifnot( dims == 1 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
618 thesum = 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
619 for( i in .seq(1,x$nSlices()) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
620 thesum = thesum + colSums(x$getSlice(i), na.rm)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
621 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
622 return(thesum);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
623 })
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
624 setGeneric("colMeans")
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
625 setMethod("colMeans", "SlicedData", function(x, na.rm = FALSE, dims = 1L) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
626 if(x$nCols() == 0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
627 return( numeric() );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
628 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
629 stopifnot( dims == 1 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
630 thesum = 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
631 thecounts = x$nRows();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
632 for( i in .seq(1,x$nSlices()) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
633 slice = x$getSlice(i);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
634 thesum = thesum + colSums(slice, na.rm)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
635 if( na.rm ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
636 thecounts = thecounts - colSums(is.na(slice))
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
637 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
638 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
639 return(thesum/thecounts);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
640 })
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
641
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
642 .listBuilder <- setRefClass(".listBuilder",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
643 fields = list(
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
644 dataEnv = "environment",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
645 n = "numeric"
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
646 ),
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
647 methods = list(
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
648 initialize = function() {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
649 dataEnv <<- new.env(hash = TRUE);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
650 n <<- 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
651 # cumlength <<- 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
652 return(.self);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
653 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
654 add = function(x) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
655 if(length(x) > 0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
656 n <<- n + 1;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
657 # cumlength <<- cumlength + length(x);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
658 assign(paste(n), x, dataEnv );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
659 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
660 return(.self);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
661 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
662 set = function(i,x) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
663 if(length(x) > 0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
664 if(i>n)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
665 n <<- i;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
666 assign(paste(i), x, dataEnv );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
667 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
668 return(.self);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
669 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
670 get = function(i) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
671 return(base::get(paste(i),dataEnv));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
672 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
673 list = function() {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
674 if(n==0) return(list());
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
675 result = vector('list',n);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
676 for( i in 1:n) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
677 result[[i]] = .self$get(i);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
678 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
679 return(result);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
680 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
681 unlist = function() {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
682 return(base::unlist(.self$list(), recursive=FALSE, use.names = FALSE));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
683 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
684 show = function() {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
685 cat(".listBuilder object.\nIternal object in MatrixEQTL package.\n");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
686 cat("Number of elements:", object$n, "\n");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
687 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
688 ))
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
689
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
690 .histogrammer <- setRefClass(".histogrammer",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
691 fields = list(
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
692 pvbins1 = "numeric",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
693 statbins1 = "numeric",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
694 hist.count = "numeric"
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
695 ),
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
696 methods = list(
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
697 initialize = function (pvbins, statbins) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
698 if(length(pvbins)) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
699 ord = order(statbins);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
700 pvbins1 <<- pvbins[ord];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
701 statbins1 <<- statbins[ord];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
702 hist.count <<- double(length(pvbins)-1);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
703 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
704 return(.self);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
705 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
706 update = function(stats.for.hist) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
707 h = hist(stats.for.hist, breaks = statbins1, include.lowest = TRUE, right = TRUE, plot = FALSE)$counts;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
708 hist.count <<- hist.count + h;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
709 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
710 getResults = function() {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
711 if(!is.unsorted(pvbins1)) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
712 return(list(hist.bins = pvbins1 , hist.counts = hist.count ));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
713 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
714 return(list(hist.bins = rev(pvbins1), hist.counts = rev(hist.count)));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
715 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
716 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
717 ))
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
718
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
719
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
720 .minpvalue <- setRefClass(".minpvalue",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
721 fields = list(
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
722 sdata = ".listBuilder",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
723 gdata = ".listBuilder"
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
724 ),
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
725 methods = list(
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
726 initialize = function(snps, gene) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
727 sdata <<- .listBuilder$new();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
728 for( ss in 1:snps$nSlices() ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
729 sdata$set( ss, double(nrow(snps$getSliceRaw(ss))));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
730 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
731 gdata <<- .listBuilder$new();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
732 for( gg in 1:gene$nSlices() ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
733 gdata$set( gg, double(nrow(gene$getSliceRaw(gg))));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
734 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
735 return(.self);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
736 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
737 update = function(ss, gg, astat) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
738 gmax = gdata$get(gg)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
739 z1 = max.col(astat,ties.method='first');
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
740 z11 = astat[1:nrow(astat) + nrow(astat) * (z1 - 1)];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
741 gmax = pmax(gmax, z11);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
742 gdata$set(gg, gmax);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
743
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
744 smax = sdata$get(ss)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
745 z22 = apply(astat,2,max);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
746 smax = pmax(smax, z22);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
747 sdata$set(ss, smax);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
748 return(.self);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
749 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
750 updatecis = function(ss, gg, select.cis, astat) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
751 if(length(astat)>0)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
752 {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
753 byrows = aggregate(x=astat, by=list(row=select.cis[,1]), FUN=max);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
754 bycols = aggregate(x=astat, by=list(col=select.cis[,2]), FUN=max);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
755
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
756 gmax = gdata$get(gg);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
757 gmax[byrows$row] = pmax(gmax[byrows$row], byrows$x)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
758 gdata$set(gg, gmax);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
759
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
760 smax = sdata$get(ss)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
761 smax[bycols$col] = pmax(smax[bycols$col], bycols$x)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
762 sdata$set(ss, smax);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
763 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
764 return(.self);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
765 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
766 getResults = function(snps, gene, pvfun) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
767 min.pv.snps = pvfun(sdata$unlist());
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
768 names(min.pv.snps) = rownames(snps);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
769 min.pv.gene = pvfun(gdata$unlist());
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
770 names(min.pv.gene) = rownames(gene);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
771 return(list(min.pv.snps = min.pv.snps, min.pv.gene = min.pv.gene));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
772 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
773 ))
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
774
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
775 .OutputSaver_FRD <- setRefClass(".OutputSaver_FRD",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
776 fields = list(
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
777 sdata = ".listBuilder",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
778 gdata = ".listBuilder",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
779 cdata = ".listBuilder",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
780 bdata = ".listBuilder",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
781 fid = "list",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
782 testfun1 = "list",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
783 pvfun1 = "list"
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
784 ),
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
785 methods = list(
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
786 initialize = function () {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
787 sdata <<- .listBuilder$new();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
788 gdata <<- .listBuilder$new();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
789 cdata <<- .listBuilder$new();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
790 bdata <<- .listBuilder$new();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
791 fid <<- list(0);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
792 testfun1 <<- list(0);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
793 pvfun1 <<- list(0);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
794 return(.self);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
795 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
796 start = function(filename, statistic_name, unused1, unused2, testfun, pvfun) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
797 testfun1 <<- list(testfun);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
798 pvfun1 <<- list(pvfun);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
799 if(length(filename) > 0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
800 if(class(filename) == "character") {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
801 fid <<- list(file(description = filename, open = "wt", blocking = FALSE, raw = FALSE), TRUE);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
802 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
803 fid <<- list(filename, FALSE)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
804 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
805 writeLines( paste("SNP\tgene\t",statistic_name,"\tp-value\tFDR", sep = ""), fid[[1]]);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
806 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
807 fid <<- list();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
808 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
809 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
810 update = function(spos, gpos, sta, beta = NULL) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
811 if(length(sta)>0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
812 sdata$add(spos);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
813 gdata$add(gpos);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
814 cdata$add(sta );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
815 if(!is.null(beta ))
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
816 bdata$add(beta );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
817 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
818 return(.self);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
819 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
820 getResults = function( gene, snps, FDR_total_count) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
821 pvalues = NULL;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
822 if(cdata$n > 0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
823 tests = testfun1[[1]](cdata$unlist());
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
824 cdata <<- .listBuilder$new();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
825
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
826 pvalues = pvfun1[[1]](tests);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
827 ord = order(pvalues);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
828
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
829 tests = tests[ord];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
830 pvalues = pvalues[ord];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
831
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
832 FDR = pvalues * FDR_total_count / (1:length(pvalues));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
833 FDR[length(FDR)] = min(FDR[length(FDR)], 1);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
834 FDR = rev(cummin(rev(FDR)));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
835
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
836 snps_names = snps$GetAllRowNames()[sdata$unlist()[ord]];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
837 sdata <<- .listBuilder$new();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
838 gene_names = gene$GetAllRowNames()[gdata$unlist()[ord]];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
839 gdata <<- .listBuilder$new();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
840
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
841 beta = NULL;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
842 if(bdata$n > 0)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
843 beta = bdata$unlist()[ord];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
844
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
845 if(length(fid)>0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
846 step = 1000; ########### 100000
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
847 for( part in 1:ceiling(length(FDR)/step) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
848 fr = (part-1)*step + 1;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
849 to = min(part*step, length(FDR));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
850 dump = data.frame(snps_names[fr:to],
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
851 gene_names[fr:to],
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
852 if(is.null(beta)) tests[fr:to] else list(beta[fr:to],tests[fr:to]),
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
853 pvalues[fr:to],
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
854 FDR[fr:to],
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
855 row.names = NULL,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
856 check.rows = FALSE,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
857 check.names = FALSE,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
858 stringsAsFactors = FALSE);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
859 write.table(dump, file = fid[[1]], quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
860 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
861 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
862 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
863 cat("No significant associations were found.\n", file = if(length(fid)>0){fid[[1]]}else{""});
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
864 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
865 if(length(fid)>0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
866 if(fid[[2]]) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
867 close(fid[[1]]);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
868 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
869 fid <<- list();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
870 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
871
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
872 if(!is.null(pvalues)) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
873 eqtls = list( snps = snps_names,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
874 gene = gene_names,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
875 statistic = tests,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
876 pvalue = pvalues,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
877 FDR = FDR);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
878 if(!is.null(beta))
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
879 eqtls$beta = beta;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
880 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
881 eqtls = list( snps = character(),
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
882 gene = character(),
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
883 beta = numeric(),
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
884 statistic = numeric(),
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
885 pvalue = numeric(),
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
886 FDR = numeric());
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
887 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
888 return(list(eqtls = data.frame(eqtls)));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
889 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
890 )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
891 )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
892
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
893
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
894 .OutputSaver_direct <- setRefClass(".OutputSaver_direct",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
895 fields = list(
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
896 gene_names = "character",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
897 snps_names = "character",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
898 fid = "list",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
899 testfun1 = "list",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
900 pvfun1 = "list"
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
901 ),
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
902 methods = list(
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
903 initialize = function() {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
904 gene_names <<- character(0);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
905 snps_names <<- character(0);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
906 fid <<- list(0);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
907 testfun1 <<- list(0);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
908 pvfun1 <<- list(0);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
909 return(.self);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
910 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
911 start = function(filename, statistic_name, snps, gene, testfun, pvfun) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
912 # I hope the program stops if it fails to open the file
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
913 if(class(filename) == "character") {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
914 fid <<- list(file(description = filename, open = "wt", blocking = FALSE, raw = FALSE), TRUE);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
915 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
916 fid <<- list(filename, FALSE)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
917 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
918 writeLines(paste("SNP\tgene\t", statistic_name, "\tp-value", sep = ""), fid[[1]]);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
919 gene_names <<- gene$GetAllRowNames();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
920 snps_names <<- snps$GetAllRowNames();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
921 testfun1 <<- list(testfun);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
922 pvfun1 <<- list(pvfun);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
923 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
924 update = function(spos, gpos, sta, beta = NULL) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
925 if( length(sta) == 0 )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
926 return();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
927 sta = testfun1[[1]](sta);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
928 lst = list(snps = snps_names[spos], gene = gene_names[gpos], beta = beta, statistic = sta, pvalue = pvfun1[[1]](sta));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
929 lst$beta = lst$beta;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
930
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
931 dump2 = data.frame(lst, row.names = NULL, check.rows = FALSE, check.names = FALSE, stringsAsFactors = FALSE);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
932 write.table(dump2, file = fid[[1]], quote = FALSE, sep = "\t", row.names = FALSE, col.names = FALSE);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
933 },
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
934 getResults = function(...) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
935 if(length(fid)>0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
936 if(fid[[2]]) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
937 close(fid[[1]]);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
938 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
939 fid <<- list();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
940 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
941 gene_names <<- character(0);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
942 snps_names <<- character(0);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
943 return(list());
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
944 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
945 )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
946 )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
947
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
948 .my.pmin = function(x, val) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
949 # minimum "pmin" function that can handle empty array
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
950 if(length(x) == 0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
951 return(x)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
952 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
953 return(pmin.int(x,val));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
954 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
955 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
956
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
957 .my.pmax = function(x, val) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
958 # minimum "pmin" function that can handle empty array
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
959 if(length(x) == 0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
960 return(x)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
961 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
962 return(pmax.int(x,val));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
963 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
964 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
965
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
966 .pv.nz = function(x){return( .my.pmax(x,.Machine$double.xmin) )}
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
967
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
968 .SetNanRowMean = function(x) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
969 if( any(is.na(x)) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
970 rowmean = rowMeans(x, na.rm = TRUE);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
971 rowmean[ is.na(rowmean) ] = 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
972 for( j in which(!complete.cases(x)) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
973 where1 = is.na( x[j, ] );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
974 x[j,where1] = rowmean[j];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
975 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
976 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
977 return(x);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
978 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
979
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
980 # .SNP_process_split_for_ANOVA = function(x) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
981 # # split into 2 dummy variables
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
982 #
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
983 # uniq = unique(c(x));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
984 # uniq = uniq[!is.na(uniq)];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
985 #
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
986 # if( length(uniq) > 3 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
987 # stop("More than three genotype categories is not handled by ANOVA");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
988 # } else if ( length(uniq) < 3 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
989 # uniq = c(uniq, min(uniq)-(1:(3-length(uniq))));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
990 # }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
991 #
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
992 # freq = matrix(0, nrow(x), length(uniq));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
993 # for(i in 1:length(uniq)) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
994 # freq[ ,i] = rowSums(x==uniq[i], na.rm = TRUE);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
995 # }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
996 #
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
997 # md = apply(freq, 1, which.max);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
998 # freq[ cbind(1:nrow(x),md) ] = -1;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
999 #
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1000 # md = apply(freq, 1, which.max); # min(freq[cbind(1:nrow(slice),md)] - rowSums(select,na.rm = TRUE ))
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1001 # new_slice1 = (x == uniq[md]);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1002 # new_slice1[is.na(new_slice1)] = 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1003 # freq[ cbind(1:nrow(x),md) ] = -1;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1004 #
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1005 # md = apply(freq,1,which.max);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1006 # new_slice2 = (x == uniq[md]);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1007 # new_slice2[ is.na(new_slice2) ] = 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1008 # rez = vector("list", 2);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1009 # rez[[1]] = new_slice1;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1010 # rez[[2]] = new_slice2;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1011 # return( rez );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1012 # }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1013
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1014 .SNP_process_split_for_ANOVA = function(x,n.groups) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1015 # split into 2 dummy variables (or more)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1016
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1017 # # Get the number of ANOVA groups
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1018 # n.groups = options('MatrixEQTL.ANOVA.categories')[[1]];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1019 # if( is.null(n.groups))
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1020 # n.groups = 3;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1021
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1022 # Unique values in x (make sure it has length of n.groups);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1023 uniq = unique(as.vector(x));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1024 uniq = uniq[!is.na(uniq)];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1025 if( length(uniq) > n.groups ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1026 stop("More than declared number of genotype categories is detected by ANOVA");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1027 } else if ( length(uniq) < n.groups ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1028 uniq = c(uniq, rep(min(uniq)-1, n.groups-length(uniq)));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1029 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1030
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1031 # Table of frequencies for each variable (row)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1032 freq = matrix(0, nrow(x), n.groups);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1033 for(i in 1:n.groups) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1034 freq[ ,i] = rowSums(x==uniq[i], na.rm = TRUE);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1035 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1036 # remove NA's from x for convenience
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1037 x[is.na(x)] = min(uniq)-2;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1038
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1039 # Output list of matrices
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1040 rez = vector('list',n.groups-1);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1041
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1042 # Skip the most frequent value
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1043 md = apply(freq, 1, which.max); # most frequent value for each variable
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1044 freq[ cbind(1:nrow(x),md) ] = -1;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1045
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1046 # The rest form dumm
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1047 for(j in 1:(n.groups-1)){
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1048 md = apply(freq, 1, which.max);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1049 freq[ cbind(1:nrow(x),md) ] = -1;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1050 rez[[j]] = (x == uniq[md]);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1051 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1052 return( rez );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1053 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1054
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1055 Matrix_eQTL_engine = function(
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1056 snps,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1057 gene,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1058 cvrt = SlicedData$new(),
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1059 output_file_name,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1060 pvOutputThreshold = 1e-5,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1061 useModel = modelLINEAR,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1062 errorCovariance = numeric(),
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1063 verbose = TRUE,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1064 pvalue.hist = FALSE,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1065 min.pv.by.genesnp = FALSE,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1066 noFDRsaveMemory = FALSE) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1067 rez = Matrix_eQTL_main(
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1068 snps = snps,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1069 gene = gene,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1070 cvrt = cvrt,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1071 output_file_name = output_file_name,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1072 pvOutputThreshold = pvOutputThreshold,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1073 useModel = useModel,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1074 errorCovariance = errorCovariance,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1075 verbose = verbose,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1076 pvalue.hist = pvalue.hist,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1077 min.pv.by.genesnp = min.pv.by.genesnp,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1078 noFDRsaveMemory = noFDRsaveMemory);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1079 return( rez );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1080 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1081
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1082 Matrix_eQTL_main = function(
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1083 snps,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1084 gene,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1085 cvrt = SlicedData$new(),
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1086 output_file_name = "",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1087 pvOutputThreshold = 1e-5,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1088 useModel = modelLINEAR,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1089 errorCovariance = numeric(),
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1090 verbose = TRUE,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1091 output_file_name.cis = "",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1092 pvOutputThreshold.cis = 0,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1093 snpspos = NULL,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1094 genepos = NULL,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1095 cisDist = 1e6,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1096 pvalue.hist = FALSE,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1097 min.pv.by.genesnp = FALSE,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1098 noFDRsaveMemory = FALSE) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1099 ################################# Basic variable checks #################################
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1100 {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1101 # status("Performing basic checks of the input variables");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1102 stopifnot( "SlicedData" %in% class(gene) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1103 stopifnot( "SlicedData" %in% class(snps) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1104 stopifnot( "SlicedData" %in% class(cvrt) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1105
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1106 # Check dimensions
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1107 if( min(snps$nRows(),snps$nCols()) == 0 )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1108 stop("Empty genotype dataset");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1109 if( min(gene$nRows(),gene$nCols()) == 0 )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1110 stop("Empty expression dataset");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1111 if( snps$nCols() != gene$nCols() )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1112 stop("Different number of samples in the genotype and gene expression files");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1113 if( cvrt$nRows()>0 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1114 if( snps$nCols() != cvrt$nCols() )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1115 stop("Wrong number of samples in the matrix of covariates");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1116 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1117
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1118 stopifnot( class(pvOutputThreshold) == "numeric" );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1119 stopifnot( length(pvOutputThreshold) == 1 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1120 stopifnot( pvOutputThreshold >= 0 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1121 stopifnot( pvOutputThreshold <= 1 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1122
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1123 stopifnot( class(noFDRsaveMemory) == "logical" );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1124 stopifnot( length(noFDRsaveMemory) == 1 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1125
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1126 if( pvOutputThreshold > 0 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1127 stopifnot( !((length(output_file_name) == 0) && noFDRsaveMemory) )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1128 stopifnot( length(output_file_name) <= 1 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1129 if( length(output_file_name) == 1 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1130 stopifnot( class(output_file_name) %in% c("character","connection") );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1131 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1132 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1133
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1134 stopifnot( class(pvOutputThreshold.cis) == "numeric" );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1135 stopifnot( length(pvOutputThreshold.cis) == 1 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1136 stopifnot( pvOutputThreshold.cis >= 0 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1137 stopifnot( pvOutputThreshold.cis <= 1 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1138 stopifnot( !((pvOutputThreshold > 0) & (pvOutputThreshold.cis > 0) & (pvOutputThreshold > pvOutputThreshold.cis)) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1139 stopifnot( (pvOutputThreshold > 0) | (pvOutputThreshold.cis > 0) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1140
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1141 stopifnot( class(useModel) == class(modelLINEAR) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1142 stopifnot( length(useModel) == 1 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1143 stopifnot( useModel %in% c(modelLINEAR, modelANOVA, modelLINEAR_CROSS) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1144 if( useModel %in% c(modelLINEAR, modelLINEAR_CROSS) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1145 if( snps$nCols() <= cvrt$nRows() + 1 + 1) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1146 stop('The number of covariates exceeds the number of samples.\nLinear regression can not be fit.')
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1147 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1148 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1149 if( useModel == modelLINEAR_CROSS ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1150 if( cvrt$nRows() == 0 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1151 stop( "Model \"modelLINEAR_CROSS\" requires at least one covariate" );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1152 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1153 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1154 if( useModel == modelANOVA ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1155 n.anova.groups = getOption('MatrixEQTL.ANOVA.categories', 3);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1156 stopifnot( n.anova.groups == floor(n.anova.groups) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1157 stopifnot( n.anova.groups >= 3 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1158 # stopifnot( n.anova.groups < snps$nCols() - cvrt$nRows() - 2 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1159 if( snps$nCols() <= cvrt$nRows() + n.anova.groups) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1160 stop('The number of covariates exceeds the number of samples.\nLinear regression (ANOVA) can not be fit.')
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1161 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1162 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1163
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1164 stopifnot( class(verbose) == "logical" );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1165 stopifnot( length(verbose) == 1 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1166
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1167 stopifnot( class(min.pv.by.genesnp) == "logical" );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1168 stopifnot( length(min.pv.by.genesnp) == 1 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1169
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1170 if( pvOutputThreshold.cis > 0 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1171 stopifnot( !((length(output_file_name.cis) == 0) && noFDRsaveMemory) )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1172 stopifnot( length(output_file_name.cis) <= 1 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1173 if( length(output_file_name.cis) == 1 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1174 stopifnot( class(output_file_name.cis) %in% c("character","connection") );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1175 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1176
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1177 # stopifnot( class(output_file_name.cis) == "character" );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1178 # stopifnot( length(output_file_name.cis) == 1 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1179 stopifnot( class(snpspos) == "data.frame" );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1180 stopifnot( ncol(snpspos) == 3 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1181 stopifnot( nrow(snpspos) > 0 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1182 stopifnot( class(snpspos[1,3]) %in% c("integer", "numeric") )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1183 stopifnot( !any(is.na(snpspos[,3])) )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1184 stopifnot( class(genepos) == "data.frame" );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1185 stopifnot( ncol(genepos) == 4 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1186 stopifnot( nrow(genepos) > 0 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1187 stopifnot( class(genepos[1,3]) %in% c("integer", "numeric") )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1188 stopifnot( class(genepos[1,4]) %in% c("integer", "numeric") )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1189 stopifnot( !any(is.na(genepos[,3])) )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1190 stopifnot( !any(is.na(genepos[,4])) )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1191 stopifnot( nzchar(output_file_name.cis) )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1192 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1193
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1194 if( pvOutputThreshold > 0 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1195 stopifnot( nzchar(output_file_name) )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1196 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1197
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1198 stopifnot( class(errorCovariance) %in% c("numeric", "matrix") );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1199 errorCovariance = as.matrix(errorCovariance);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1200 if(length(errorCovariance)>0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1201 if( nrow(errorCovariance) != ncol(errorCovariance) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1202 stop("The covariance matrix is not square");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1203 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1204 if( nrow(errorCovariance) != snps$nCols() ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1205 stop("The covariance matrix size does not match the number of samples");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1206 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1207 if( !all(errorCovariance == t(errorCovariance)) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1208 stop("The covariance matrix is not symmetric");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1209 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1210 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1211 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1212 ################################# Initial setup #########################################
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1213 {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1214 gene.std = .listBuilder$new();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1215 snps.std = .listBuilder$new();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1216
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1217 dont.clone.gene = getOption('MatrixEQTL.dont.preserve.gene.object', FALSE)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1218 if(is.null(dont.clone.gene))
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1219 dont.clone.gene = FALSE;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1220
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1221 if( !dont.clone.gene )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1222 gene = gene$Clone();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1223 # snps = snps$Clone(); # snps is read only
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1224 cvrt = cvrt$Clone();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1225
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1226 params = list(
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1227 output_file_name = output_file_name,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1228 pvOutputThreshold = pvOutputThreshold,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1229 useModel = useModel,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1230 errorCovariance = errorCovariance ,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1231 verbose = verbose,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1232 output_file_name.cis = output_file_name.cis,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1233 pvOutputThreshold.cis = pvOutputThreshold.cis,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1234 cisDist = cisDist ,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1235 pvalue.hist = pvalue.hist,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1236 min.pv.by.genesnp = min.pv.by.genesnp);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1237
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1238 if( verbose ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1239 lastTime = 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1240 status <- function(text) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1241 # gc();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1242 newTime = proc.time()[3];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1243 if(lastTime != 0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1244 cat("Task finished in ", newTime-lastTime, " seconds\n");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1245 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1246 cat(text,"\n");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1247 lastTime <<- newTime;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1248 unused = flush.console();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1249 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1250 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1251 status = function(text){}
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1252 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1253 start.time = proc.time()[3];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1254 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1255 ################################# Error covariance matrix processing ####################
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1256 {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1257 if( length(errorCovariance) > 0 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1258 status("Processing the error covariance matrix");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1259 eig = eigen(errorCovariance, symmetric = TRUE)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1260 d = eig$values;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1261 v = eig$vectors;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1262 # errorCovariance == v %*% diag(d) %*% t(v)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1263 # errorCovariance^0.5 == v*sqrt(d)*v" (no single quotes anymore)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1264 # errorCovariance^(-0.5) == v*diag(1./sqrt(diag(d)))*v"
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1265 if( any(d<=0) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1266 stop("The covariance matrix is not positive definite");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1267 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1268 correctionMatrix = v %*% diag(1./sqrt(d)) %*% t(v);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1269 rm( eig, v, d, errorCovariance )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1270 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1271 rm( errorCovariance );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1272 correctionMatrix = numeric();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1273 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1274 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1275 ################################# Matching gene and SNPs locations ######################
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1276 if( pvOutputThreshold.cis > 0 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1277 status("Matching data files and location files")
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1278
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1279 # names in the input data
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1280 gene_names = gene$GetAllRowNames();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1281 snps_names = snps$GetAllRowNames();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1282
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1283 # gene range, set: left<right
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1284 if(any(genepos[,3] > genepos[,4])) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1285 temp3 = genepos[,3];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1286 temp4 = genepos[,4];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1287 genepos[,3] = pmin(temp3,temp4);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1288 genepos[,4] = pmax(temp3,temp4);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1289 rm(temp3, temp4);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1290 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1291
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1292 # match with the location data
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1293 genematch = match( gene_names, genepos[ ,1], nomatch = 0L);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1294 usedgene = matrix(FALSE, nrow(genepos), 1); # genes in "genepos" that are matching "gene_names"
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1295 usedgene[ genematch ] = TRUE;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1296 if( !any(genematch) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1297 stop("Gene names do not match those in the gene location file.");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1298 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1299 cat( sum(genematch>0), "of", length(gene_names), " genes matched\n");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1300
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1301
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1302 snpsmatch = match( snps_names, snpspos[ ,1], nomatch = 0L);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1303 usedsnps = matrix(FALSE, nrow(snpspos),1);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1304 usedsnps[ snpsmatch ] = TRUE;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1305 if( !any(snpsmatch) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1306 stop("SNP names do not match those in the SNP location file.");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1307 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1308 cat( sum(snpsmatch>0), "of", length(snps_names), " SNPs matched\n");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1309
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1310 # list used chr names
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1311 chrNames = unique(c( as.character(unique(snpspos[usedsnps,2])),
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1312 as.character(unique(genepos[usedgene,2])) ))
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1313 chrNames = chrNames[ sort.list( suppressWarnings(as.integer(chrNames)),
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1314 method = "radix", na.last = TRUE ) ];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1315 # match chr names
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1316 genechr = match(genepos[,2],chrNames);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1317 snpschr = match(snpspos[,2],chrNames);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1318
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1319 # max length of a chromosome
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1320 chrMax = max( snpspos[usedsnps, 3], genepos[usedgene, 4], na.rm = TRUE) + cisDist;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1321
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1322 # Single number location for all rows in "genepos" and "snpspos"
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1323 genepos2 = as.matrix(genepos[ ,3:4, drop = FALSE] + (genechr-1)*chrMax);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1324 snpspos2 = as.matrix(snpspos[ ,3 , drop = FALSE] + (snpschr-1)*chrMax);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1325
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1326 # the final location arrays;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1327 snps_pos = matrix(0,length(snps_names),1);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1328 snps_pos[snpsmatch>0, ] = snpspos2[snpsmatch, , drop = FALSE];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1329 snps_pos[rowSums(is.na(snps_pos))>0, ] = 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1330 snps_pos[snps_pos==0] = (length(chrNames)+1) * (chrMax+cisDist);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1331 rm(snps_names, snpsmatch, usedsnps, snpschr, snpspos2)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1332
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1333 gene_pos = matrix(0,length(gene_names),2);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1334 gene_pos[genematch>0, ] = genepos2[genematch, , drop = FALSE];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1335 gene_pos[rowSums(is.na(gene_pos))>0, ] = 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1336 gene_pos[gene_pos==0] = (length(chrNames)+2) * (chrMax+cisDist);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1337 rm(gene_names, genematch, usedgene, genechr, genepos2)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1338 rm(chrNames, chrMax);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1339
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1340 if( is.unsorted(snps_pos) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1341 status("Reordering SNPs\n");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1342 ordr = sort.list(snps_pos);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1343 snps$RowReorder(ordr);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1344 snps_pos = snps_pos[ordr, , drop = FALSE];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1345 rm(ordr);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1346 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1347 if( is.unsorted(rowSums(gene_pos)) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1348 status("Reordering genes\n");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1349 ordr = sort.list(rowSums(gene_pos));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1350 gene$RowReorder(ordr);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1351 gene_pos = gene_pos[ordr, , drop = FALSE];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1352 rm(ordr);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1353 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1354
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1355 # Slice it back.
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1356 geneloc = vector("list", gene$nSlices())
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1357 gene_offset = 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1358 for(gc in 1:gene$nSlices()) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1359 nr = length(gene$rowNameSlices[[gc]]);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1360 geneloc[[gc]] = gene_pos[gene_offset + (1:nr), , drop = FALSE];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1361 gene_offset = gene_offset + nr;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1362 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1363 rm(gc, gene_offset, gene_pos);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1364
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1365 snpsloc = vector("list", snps$nSlices())
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1366 snps_offset = 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1367 for(sc in 1:snps$nSlices()) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1368 nr = length(snps$rowNameSlices[[sc]]);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1369 snpsloc[[sc]] = snps_pos[snps_offset + (1:nr), , drop = FALSE];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1370 snps_offset = snps_offset + nr;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1371 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1372 rm(nr, sc, snps_offset, snps_pos);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1373 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1374 ################################# Covariates processing #################################
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1375 {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1376 status("Processing covariates");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1377 if( useModel == modelLINEAR_CROSS ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1378 last.covariate = as.vector(tail( cvrt$getSlice(cvrt$nSlices()), n = 1));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1379 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1380 if( cvrt$nRows()>0 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1381 cvrt$SetNanRowMean();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1382 cvrt$CombineInOneSlice();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1383 cvrt = rbind(matrix(1,1,snps$nCols()),cvrt$getSlice(1));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1384 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1385 cvrt = matrix(1,1,snps$nCols());
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1386 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1387 # Correct for the error covariance structure
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1388 if( length(correctionMatrix)>0 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1389 cvrt = cvrt %*% correctionMatrix;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1390 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1391 # Orthonormalize covariates
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1392 # status("Orthonormalizing covariates");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1393 q = qr(t(cvrt));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1394 if( min(abs(diag(qr.R(q)))) < .Machine$double.eps * snps$nCols() ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1395 stop("Colinear or zero covariates detected");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1396 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1397 cvrt = t( qr.Q(q) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1398 rm( q );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1399 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1400 ################################# Gene expression processing ############################
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1401 {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1402 status("Processing gene expression data (imputation, residualization, etc.)");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1403 # Impute gene expression
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1404 gene$SetNanRowMean();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1405 # Correct for the error covariance structure
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1406 if( length(correctionMatrix)>0 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1407 gene$RowMatrixMultiply(correctionMatrix);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1408 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1409 # Orthogonolize expression w.r.t. covariates
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1410 # status("Orthogonolizing expression w.r.t. covariates");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1411 gene_offsets = double(gene$nSlices()+1);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1412 for( sl in 1:gene$nSlices() ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1413 slice = gene$getSlice(sl);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1414 gene_offsets[sl+1] = gene_offsets[sl] + nrow(slice);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1415 rowsq1 = rowSums(slice^2);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1416 slice = slice - tcrossprod(slice,cvrt) %*% cvrt;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1417 rowsq2 = rowSums(slice^2);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1418 # kill rows colinear with the covariates
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1419 delete.rows = (rowsq2 <= rowsq1 * .Machine$double.eps );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1420 slice[delete.rows] = 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1421 div = sqrt( rowSums(slice^2) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1422 div[ div == 0 ] = 1;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1423 gene.std$set(sl, div);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1424 gene$setSlice(sl, slice / div);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1425 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1426 rm(rowsq1, rowsq2, delete.rows, div);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1427 rm( sl, slice );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1428 #gene$RowRemoveZeroEps();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1429 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1430 ################################# snps_process, testfun, pvfun, threshfun, afun ########
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1431 {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1432 # snps_process - preprocess SNPs slice
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1433 #
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1434 # afun --- abs for signed stats, identity for non-negative
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1435 # threshfun --- internal stat threshold for given p-value
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1436 # testfun --- t or F statistic from the internal one
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1437 # pvfun --- p-value from the t or F statistic
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1438
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1439 nSamples = snps$nCols();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1440 nGenes = gene$nRows();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1441 nSnps = snps$nRows();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1442 nCov = nrow(cvrt);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1443 # nVarTested = length(snps_list); # set in case(useModel)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1444 # dfNull = nSamples - nCov;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1445 # d.f. of the full model
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1446 betafun = NULL;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1447
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1448 if( useModel == modelLINEAR ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1449 snps_process = function(x) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1450 return( list(.SetNanRowMean(x)) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1451 };
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1452 nVarTested = 1;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1453 dfFull = nSamples - nCov - nVarTested;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1454 statistic.fun = function(mat_list) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1455 return( mat_list[[1]] );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1456 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1457 afun = function(x) {return(abs(x))};
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1458 threshfun = function(pv) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1459 thr = qt(pv/2, dfFull, lower.tail = FALSE);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1460 thr = thr^2;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1461 thr = sqrt( thr / (dfFull + thr) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1462 thr[pv >= 1] = 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1463 thr[pv <= 0] = 1;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1464 return( thr );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1465 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1466 testfun = function(x) { return( x * sqrt( dfFull / (1 - .my.pmin(x^2,1)))); }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1467 pvfun = function(x) { return( .pv.nz(pt(-abs(x),dfFull)*2)); }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1468 thresh.cis = threshfun(pvOutputThreshold.cis);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1469 thresh = threshfun(pvOutputThreshold);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1470 betafun = function(stat, ss, gg, select) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1471 return(stat * gene.std$get(gg)[select[,1]] / snps.std$get(ss)[select[,2]]);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1472 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1473 } else
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1474 if( useModel == modelANOVA ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1475 snps_process = function(x).SNP_process_split_for_ANOVA(x,n.anova.groups);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1476 nVarTested = n.anova.groups - 1;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1477 dfFull = nSamples - nCov - nVarTested;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1478 # statistic.fun = function(mat_list) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1479 # return( mat_list[[1]]^2 + mat_list[[2]]^2 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1480 # }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1481 statistic.fun = function(mat_list) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1482 x = mat_list[[1]]^2;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1483 for( j in 2:length(mat_list) )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1484 x = x + mat_list[[j]]^2;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1485 return( x );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1486 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1487 afun = identity;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1488 threshfun = function(pv) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1489 thr = qf(pv, nVarTested, dfFull, lower.tail = FALSE);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1490 thr = thr / (dfFull/nVarTested + thr);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1491 thr[pv >= 1] = 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1492 thr[pv <= 0] = 1;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1493 return( thr );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1494 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1495 testfun = function(x) { return( x / (1 - .my.pmin(x,1)) * (dfFull/nVarTested) ); }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1496 pvfun = function(x) { return( .pv.nz(pf(x, nVarTested, dfFull, lower.tail = FALSE)) ); }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1497 thresh.cis = threshfun(pvOutputThreshold.cis);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1498 thresh = threshfun(pvOutputThreshold);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1499 } else
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1500 if( useModel == modelLINEAR_CROSS ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1501 last.covariate = as.vector( last.covariate );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1502 snps_process = .SNP_process_split_for_LINEAR_CROSS = function(x) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1503 out = vector("list", 2);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1504 out[[1]] = .SetNanRowMean(x);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1505 out[[2]] = t( t(out[[1]]) * last.covariate );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1506 return( out );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1507 };
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1508 nVarTested = 1;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1509 dfFull = nSamples - nCov - nVarTested - 1;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1510 statistic.fun = function(mat_list) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1511 return( mat_list[[2]] / sqrt(1 - mat_list[[1]]^2) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1512 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1513 afun = function(x) {return(abs(x))};
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1514 threshfun = function(pv) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1515 thr = qt(pv/2, dfFull, lower.tail = FALSE);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1516 thr = thr^2;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1517 thr = sqrt( thr / (dfFull + thr) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1518 thr[pv >= 1] = 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1519 thr[pv <= 0] = 1;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1520 return( thr );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1521 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1522 testfun = function(x) { return( x * sqrt( dfFull / (1 - .my.pmin(x^2,1)))); }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1523 pvfun = function(x) { return( .pv.nz(pt(-abs(x),dfFull)*2 )); }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1524 thresh.cis = threshfun(pvOutputThreshold.cis);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1525 thresh = threshfun(pvOutputThreshold);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1526 betafun = function(stat, ss, gg, select) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1527 return(stat * gene.std$get(gg)[select[,1]] / snps.std$get(ss)[select[,2]]);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1528 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1529 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1530 params$dfFull = dfFull;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1531 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1532 ################################# Saver class(es) creation ##############################
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1533 {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1534 status("Creating output file(s)");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1535 if(noFDRsaveMemory) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1536 if( pvOutputThreshold > 0 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1537 saver.tra = .OutputSaver_direct$new();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1538 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1539 if( pvOutputThreshold.cis > 0 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1540 saver.cis = .OutputSaver_direct$new();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1541 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1542 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1543 if( pvOutputThreshold > 0 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1544 saver.tra = .OutputSaver_FRD$new();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1545 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1546 if( pvOutputThreshold.cis > 0 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1547 saver.cis = .OutputSaver_FRD$new();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1548 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1549 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1550 if( pvOutputThreshold > 0 )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1551 if( pvOutputThreshold * gene$nRows() * snps$nRows() > 1000000 )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1552 if(!noFDRsaveMemory)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1553 cat('Warning: pvOutputThreshold may be too large.\nExpected number of findings > ',
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1554 pvOutputThreshold * gene$nRows() * snps$nRows(),'\n');
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1555 if( (useModel == modelLINEAR) || (useModel == modelLINEAR_CROSS) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1556 statistic_name = "t-stat";
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1557 } else if( useModel == modelANOVA ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1558 statistic_name = "F-test";
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1559 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1560 if(!is.null(betafun))
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1561 statistic_name = paste('beta\t',statistic_name, sep='');
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1562 if( pvOutputThreshold > 0 )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1563 saver.tra$start(output_file_name, statistic_name, snps, gene, testfun, pvfun);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1564 if( pvOutputThreshold.cis > 0 )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1565 saver.cis$start(output_file_name.cis, statistic_name, snps, gene, testfun, pvfun);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1566 rm( statistic_name );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1567 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1568 ################################# Some useful functions #################################
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1569 {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1570 orthonormalize.snps = function(cursnps, ss) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1571 for(p in 1:length(cursnps)) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1572 if(length(correctionMatrix)>0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1573 cursnps[[p]] = cursnps[[p]] %*% correctionMatrix;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1574 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1575 cursnps[[p]] = cursnps[[p]] - tcrossprod(cursnps[[p]],cvrt) %*% cvrt;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1576 for(w in .seq(1,p-1L))
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1577 cursnps[[p]] = cursnps[[p]] - rowSums(cursnps[[p]]*cursnps[[w]]) * cursnps[[w]];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1578 div = sqrt( rowSums(cursnps[[p]]^2) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1579 div[ div == 0 ] = 1;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1580 cursnps[[p]] = cursnps[[p]]/div;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1581 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1582 snps.std$set(ss, div);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1583 return(cursnps);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1584 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1585 # if( pvOutputThreshold.cis > 0 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1586 # is.cis.pair = function(gg,ss) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1587 # return(!( ( snpsloc[[ss]][1, 1] - tail( geneloc[[gg]][ , 2], n = 1L) > cisDist) |
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1588 # ( geneloc[[gg]][1, 1] - tail( snpsloc[[ss]] , n = 1L) > cisDist) ) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1589 # }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1590 # }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1591 if( pvOutputThreshold.cis > 0 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1592 # sn.l = sapply(snpsloc, function(x)x[1] );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1593 # sn.r = sapply(snpsloc, function(x)tail(x,1) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1594 # ge.l = sapply(geneloc, function(x)x[1,1] );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1595 # ge.r = sapply(geneloc, function(x)x[nrow(x) , 2] );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1596 sn.l = sapply(snpsloc, '[', 1 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1597 sn.r = sapply(snpsloc, tail, 1 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1598 ge.l = sapply(geneloc, '[', 1, 1 );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1599 ge.r = sapply( lapply(geneloc, tail.matrix, 1 ), '[', 2);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1600 gg.1 = findInterval( sn.l , ge.r + cisDist +1) + 1;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1601 # cat(gg.1,'\n')
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1602 gg.2 = findInterval( sn.r , ge.l - cisDist );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1603 # cat(gg.2,'\n')
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1604 rm(sn.l, sn.r, ge.l, ge.r);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1605 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1606
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1607 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1608 ################################# Prepare counters and histogram bins ###################
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1609 {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1610 pvbins = NULL; # bin edges for p-values
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1611 statbins = 0; # bin edges for the test statistic (|t| or F)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1612 do.hist = FALSE;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1613 if( length(pvalue.hist) == 1 ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1614 if(pvalue.hist == "qqplot") {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1615 pvbins = c(0, 10^rev(seq(0, log10(.Machine$double.xmin)-1, -0.05)));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1616 } else
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1617 if( is.numeric(pvalue.hist) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1618 pvbins = seq(from = 0, to = 1, length.out = pvalue.hist+1);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1619 } else
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1620 if( pvalue.hist == TRUE ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1621 pvbins = seq(from = 0, to = 1, length.out = 100+1);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1622 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1623 } else
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1624 if( is.numeric(pvalue.hist) && (length(pvalue.hist) > 1) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1625 pvbins = pvalue.hist;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1626 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1627 if( is.null(pvbins) && (pvalue.hist != FALSE) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1628 stop("Wrong value of pvalue.hist. Must be FALSE, TRUE, \"qqplot\", or numerical");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1629 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1630 do.hist = !is.null(pvbins);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1631 if( do.hist ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1632 pvbins = sort(pvbins);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1633 statbins = threshfun(pvbins);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1634 if( pvOutputThreshold > 0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1635 hist.all = .histogrammer$new(pvbins, statbins);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1636 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1637 if( pvOutputThreshold.cis > 0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1638 hist.cis = .histogrammer$new(pvbins, statbins);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1639 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1640 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1641 rm( pvbins, statbins);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1642 if(min.pv.by.genesnp) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1643 if( pvOutputThreshold > 0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1644 minpv.tra = .minpvalue$new(snps,gene);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1645 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1646 if( pvOutputThreshold.cis > 0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1647 minpv.cis = .minpvalue$new(snps,gene);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1648 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1649 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1650 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1651 ################################# Main loop #############################################
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1652 {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1653 beta = NULL;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1654 n.tests.all = 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1655 n.tests.cis = 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1656 n.eqtls.tra = 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1657 n.eqtls.cis = 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1658
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1659 status("Performing eQTL analysis");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1660 # ss = 1; gg = 1;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1661 # ss = snps$nSlices(); gg = gene$nSlices();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1662
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1663 snps_offset = 0;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1664 for(ss in 1:snps$nSlices()) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1665 # for(ss in 1:min(2,snps$nSlices())) { #for debug
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1666 cursnps = NULL;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1667 nrcs = nrow(snps$getSliceRaw(ss));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1668
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1669 # loop only through the useful stuff
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1670 for(gg in if(pvOutputThreshold>0){1:gene$nSlices()}else{.seq(gg.1[ss],gg.2[ss])} ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1671 gene_offset = gene_offsets[gg];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1672 curgene = gene$getSlice(gg);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1673 nrcg = nrow(curgene);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1674 if(nrcg == 0) next;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1675
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1676 rp = "";
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1677
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1678 statistic = NULL;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1679 select.cis.raw = NULL;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1680 ## do cis analysis
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1681 # if( (pvOutputThreshold.cis > 0) && ( is.cis.pair(gg, ss) ) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1682 if( (pvOutputThreshold.cis > 0) && (gg >= gg.1[ss]) && (gg <= gg.2[ss]) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1683
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1684 if( is.null( statistic ) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1685 if( is.null( cursnps ) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1686 cursnps = orthonormalize.snps( snps_process( snps$getSlice(ss) ), ss );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1687 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1688 mat = vector("list", length(cursnps));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1689 for(d in 1:length(cursnps)) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1690 mat[[d]] = tcrossprod(curgene, cursnps[[d]]);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1691 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1692 statistic = statistic.fun( mat );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1693 astatistic = afun(statistic);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1694 # rm(mat);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1695 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1696
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1697 # sn.l = findInterval(geneloc[[gg]][ ,1] - cisDist-1 +1 , snpsloc[[ss]]);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1698 # sn.r = findInterval(geneloc[[gg]][ ,2] + cisDist -1 , snpsloc[[ss]]);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1699 sn.l = findInterval(geneloc[[gg]][ ,1] - cisDist-1, snpsloc[[ss]]);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1700 sn.r = findInterval(geneloc[[gg]][ ,2] + cisDist, snpsloc[[ss]]);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1701 xx = unlist(lapply(which(sn.r>sn.l),FUN=function(x){(sn.l[x]:(sn.r[x]-1))*nrow(statistic)+x}))
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1702 select.cis.raw = xx[ astatistic[xx] >= thresh.cis ];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1703 select.cis = arrayInd(select.cis.raw, dim(statistic))
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1704
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1705 n.tests.cis = n.tests.cis + length(xx);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1706 n.eqtls.cis = n.eqtls.cis + length(select.cis.raw);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1707
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1708 if( do.hist )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1709 hist.cis$update(astatistic[xx]);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1710
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1711 if( min.pv.by.genesnp ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1712 # minpv.cis$updatecis(ss, gg, arrayInd(xx, dim(statistic)), astatistic[xx])
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1713 temp = double(length(astatistic));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1714 dim(temp) = dim(astatistic);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1715 temp[xx] = astatistic[xx];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1716 minpv.cis$update(ss, gg, temp);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1717 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1718
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1719 if(!is.null(betafun))
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1720 beta = betafun(mat[[length(mat)]][select.cis.raw], ss, gg, select.cis);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1721
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1722 saver.cis$update( snps_offset + select.cis[ , 2],
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1723 gene_offset + select.cis[ , 1],
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1724 statistic[select.cis.raw],
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1725 beta);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1726
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1727 # statistic.select.cis = statistic[ select.cis ];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1728 # test = testfun( statistic.select.cis );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1729 # pv = pvfun(test);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1730 # Saver.cis$WriteBlock( cbind(snps_offset + select.cis[ , 2], gene_offset + select.cis[ , 1], test, pv) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1731 # counter.cis$Update(gg, ss, select.cis, pv, n.tests = length(xx), if(do.hist) afun(statistic[xx]) )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1732 rp = paste(rp, ", ", formatC(n.eqtls.cis, big.mark=",", format = "f", digits = 0), " cis-eQTLs", sep = "");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1733 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1734 ## do trans/all analysis
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1735 if(pvOutputThreshold>0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1736 if( is.null( statistic ) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1737 if( is.null( cursnps ) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1738 cursnps = orthonormalize.snps( snps_process( snps$getSlice(ss) ), ss );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1739 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1740 mat = vector("list", length(cursnps));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1741 for(d in 1:length(cursnps)) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1742 mat[[d]] = tcrossprod(curgene, cursnps[[d]]);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1743 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1744 statistic = statistic.fun( mat );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1745 astatistic = afun(statistic);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1746 # rm(mat);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1747 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1748
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1749 if( do.hist )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1750 hist.all$update(astatistic);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1751
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1752 if(!is.null(select.cis.raw))
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1753 astatistic[xx] = -1;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1754 # select.tra.raw = select.tra.raw[!(select.tra.raw %in% select.cis.raw)];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1755
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1756 select.tra.raw = which( astatistic >= thresh);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1757 select.tra = arrayInd(select.tra.raw, dim(statistic))
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1758
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1759 n.eqtls.tra = n.eqtls.tra + length(select.tra.raw);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1760 n.tests.all = n.tests.all + length(statistic);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1761
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1762 if(!is.null(betafun))
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1763 beta = betafun(mat[[length(mat)]][select.tra.raw], ss, gg, select.tra);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1764
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1765 saver.tra$update( snps_offset + select.tra[ , 2],
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1766 gene_offset + select.tra[ , 1],
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1767 statistic[select.tra.raw],
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1768 beta);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1769
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1770 if( min.pv.by.genesnp )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1771 minpv.tra$update(ss, gg, astatistic)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1772
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1773 # statistic.select.tra = statistic[ select.tra ];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1774 # test = testfun( statistic.select.tra );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1775 # pv = pvfun( test );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1776 # Saver$WriteBlock( cbind( snps_offset + select.tra[ , 2], gene_offset + select.tra[ , 1], test, pv) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1777 # counter$Update(gg, ss, select.tra, pv, n.tests = nrcs*nrcg, if(do.hist) afun(statistic) )
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1778 rp = paste(rp, ", ", formatC(n.eqtls.tra, big.mark=",", format = "f", digits = 0), if(pvOutputThreshold.cis > 0)" trans-"else" ","eQTLs", sep = "")
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1779 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1780
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1781 #gene_offset = gene_offset + nrcg;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1782 if( !is.null(statistic) ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1783 per = 100*(gg/gene$nSlices() + ss-1) / snps$nSlices();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1784 cat( formatC(floor(per*100)/100, format = "f", width = 5, digits = 2), "% done" , rp, "\n", sep = "");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1785 flush.console();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1786 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1787 } # gg in 1:gene$nSlices()
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1788 snps_offset = snps_offset + nrow(snps$getSliceRaw(ss));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1789 } # ss in 1:snps$nSlices()
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1790 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1791 ################################# Results collection ####################################
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1792 {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1793 rez = list(time.in.sec = proc.time()[3] - start.time);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1794 rez$param = params;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1795
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1796 if(pvOutputThreshold.cis > 0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1797 rez.cis = list(ntests = n.tests.cis, neqtls = n.eqtls.cis);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1798 rez.cis = c(rez.cis, saver.cis$getResults( gene, snps, n.tests.cis) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1799 if(do.hist)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1800 rez.cis = c(rez.cis, hist.cis$getResults() );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1801 if(min.pv.by.genesnp)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1802 rez.cis = c(rez.cis, minpv.cis$getResults(snps, gene, pvfun = function(x){pvfun(testfun(x))}) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1803 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1804
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1805 if(pvOutputThreshold>0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1806 rez.all = list(ntests = n.tests.all, neqtls = n.eqtls.tra + n.eqtls.cis);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1807 if(pvOutputThreshold.cis > 0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1808 rez.tra = list(ntests = n.tests.all - n.tests.cis, neqtls = n.eqtls.tra);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1809 rez.tra = c(rez.tra, saver.tra$getResults( gene, snps, n.tests.all - n.tests.cis) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1810 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1811 rez.all = c(rez.all, saver.tra$getResults( gene, snps, n.tests.all ) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1812 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1813 if(do.hist) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1814 rez.all = c(rez.all, hist.all$getResults() );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1815 if(pvOutputThreshold.cis > 0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1816 rez.tra$hist.bins = rez.all$hist.bins;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1817 rez.tra$hist.counts = rez.all$hist.counts - rez.cis$hist.counts;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1818 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1819 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1820 if(min.pv.by.genesnp) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1821 if(pvOutputThreshold.cis > 0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1822 rez.tra = c(rez.tra, minpv.tra$getResults(snps, gene, pvfun = function(x){pvfun(testfun(x))}) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1823 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1824 rez.all = c(rez.all, minpv.tra$getResults(snps, gene, pvfun = function(x){pvfun(testfun(x))}) );
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1825 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1826 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1827 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1828
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1829 if(exists('rez.all')>0)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1830 rez$all = rez.all;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1831 if(exists('rez.tra')>0)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1832 rez$trans = rez.tra;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1833 if(exists('rez.cis')>0)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1834 rez$cis = rez.cis;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1835
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1836 class(rez) = c(class(rez),"MatrixEQTL");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1837 status("");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1838 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1839 # cat('s std ',snps.std$get(1),'\n');
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1840 # cat('g std ',gene.std$get(1),'\n');
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1841 ################################# Results collection ####################################
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1842 return(rez);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1843 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1844
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1845 .histme = function(m, name1, name2, ...) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1846 cnts = m$hist.counts;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1847 bins = m$hist.bins;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1848 ntst = m$ntests;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1849 centers = 0.5 * (bins[-1L] + bins[-length(bins)]);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1850 density = 0.5 / (bins[-1L] - centers) * cnts / ntst;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1851 ntext = paste("P-value distribution for ", name1, formatC(ntst, big.mark=",", format = "f", digits = 0), name2, " gene-SNP pairs ",sep="");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1852 r = structure(list(breaks = bins, counts = cnts, density = density,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1853 mids = centers, equidist = FALSE), class = "histogram");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1854 plot(r, main = ntext, ylab = "Density", xlab = "P-values", ...)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1855 abline( h = 1, col = "blue");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1856 return(invisible());
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1857 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1858
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1859 .qqme = function(m, lcol, cex, pch, ...) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1860 cnts = m$hist.counts;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1861 bins = m$hist.bins;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1862 ntst = m$ntests;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1863
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1864 cusu = cumsum(cnts) / ntst;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1865 ypos = bins[-1][is.finite(cusu)];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1866 xpos = cusu[is.finite(cusu)];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1867 lines(-log10(xpos), -log10(ypos), col = lcol, ...);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1868 # lines(xpos, ypos, col = lcol, ...);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1869 if(length(m$eqtls$pvalue)==0)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1870 return();
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1871 ypvs = -log10(m$eqtls$pvalue);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1872 xpvs = -log10(1:length(ypvs) / ntst);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1873 if(length(ypvs) > 1000) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1874 # need to filter a bit, make the plotting faster
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1875 levels = as.integer( xpvs/xpvs[1] * 1e3);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1876 keep = c(TRUE, diff(levels)!=0);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1877 levels = as.integer( ypvs/ypvs[1] * 1e3);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1878 keep = keep | c(TRUE, diff(levels)!=0);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1879 ypvs = ypvs[keep];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1880 xpvs = xpvs[keep];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1881 rm(keep)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1882 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1883 points(xpvs, ypvs, col = lcol, pch = pch, cex = cex, ...);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1884 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1885
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1886 plot.MatrixEQTL = function(x, cex = 0.5, pch = 19, xlim = NULL, ylim = NULL, ...) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1887 if( x$param$pvalue.hist == FALSE ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1888 warning("Cannot plot p-value distribution: the information was not recorded.\nUse pvalue.hist!=FALSE.");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1889 return(invisible());
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1890 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1891 if( x$param$pvalue.hist == "qqplot" ) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1892 xmin = 1/max(x$cis$ntests, x$all$ntests);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1893 ymax = NULL;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1894 if(!is.null(ylim)) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1895 ymax = ylim[2];
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1896 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1897 ymax = -log10(min(
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1898 x$cis$eqtls$pvalue[1], x$cis$hist.bins[ c(FALSE,x$cis$hist.counts>0)][1],
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1899 x$all$eqtls$pvalue[1], x$all$hist.bins[ c(FALSE,x$all$hist.counts>0)][1],
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1900 x$trans$eqtls$pvalue[1], x$trans$hist.bins[c(FALSE,x$trans$hist.counts>0)][1],
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1901 na.rm = TRUE))+0.1;
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1902 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1903 if(ymax == 0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1904 ymax = -log10(.Machine$double.xmin)
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1905 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1906 if(!is.null(ymax))
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1907 ylim = c(0,ymax);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1908
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1909 if(is.null(xlim))
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1910 xlim = c(0, -log10(xmin/1.5));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1911
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1912 plot(numeric(),numeric(), xlab = "-Log10(p-value), theoretical",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1913 ylab = "-Log10(p-value), observed",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1914 xlim = c(0, -log10(xmin/1.5)),
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1915 ylim = ylim,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1916 xaxs="i", yaxs="i", ...);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1917 lines(c(0,1e3), c(0,1e3), col = "gray");
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1918 if((x$param$pvOutputThreshold > 0) && (x$param$pvOutputThreshold.cis > 0)) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1919 .qqme( x$cis, "red", cex, pch, ...);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1920 .qqme( x$trans, "blue", cex, pch, ...);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1921 title(paste("QQ-plot for",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1922 formatC(x$cis$ntests, big.mark=",", format = "f", digits = 0),
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1923 "local and",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1924 formatC(x$trans$ntests, big.mark=",", format = "f", digits = 0),
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1925 "distant gene-SNP p-values"));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1926 lset = c(1,2,4);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1927 } else
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1928 if(x$param$pvOutputThreshold.cis > 0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1929 .qqme(x$cis, "red", cex, pch, ...);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1930 title(paste("QQ-plot for",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1931 formatC(x$cis$ntests, big.mark=",", format = "f", digits = 0),
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1932 "local gene-SNP p-values"));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1933 lset = c(1,4);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1934 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1935 .qqme(x$all, "blue", cex, pch, ...);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1936 title(paste("QQ-plot for all",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1937 formatC(x$all$ntests, big.mark=",", format = "f", digits = 0),
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1938 "gene-SNP p-values"));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1939 lset = c(3,4);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1940 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1941 legend("topleft",
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1942 c("Local p-values","Distant p-values","All p-values","diagonal")[lset],
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1943 col = c("red","blue","blue","gray")[lset],
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1944 text.col = c("red","blue","blue","gray")[lset],
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1945 pch = 20, lwd = 1, pt.cex = c(1,1,1,0)[lset])
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1946 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1947 if((x$param$pvOutputThreshold > 0) && (x$param$pvOutputThreshold.cis > 0)) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1948 par(mfrow=c(2,1));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1949 .histme(x$cis, "", " local", ...);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1950 tran = list(hist.counts = x$all$hist.counts - x$cis$hist.counts,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1951 hist.bins = x$all$hist.bins,
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1952 ntests = x$all$ntests - x$cis$ntests);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1953 .histme(x$trans,""," distant", ...);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1954 par(mfrow=c(1,1));
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1955 } else
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1956 if(x$param$pvOutputThreshold.cis > 0) {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1957 .histme(x$cis, "", " local", ...);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1958 } else {
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1959 .histme(x$all, "all ", "" , ...);
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1960 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1961 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1962 return(invisible());
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1963 }
cd4c8e4a4b5b Uploaded
jasonxu
parents:
diff changeset
1964