Mercurial > repos > marie-tremblay-metatoul > nmr_annotation
comparison nmr_preprocessing/ReadFids_Manon.R @ 2:7304ec2c9ab7 draft
Uploaded
author | marie-tremblay-metatoul |
---|---|
date | Mon, 30 Jul 2018 10:33:03 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:b55559a2854f | 2:7304ec2c9ab7 |
---|---|
1 ################################################################################################ | |
2 # | |
3 # Read FIDs in Bruker format | |
4 # | |
5 # | |
6 ################################################################################################ | |
7 | |
8 # ReadFid ============================================================================== | |
9 ReadFid <- function(path) { | |
10 | |
11 # Read 1D FID using Bruker XWinNMR and TopSpin format. It is inspired of the | |
12 # matNMR matlab library which deals with 2D FID and also other formats | |
13 # Read also the parameters in the acqus file | |
14 | |
15 paramFile <- file.path(path, "acqus") | |
16 # BYTEORDA: 0 -> Little Endian 1 -> Big Endian | |
17 params <- readParams(paramFile, c("TD", "BYTORDA", "DIGMOD", "DECIM", "DSPFVS", | |
18 "SW_h", "SW", "O1")) | |
19 | |
20 if (params[["DSPFVS"]] >= 20) { | |
21 # The group delay first order phase correction is given directly from version 20 | |
22 grpdly <- readParams(paramFile, c("GRPDLY")) | |
23 params[["GRPDLY"]] <- grpdly[["GRPDLY"]] | |
24 } | |
25 TD <- params[["TD"]] | |
26 endianness <- if (params$BYTORDA) | |
27 "big" else "little" | |
28 if (TD%%2 != 0) { | |
29 stop(paste("Only even numbers are allowed for size in TD because it is complex | |
30 data with the real and imaginary part for each element.", | |
31 "The TD value is in the", paramFile, "file")) | |
32 } | |
33 | |
34 # Interpret params Dwell Time, time between 2 data points in the FID | |
35 params[["DT"]] <- 1/(2 * params[["SW_h"]]) | |
36 | |
37 # Read fid | |
38 fidFile <- file.path(path, "fid") | |
39 fidOnDisk <- readBin(fidFile, what = "int", n = TD, size = 4L, endian = endianness) | |
40 | |
41 # Real size that is on disk (it should be equal to TD2, except for TopSpin/Bruker | |
42 # (which is our case) according to matNMR as just discussed | |
43 TDOnDisk <- length(fidOnDisk) | |
44 if (TDOnDisk < TD) { | |
45 warning("Size is smaller than expected, the rest is filled with zero so the size is the same for every fid") | |
46 fidGoodSize <- sapply(vector("list", length = TD), function(x) 0) | |
47 fidGoodSize[1:TDOnDisk] <- fidOnDisk | |
48 | |
49 } else if (TDOnDisk > TD) { | |
50 warning("Size is bigger than expected, the rest ignored so the size is the same for every fid") | |
51 fidGoodSize <- fidOnDisk(1:TD) | |
52 | |
53 } else { | |
54 fidGoodSize <- fidOnDisk | |
55 } | |
56 | |
57 fidRePart <- fidGoodSize[seq(from = 1, to = TD, by = 2)] | |
58 fidImPart <- fidGoodSize[seq(from = 2, to = TD, by = 2)] | |
59 fid <- complex(real = fidRePart, imaginary = fidImPart) | |
60 | |
61 return(list(fid = fid, params = params)) | |
62 } | |
63 | |
64 | |
65 | |
66 | |
67 # getDirsContainingFid ============================================================================== | |
68 getDirsContainingFid <- function(path) { | |
69 subdirs <- dir(path, full.names = TRUE) | |
70 if (length(subdirs) > 0) { | |
71 cond <- sapply(subdirs, function(x) { | |
72 content <- dir(x) | |
73 # subdirs must contain fid, acqu and acqus files | |
74 return("fid" %in% content && "acqu" %in% content && "acqus" %in% content) | |
75 }) | |
76 subdirs <- subdirs[cond] | |
77 } | |
78 return(subdirs) | |
79 } | |
80 | |
81 | |
82 | |
83 | |
84 | |
85 | |
86 # beginTreatment ============================================================================== | |
87 | |
88 beginTreatment <- function(name, Signal_data = NULL, Signal_info = NULL, | |
89 force.real = FALSE) { | |
90 | |
91 cat("Begin", name, "\n") | |
92 | |
93 | |
94 # Formatting the Signal_data and Signal_info ----------------------- | |
95 | |
96 vec <- is.vector(Signal_data) | |
97 if (vec) { | |
98 Signal_data <- vec2mat(Signal_data) | |
99 } | |
100 if (is.vector(Signal_info)) { | |
101 Signal_info <- vec2mat(Signal_info) | |
102 } | |
103 if (!is.null(Signal_data)) { | |
104 if (!is.matrix(Signal_data)) { | |
105 stop("Signal_data is not a matrix.") | |
106 } | |
107 if (!is.complex(Signal_data) && !is.numeric(Signal_data)) { | |
108 stop("Signal_data contains non-numerical values.") | |
109 } | |
110 } | |
111 if (!is.null(Signal_info) && !is.matrix(Signal_info)) { | |
112 stop("Signal_info is not a matrix.") | |
113 } | |
114 | |
115 | |
116 Original_data <- Signal_data | |
117 | |
118 # Extract the real part of the spectrum --------------------------- | |
119 | |
120 if (force.real) { | |
121 if (is.complex(Signal_data)) { | |
122 Signal_data <- Re(Signal_data) | |
123 } else { | |
124 # The signal is numeric Im(Signal_data) is zero anyway so let's avoid | |
125 # using complex(real=...,imaginary=0) which would give a complex signal | |
126 # in endTreatment() | |
127 force.real <- FALSE | |
128 } | |
129 } | |
130 | |
131 | |
132 # Return the formatted data and metadata entries -------------------- | |
133 | |
134 return(list(start = proc.time(), vec = vec, force.real = force.real, | |
135 Original_data = Original_data, Signal_data = Signal_data, Signal_info = Signal_info)) | |
136 } | |
137 | |
138 | |
139 # endTreatment ============================================================================== | |
140 | |
141 endTreatment <- function(name, begin_info, Signal_data) { | |
142 end_time = proc.time() # record it as soon as possible | |
143 start_time = begin_info[["start"]] | |
144 delta_time = end_time - start_time | |
145 delta = delta_time[] | |
146 cat("End", name, "\n") | |
147 cat("It lasted", | |
148 round(delta["user.self"], 3), "s user time,", | |
149 round(delta["sys.self"] , 3), "s system time and", | |
150 round(delta["elapsed"] , 3), "s elapsed time.\n") | |
151 if (begin_info[["force.real"]]) { | |
152 # The imaginary part is left untouched | |
153 i <- complex(real=0, imaginary=1) | |
154 Signal_data = Signal_data + i * Im(begin_info[["Original_data"]]) | |
155 } | |
156 if (begin_info[["vec"]]) { | |
157 Signal_data = Signal_data[1,] | |
158 } | |
159 return(Signal_data) | |
160 } | |
161 | |
162 | |
163 # checkArg ============================================================================== | |
164 | |
165 checkArg <- function(arg, checks, can.be.null=FALSE) { | |
166 check.list <- list(bool=c(is.logical, "a boolean"), | |
167 int =c(function(x){x%%1==0}, "an integer"), | |
168 num =c(is.numeric, "a numeric"), | |
169 str =c(is.character, "a string"), | |
170 pos =c(function(x){x>0}, "positive"), | |
171 pos0=c(function(x){x>=0}, "positive or zero"), | |
172 l1 =c(function(x){length(x)==1}, "of length 1") | |
173 ) | |
174 if (is.null(arg)) { | |
175 if (!can.be.null) { | |
176 stop(deparse(substitute(arg)), " is null.") | |
177 } | |
178 } else { | |
179 if (is.matrix(arg)) { | |
180 stop(deparse(substitute(arg)), " is not scalar.") | |
181 } | |
182 for (c in checks) { | |
183 if (!check.list[[c]][[1]](arg)) { | |
184 stop(deparse(substitute(arg)), " is not ", check.list[[c]][[2]], ".") | |
185 } | |
186 } | |
187 } | |
188 } | |
189 | |
190 | |
191 # getArg ============================================================================== | |
192 | |
193 getArg <- function(arg, info, argname, can.be.absent=FALSE) { | |
194 if (is.null(arg)) { | |
195 start <- paste("impossible to get argument", argname, "it was not given directly and"); | |
196 if (!is.matrix(info)) { | |
197 stop(paste(start, "the info matrix was not given")) | |
198 } | |
199 if (!(argname %in% colnames(info))) { | |
200 if (can.be.absent) { | |
201 return(NULL) | |
202 } else { | |
203 stop(paste(start, "is not in the info matrix")) | |
204 } | |
205 } | |
206 if (nrow(info) < 1) { | |
207 stop(paste(start, "the info matrix has no row")) | |
208 } | |
209 arg <- info[1,argname] | |
210 if (is.na(arg)) { | |
211 stop(paste(start, "it is NA in the info matrix")) | |
212 } | |
213 } | |
214 return(arg) | |
215 } | |
216 | |
217 | |
218 | |
219 # getTitle ============================================================================== | |
220 | |
221 # Get the name of the signal from the title file or fromt the name of the subdirectory | |
222 # Get the name of the signal from the title file or fromt the name of the subdirectory | |
223 | |
224 getTitle <- function(path, l, subdirs) { | |
225 title <- NULL | |
226 title_file <- file.path(file.path(file.path(path, "pdata"), "1"), "title") | |
227 if (file.exists(title_file)) { | |
228 lines <- readLines(title_file, warn = FALSE) | |
229 if (length(lines) >= 1) { | |
230 first_line <- gsub("^\\s+|\\s+$", "", lines[l]) | |
231 if (nchar(first_line) >= 1) { | |
232 title <- first_line | |
233 } else { | |
234 warning(paste("The first line of the title file is blank for directory ", | |
235 path)) | |
236 } | |
237 } else { | |
238 warning(paste("The title file is empty for directory ", path)) | |
239 } | |
240 } else { | |
241 warning(paste("Title file doesn't exists for directory ", path, "\n the repertory name is used instead")) | |
242 } | |
243 if (is.null(title)) { | |
244 if(subdirs) { | |
245 separator <- .Platform$file.sep | |
246 path_elem <- strsplit(path,separator)[[1]] | |
247 title <- paste(path_elem[length(path_elem)-1], path_elem[length(path_elem)], sep = "_") | |
248 } else{title <- basename(path)} | |
249 } | |
250 return(title) | |
251 } | |
252 | |
253 | |
254 | |
255 # readParams ============================================================================== | |
256 # Read parameter values for Fid_info in the ReadFids function | |
257 | |
258 readParams <- function(file, paramsName) { | |
259 | |
260 isDigit <- function(c) { | |
261 return(suppressWarnings(!is.na(as.numeric(c)))) | |
262 } | |
263 lines <- readLines(file) | |
264 params <- sapply(paramsName, function(x) NULL) | |
265 | |
266 for (paramName in paramsName) { | |
267 # Find the line with the parameter I add a '$' '=' in the pattern so that for | |
268 # example 'TD0' is not found where I look for 'TD' and LOCSW and WBSW when I look | |
269 # for 'SW' | |
270 pattern <- paste("\\$", paramName, "=", sep = "") | |
271 occurences <- grep(pattern, lines) | |
272 if (length(occurences) == 0L) { | |
273 stop(paste(file, "has no field", pattern)) | |
274 } | |
275 if (length(occurences) > 1L) { | |
276 warning(paste(file, "has more that one field", pattern, " I take the first one")) | |
277 } | |
278 line <- lines[occurences[1]] | |
279 | |
280 # Cut beginning and end of the line '##$TD= 65536' -> '65536' | |
281 igual = as.numeric(regexpr("=", line)) | |
282 | |
283 first <- igual | |
284 while (first <= nchar(line) & !isDigit(substr(line, first, first))) { | |
285 first <- first + 1 | |
286 } | |
287 last <- nchar(line) | |
288 while (last > 0 & !isDigit(substr(line, last, last))) { | |
289 last <- last - 1 | |
290 } | |
291 params[paramName] <- as.numeric(substr(line, first, last)) | |
292 } | |
293 return(params) | |
294 } | |
295 | |
296 | |
297 | |
298 # ReadFids ============================================================================== | |
299 ReadFids <- function(path, l = 1, subdirs = FALSE, subdirs.names = FALSE) { | |
300 | |
301 # Data initialisation and checks ---------------------------------------------- | |
302 begin_info <- beginTreatment("ReadFids") | |
303 checkArg(path, c("str")) | |
304 checkArg(l, c("pos")) | |
305 if (file.exists(path) == FALSE) { | |
306 stop(paste("Invalid path:", path)) | |
307 } | |
308 | |
309 | |
310 # Extract the FIDs and their info ---------------------------------------------- | |
311 | |
312 if (subdirs == FALSE) { | |
313 fidDirs <- getDirsContainingFid(path) | |
314 n <- length(fidDirs) | |
315 if (n == 0L) { | |
316 stop(paste("No valid fid in", path)) | |
317 } | |
318 fidNames <- sapply(X = fidDirs, FUN = getTitle, l = l, subdirs = subdirs, USE.NAMES = F) | |
319 for (i in 1:n) { | |
320 fidList <- ReadFid(fidDirs[i]) | |
321 fid <- fidList[["fid"]] | |
322 info <- fidList[["params"]] | |
323 m <- length(fid) | |
324 if (i == 1) { | |
325 Fid_data <- matrix(nrow = n, ncol = m, dimnames = list(fidNames, | |
326 info[["DT"]] * (0:(m - 1)))) | |
327 Fid_info <- matrix(nrow = n, ncol = length(info), dimnames = list(fidNames, | |
328 names(info))) | |
329 } | |
330 Fid_data[i, ] <- fid | |
331 Fid_info[i, ] <- unlist(info) | |
332 } | |
333 | |
334 } else { | |
335 maindirs <- dir(path, full.names = TRUE) # subdirectories | |
336 Fid_data <- numeric() | |
337 Fid_info <- numeric() | |
338 | |
339 fidDirs <- c() | |
340 for (j in maindirs) { | |
341 fd <- getDirsContainingFid(j) # recoved FIDs from subdirectories | |
342 n <- length(fd) | |
343 if (n > 0L) { | |
344 fidDirs <- c(fidDirs, fd) | |
345 } else {warning(paste("No valid fid in",j ))} | |
346 } | |
347 | |
348 if (subdirs.names==TRUE) { | |
349 fidNames <- dir(path) | |
350 } else {fidNames <- sapply(X = fidDirs, FUN = getTitle, l = l, subdirs = subdirs, USE.NAMES = F)} | |
351 | |
352 for (i in 1:length(fidNames)) { | |
353 fidList <- ReadFid(fidDirs[i]) | |
354 fid <- fidList[["fid"]] | |
355 info <- fidList[["params"]] | |
356 m <- length(fid) | |
357 if (i == 1) { | |
358 Fid_data <- matrix(nrow = length(fidNames), ncol = m, dimnames = list(fidNames, | |
359 info[["DT"]] * (0:(m - 1)))) | |
360 Fid_info <- matrix(nrow = length(fidNames), ncol = length(info), dimnames = list(fidNames, | |
361 names(info))) | |
362 } | |
363 Fid_data[i, ] <- fid | |
364 Fid_info[i, ] <- unlist(info) | |
365 } | |
366 | |
367 | |
368 } | |
369 | |
370 # Check for non-unique IDs ---------------------------------------------- | |
371 NonnuniqueIds <- sum(duplicated(row.names(Fid_data))) | |
372 cat("dim Fid_data: ", dim(Fid_data), "\n") | |
373 cat("IDs: ", rownames(Fid_data), "\n") | |
374 cat("non-unique IDs?", NonnuniqueIds, "\n") | |
375 if (NonnuniqueIds > 0) { | |
376 warning("There are duplicated IDs: ", Fid_data[duplicated(Fid_data)]) | |
377 } | |
378 | |
379 | |
380 # Return the results ---------------------------------------------- | |
381 return(list(Fid_data = endTreatment("ReadFids", begin_info, Fid_data), Fid_info = Fid_info)) | |
382 | |
383 } | |
384 | |
385 | |
386 |