Mercurial > repos > marie-tremblay-metatoul > nmr_annotation
comparison nmr_preprocessing/ReadFids_script.R @ 2:7304ec2c9ab7 draft
Uploaded
author | marie-tremblay-metatoul |
---|---|
date | Mon, 30 Jul 2018 10:33:03 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
1:b55559a2854f | 2:7304ec2c9ab7 |
---|---|
1 ################################################################################################ | |
2 # | |
3 # Read FIDs in Bruker format | |
4 # | |
5 # | |
6 ################################################################################################ | |
7 | |
8 | |
9 # vec2mat ============================================================================== | |
10 vec2mat <- function(vec) { | |
11 return(matrix(vec, nrow = 1, dimnames = list(c(1), names(vec)))) | |
12 } | |
13 | |
14 | |
15 # ReadFid ============================================================================== | |
16 ReadFid <- function(path) { | |
17 | |
18 # Read 1D FID using Bruker XWinNMR and TopSpin format. It is inspired of the | |
19 # matNMR matlab library which deals with 2D FID and also other formats | |
20 # Read also the parameters in the acqus file | |
21 | |
22 paramFile <- file.path(path, "acqus") | |
23 # BYTEORDA: 0 -> Little Endian 1 -> Big Endian | |
24 params <- readParams(paramFile, c("TD", "BYTORDA", "DIGMOD", "DECIM", "DSPFVS", | |
25 "SW_h", "SW", "O1")) | |
26 | |
27 if (params[["DSPFVS"]] >= 20) { | |
28 # The group delay first order phase correction is given directly from version 20 | |
29 grpdly <- readParams(paramFile, c("GRPDLY")) | |
30 params[["GRPDLY"]] <- grpdly[["GRPDLY"]] | |
31 } | |
32 TD <- params[["TD"]] | |
33 endianness <- if (params$BYTORDA) | |
34 "big" else "little" | |
35 if (TD%%2 != 0) { | |
36 stop(paste("Only even numbers are allowed for size in TD because it is complex | |
37 data with the real and imaginary part for each element.", | |
38 "The TD value is in the", paramFile, "file")) | |
39 } | |
40 | |
41 # Interpret params Dwell Time, time between 2 data points in the FID | |
42 params[["DT"]] <- 1/(2 * params[["SW_h"]]) | |
43 | |
44 # Read fid | |
45 fidFile <- file.path(path, "fid") | |
46 fidOnDisk <- readBin(fidFile, what = "int", n = TD, size = 4L, endian = endianness) | |
47 | |
48 # Real size that is on disk (it should be equal to TD2, except for TopSpin/Bruker | |
49 # (which is our case) according to matNMR as just discussed | |
50 TDOnDisk <- length(fidOnDisk) | |
51 if (TDOnDisk < TD) { | |
52 warning("Size is smaller than expected, the rest is filled with zero so the size is the same for every fid") | |
53 fidGoodSize <- sapply(vector("list", length = TD), function(x) 0) | |
54 fidGoodSize[1:TDOnDisk] <- fidOnDisk | |
55 | |
56 } else if (TDOnDisk > TD) { | |
57 warning("Size is bigger than expected, the rest ignored so the size is the same for every fid") | |
58 fidGoodSize <- fidOnDisk(1:TD) | |
59 | |
60 } else { | |
61 fidGoodSize <- fidOnDisk | |
62 } | |
63 | |
64 fidRePart <- fidGoodSize[seq(from = 1, to = TD, by = 2)] | |
65 fidImPart <- fidGoodSize[seq(from = 2, to = TD, by = 2)] | |
66 fid <- complex(real = fidRePart, imaginary = fidImPart) | |
67 | |
68 return(list(fid = fid, params = params)) | |
69 } | |
70 | |
71 | |
72 | |
73 | |
74 # getDirsContainingFid ============================================================================== | |
75 getDirsContainingFid <- function(path) { | |
76 subdirs <- dir(path, full.names = TRUE) | |
77 if (length(subdirs) > 0) { | |
78 cond <- sapply(subdirs, function(x) { | |
79 content <- dir(x) | |
80 # subdirs must contain fid, acqu and acqus files | |
81 return("fid" %in% content && "acqu" %in% content && "acqus" %in% content) | |
82 }) | |
83 subdirs <- subdirs[cond] | |
84 } | |
85 return(subdirs) | |
86 } | |
87 | |
88 | |
89 | |
90 | |
91 | |
92 | |
93 # beginTreatment ============================================================================== | |
94 | |
95 beginTreatment <- function(name, Signal_data = NULL, Signal_info = NULL, | |
96 force.real = FALSE) { | |
97 | |
98 cat("Begin", name, "\n") | |
99 | |
100 | |
101 # Formatting the Signal_data and Signal_info ----------------------- | |
102 | |
103 vec <- is.vector(Signal_data) | |
104 if (vec) { | |
105 Signal_data <- vec2mat(Signal_data) | |
106 } | |
107 if (is.vector(Signal_info)) { | |
108 Signal_info <- vec2mat(Signal_info) | |
109 } | |
110 if (!is.null(Signal_data)) { | |
111 if (!is.matrix(Signal_data)) { | |
112 stop("Signal_data is not a matrix.") | |
113 } | |
114 if (!is.complex(Signal_data) && !is.numeric(Signal_data)) { | |
115 stop("Signal_data contains non-numerical values.") | |
116 } | |
117 } | |
118 if (!is.null(Signal_info) && !is.matrix(Signal_info)) { | |
119 stop("Signal_info is not a matrix.") | |
120 } | |
121 | |
122 | |
123 Original_data <- Signal_data | |
124 | |
125 # Extract the real part of the spectrum --------------------------- | |
126 | |
127 if (force.real) { | |
128 if (is.complex(Signal_data)) { | |
129 Signal_data <- Re(Signal_data) | |
130 } else { | |
131 # The signal is numeric Im(Signal_data) is zero anyway so let's avoid | |
132 # using complex(real=...,imaginary=0) which would give a complex signal | |
133 # in endTreatment() | |
134 force.real <- FALSE | |
135 } | |
136 } | |
137 | |
138 | |
139 # Return the formatted data and metadata entries -------------------- | |
140 | |
141 return(list(start = proc.time(), vec = vec, force.real = force.real, | |
142 Original_data = Original_data, Signal_data = Signal_data, Signal_info = Signal_info)) | |
143 } | |
144 | |
145 | |
146 # endTreatment ============================================================================== | |
147 | |
148 endTreatment <- function(name, begin_info, Signal_data) { | |
149 end_time = proc.time() # record it as soon as possible | |
150 start_time = begin_info[["start"]] | |
151 delta_time = end_time - start_time | |
152 delta = delta_time[] | |
153 cat("End", name, "\n") | |
154 cat("It lasted", | |
155 round(delta["user.self"], 3), "s user time,", | |
156 round(delta["sys.self"] , 3), "s system time and", | |
157 round(delta["elapsed"] , 3), "s elapsed time.\n") | |
158 if (begin_info[["force.real"]]) { | |
159 # The imaginary part is left untouched | |
160 i <- complex(real=0, imaginary=1) | |
161 Signal_data = Signal_data + i * Im(begin_info[["Original_data"]]) | |
162 } | |
163 if (begin_info[["vec"]]) { | |
164 Signal_data = Signal_data[1,] | |
165 } | |
166 return(Signal_data) | |
167 } | |
168 | |
169 | |
170 # checkArg ============================================================================== | |
171 | |
172 checkArg <- function(arg, checks, can.be.null=FALSE) { | |
173 check.list <- list(bool=c(is.logical, "a boolean"), | |
174 int =c(function(x){x%%1==0}, "an integer"), | |
175 num =c(is.numeric, "a numeric"), | |
176 str =c(is.character, "a string"), | |
177 pos =c(function(x){x>0}, "positive"), | |
178 pos0=c(function(x){x>=0}, "positive or zero"), | |
179 l1 =c(function(x){length(x)==1}, "of length 1") | |
180 ) | |
181 if (is.null(arg)) { | |
182 if (!can.be.null) { | |
183 stop(deparse(substitute(arg)), " is null.") | |
184 } | |
185 } else { | |
186 if (is.matrix(arg)) { | |
187 stop(deparse(substitute(arg)), " is not scalar.") | |
188 } | |
189 for (c in checks) { | |
190 if (!check.list[[c]][[1]](arg)) { | |
191 stop(deparse(substitute(arg)), " is not ", check.list[[c]][[2]], ".") | |
192 } | |
193 } | |
194 } | |
195 } | |
196 | |
197 | |
198 # getArg ============================================================================== | |
199 | |
200 getArg <- function(arg, info, argname, can.be.absent=FALSE) { | |
201 if (is.null(arg)) { | |
202 start <- paste("impossible to get argument", argname, "it was not given directly and"); | |
203 if (!is.matrix(info)) { | |
204 stop(paste(start, "the info matrix was not given")) | |
205 } | |
206 if (!(argname %in% colnames(info))) { | |
207 if (can.be.absent) { | |
208 return(NULL) | |
209 } else { | |
210 stop(paste(start, "is not in the info matrix")) | |
211 } | |
212 } | |
213 if (nrow(info) < 1) { | |
214 stop(paste(start, "the info matrix has no row")) | |
215 } | |
216 arg <- info[1,argname] | |
217 if (is.na(arg)) { | |
218 stop(paste(start, "it is NA in the info matrix")) | |
219 } | |
220 } | |
221 return(arg) | |
222 } | |
223 | |
224 | |
225 | |
226 # getTitle ============================================================================== | |
227 | |
228 # Get the name of the signal from the title file or fromt the name of the subdirectory | |
229 # Get the name of the signal from the title file or fromt the name of the subdirectory | |
230 | |
231 getTitle <- function(path, l, subdirs) { | |
232 title <- NULL | |
233 title_file <- file.path(file.path(file.path(path, "pdata"), "1"), "title") | |
234 if (file.exists(title_file)) { | |
235 lines <- readLines(title_file, warn = FALSE) | |
236 if (length(lines) >= 1) { | |
237 first_line <- gsub("^\\s+|\\s+$", "", lines[l]) | |
238 if (nchar(first_line) >= 1) { | |
239 title <- first_line | |
240 } else { | |
241 warning(paste("The", l ,"line of the title file is blank for directory ", | |
242 path, "and the (sub)dirs names are used instead")) | |
243 } | |
244 } else { | |
245 warning(paste("The title file is empty for directory ", path, "and the (sub)dirs names are used instead")) | |
246 } | |
247 } else { | |
248 warning(paste("Title file doesn't exists for directory ", path, "\n the (sub)dirs names are used instead")) | |
249 } | |
250 if (is.null(title)) { | |
251 if(subdirs) { | |
252 separator <- .Platform$file.sep | |
253 path_elem <- strsplit(path,separator)[[1]] | |
254 title <- paste(path_elem[length(path_elem)-1], path_elem[length(path_elem)], sep = "_") | |
255 } else{title <- basename(path)} | |
256 } | |
257 return(title) | |
258 } | |
259 | |
260 | |
261 | |
262 | |
263 # readParams ============================================================================== | |
264 # Read parameter values for Fid_info in the ReadFids function | |
265 | |
266 readParams <- function(file, paramsName) { | |
267 | |
268 isDigit <- function(c) { | |
269 return(suppressWarnings(!is.na(as.numeric(c)))) | |
270 } | |
271 lines <- readLines(file) | |
272 params <- sapply(paramsName, function(x) NULL) | |
273 | |
274 for (paramName in paramsName) { | |
275 # Find the line with the parameter I add a '$' '=' in the pattern so that for | |
276 # example 'TD0' is not found where I look for 'TD' and LOCSW and WBSW when I look | |
277 # for 'SW' | |
278 pattern <- paste("\\$", paramName, "=", sep = "") | |
279 occurences <- grep(pattern, lines) | |
280 if (length(occurences) == 0L) { | |
281 stop(paste(file, "has no field", pattern)) | |
282 } | |
283 if (length(occurences) > 1L) { | |
284 warning(paste(file, "has more that one field", pattern, " I take the first one")) | |
285 } | |
286 line <- lines[occurences[1]] | |
287 | |
288 # Cut beginning and end of the line '##$TD= 65536' -> '65536' | |
289 igual = as.numeric(regexpr("=", line)) | |
290 | |
291 first <- igual | |
292 while (first <= nchar(line) & !isDigit(substr(line, first, first))) { | |
293 first <- first + 1 | |
294 } | |
295 last <- nchar(line) | |
296 while (last > 0 & !isDigit(substr(line, last, last))) { | |
297 last <- last - 1 | |
298 } | |
299 params[paramName] <- as.numeric(substr(line, first, last)) | |
300 } | |
301 return(params) | |
302 } | |
303 | |
304 | |
305 | |
306 # ReadFids ============================================================================== | |
307 | |
308 ReadFids <- function(path, l = 1, subdirs = FALSE, dirs.names = FALSE) { | |
309 | |
310 # Data initialisation and checks ---------------------------------------------- | |
311 begin_info <- beginTreatment("ReadFids") | |
312 checkArg(path, c("str")) | |
313 checkArg(l, c("pos")) | |
314 if (file.exists(path) == FALSE) { | |
315 stop(paste("Invalid path:", path)) | |
316 } | |
317 | |
318 | |
319 # Extract the FIDs and their info ---------------------------------------------- | |
320 | |
321 if (subdirs == FALSE) { | |
322 fidDirs <- getDirsContainingFid(path) | |
323 n <- length(fidDirs) | |
324 if (n == 0L) { | |
325 stop(paste("No valid fid in", path)) | |
326 } | |
327 if (dirs.names) { | |
328 separator <- .Platform$file.sep | |
329 path_elem <- strsplit(fidDirs,separator) | |
330 fidNames <- sapply(path_elem, function(x) x[[length(path_elem[[1]])]]) | |
331 }else {fidNames <- sapply(X = fidDirs, FUN = getTitle, l = l, subdirs = subdirs, USE.NAMES = F)} | |
332 | |
333 for (i in 1:n) { | |
334 fidList <- ReadFid(fidDirs[i]) | |
335 fid <- fidList[["fid"]] | |
336 info <- fidList[["params"]] | |
337 m <- length(fid) | |
338 if (i == 1) { | |
339 Fid_data <- matrix(nrow = n, ncol = m, dimnames = list(fidNames, | |
340 info[["DT"]] * (0:(m - 1)))) | |
341 Fid_info <- matrix(nrow = n, ncol = length(info), dimnames = list(fidNames, | |
342 names(info))) | |
343 } | |
344 Fid_data[i, ] <- fid | |
345 Fid_info[i, ] <- unlist(info) | |
346 } | |
347 | |
348 } else { | |
349 maindirs <- dir(path, full.names = TRUE) # subdirectories | |
350 Fid_data <- numeric() | |
351 Fid_info <- numeric() | |
352 | |
353 fidDirs <- c() | |
354 for (j in maindirs) { | |
355 fd <- getDirsContainingFid(j) # recoved FIDs from subdirectories | |
356 n <- length(fd) | |
357 if (n > 0L) { | |
358 fidDirs <- c(fidDirs, fd) | |
359 } else {warning(paste("No valid fid in",j ))} | |
360 } | |
361 | |
362 if (dirs.names==TRUE) { | |
363 if (length(fidDirs)!= length(dir(path))) { # at least one subdir contains more than 1 FID | |
364 separator <- .Platform$file.sep | |
365 path_elem <- strsplit(fidDirs,separator) | |
366 fidNames <- sapply(path_elem, function(x) paste(x[[length(path_elem[[1]])-1]], | |
367 x[[length(path_elem[[1]])]], sep = "_")) | |
368 }else {fidNames <- dir(path)} | |
369 | |
370 } else {fidNames <- sapply(X = fidDirs, FUN = getTitle, l = l, subdirs = subdirs, USE.NAMES = F)} | |
371 | |
372 for (i in 1:length(fidNames)) { | |
373 fidList <- ReadFid(fidDirs[i]) | |
374 fid <- fidList[["fid"]] | |
375 info <- fidList[["params"]] | |
376 m <- length(fid) | |
377 if (i == 1) { | |
378 Fid_data <- matrix(nrow = length(fidNames), ncol = m, dimnames = list(fidNames, | |
379 info[["DT"]] * (0:(m - 1)))) | |
380 Fid_info <- matrix(nrow = length(fidNames), ncol = length(info), dimnames = list(fidNames, | |
381 names(info))) | |
382 } | |
383 Fid_data[i, ] <- fid | |
384 Fid_info[i, ] <- unlist(info) | |
385 } | |
386 | |
387 | |
388 } | |
389 | |
390 # Check for non-unique IDs ---------------------------------------------- | |
391 NonnuniqueIds <- sum(duplicated(row.names(Fid_data))) | |
392 cat("dim Fid_data: ", dim(Fid_data), "\n") | |
393 cat("IDs: ", rownames(Fid_data), "\n") | |
394 cat("non-unique IDs?", NonnuniqueIds, "\n") | |
395 if (NonnuniqueIds > 0) { | |
396 warning("There are duplicated IDs: ", Fid_data[duplicated(Fid_data)]) | |
397 } | |
398 | |
399 | |
400 # Return the results ---------------------------------------------- | |
401 return(list(Fid_data = endTreatment("ReadFids", begin_info, Fid_data), Fid_info = Fid_info)) | |
402 | |
403 } | |
404 |