Mercurial > repos > marie-tremblay-metatoul > nmr_preprocessing
comparison ReadFids_script.R @ 2:5e64657b4fe5 draft
planemo upload for repository https://github.com/workflow4metabolomics/nmr_preprocessing commit 22ca8782d7c4c0211e13c95b425d4f29f53f995e
| author | lecorguille |
|---|---|
| date | Wed, 28 Mar 2018 08:05:12 -0400 |
| parents | |
| children | 122df1bf0a8c |
comparison
equal
deleted
inserted
replaced
| 1:cbea5e9fd0b4 | 2:5e64657b4fe5 |
|---|---|
| 1 ################################################################################################ | |
| 2 # | |
| 3 # Read FIDs in Bruker format | |
| 4 # | |
| 5 # | |
| 6 ################################################################################################ | |
| 7 | |
| 8 | |
| 9 # vec2mat ============================================================================== | |
| 10 vec2mat <- function(vec) { | |
| 11 return(matrix(vec, nrow = 1, dimnames = list(c(1), names(vec)))) | |
| 12 } | |
| 13 | |
| 14 | |
| 15 # ReadFid ============================================================================== | |
| 16 ReadFid <- function(path) { | |
| 17 | |
| 18 # Read 1D FID using Bruker XWinNMR and TopSpin format. It is inspired of the | |
| 19 # matNMR matlab library which deals with 2D FID and also other formats | |
| 20 # Read also the parameters in the acqus file | |
| 21 | |
| 22 paramFile <- file.path(path, "acqus") | |
| 23 # BYTEORDA: 0 -> Little Endian 1 -> Big Endian | |
| 24 params <- readParams(paramFile, c("TD", "BYTORDA", "DIGMOD", "DECIM", "DSPFVS", | |
| 25 "SW_h", "SW", "O1")) | |
| 26 | |
| 27 if (params[["DSPFVS"]] >= 20) { | |
| 28 # The group delay first order phase correction is given directly from version 20 | |
| 29 grpdly <- readParams(paramFile, c("GRPDLY")) | |
| 30 params[["GRPDLY"]] <- grpdly[["GRPDLY"]] | |
| 31 } | |
| 32 TD <- params[["TD"]] | |
| 33 endianness <- if (params$BYTORDA) | |
| 34 "big" else "little" | |
| 35 if (TD%%2 != 0) { | |
| 36 stop(paste("Only even numbers are allowed for size in TD because it is complex | |
| 37 data with the real and imaginary part for each element.", | |
| 38 "The TD value is in the", paramFile, "file")) | |
| 39 } | |
| 40 | |
| 41 # Interpret params Dwell Time, time between 2 data points in the FID | |
| 42 params[["DT"]] <- 1/(2 * params[["SW_h"]]) | |
| 43 | |
| 44 # Read fid | |
| 45 fidFile <- file.path(path, "fid") | |
| 46 fidOnDisk <- readBin(fidFile, what = "int", n = TD, size = 4L, endian = endianness) | |
| 47 | |
| 48 # Real size that is on disk (it should be equal to TD2, except for TopSpin/Bruker | |
| 49 # (which is our case) according to matNMR as just discussed | |
| 50 TDOnDisk <- length(fidOnDisk) | |
| 51 if (TDOnDisk < TD) { | |
| 52 warning("Size is smaller than expected, the rest is filled with zero so the size is the same for every fid") | |
| 53 fidGoodSize <- sapply(vector("list", length = TD), function(x) 0) | |
| 54 fidGoodSize[1:TDOnDisk] <- fidOnDisk | |
| 55 | |
| 56 } else if (TDOnDisk > TD) { | |
| 57 warning("Size is bigger than expected, the rest ignored so the size is the same for every fid") | |
| 58 fidGoodSize <- fidOnDisk(1:TD) | |
| 59 | |
| 60 } else { | |
| 61 fidGoodSize <- fidOnDisk | |
| 62 } | |
| 63 | |
| 64 fidRePart <- fidGoodSize[seq(from = 1, to = TD, by = 2)] | |
| 65 fidImPart <- fidGoodSize[seq(from = 2, to = TD, by = 2)] | |
| 66 fid <- complex(real = fidRePart, imaginary = fidImPart) | |
| 67 | |
| 68 return(list(fid = fid, params = params)) | |
| 69 } | |
| 70 | |
| 71 | |
| 72 | |
| 73 | |
| 74 # getDirsContainingFid ============================================================================== | |
| 75 getDirsContainingFid <- function(path) { | |
| 76 subdirs <- dir(path, full.names = TRUE) | |
| 77 if (length(subdirs) > 0) { | |
| 78 cond <- sapply(subdirs, function(x) { | |
| 79 content <- dir(x) | |
| 80 # subdirs must contain fid, acqu and acqus files | |
| 81 return("fid" %in% content && "acqu" %in% content && "acqus" %in% content) | |
| 82 }) | |
| 83 subdirs <- subdirs[cond] | |
| 84 } | |
| 85 return(subdirs) | |
| 86 } | |
| 87 | |
| 88 | |
| 89 | |
| 90 | |
| 91 | |
| 92 | |
| 93 # beginTreatment ============================================================================== | |
| 94 | |
| 95 beginTreatment <- function(name, Signal_data = NULL, Signal_info = NULL, | |
| 96 force.real = FALSE) { | |
| 97 | |
| 98 cat("Begin", name, "\n") | |
| 99 | |
| 100 | |
| 101 # Formatting the Signal_data and Signal_info ----------------------- | |
| 102 | |
| 103 vec <- is.vector(Signal_data) | |
| 104 if (vec) { | |
| 105 Signal_data <- vec2mat(Signal_data) | |
| 106 } | |
| 107 if (is.vector(Signal_info)) { | |
| 108 Signal_info <- vec2mat(Signal_info) | |
| 109 } | |
| 110 if (!is.null(Signal_data)) { | |
| 111 if (!is.matrix(Signal_data)) { | |
| 112 stop("Signal_data is not a matrix.") | |
| 113 } | |
| 114 if (!is.complex(Signal_data) && !is.numeric(Signal_data)) { | |
| 115 stop("Signal_data contains non-numerical values.") | |
| 116 } | |
| 117 } | |
| 118 if (!is.null(Signal_info) && !is.matrix(Signal_info)) { | |
| 119 stop("Signal_info is not a matrix.") | |
| 120 } | |
| 121 | |
| 122 | |
| 123 Original_data <- Signal_data | |
| 124 | |
| 125 # Extract the real part of the spectrum --------------------------- | |
| 126 | |
| 127 if (force.real) { | |
| 128 if (is.complex(Signal_data)) { | |
| 129 Signal_data <- Re(Signal_data) | |
| 130 } else { | |
| 131 # The signal is numeric Im(Signal_data) is zero anyway so let's avoid | |
| 132 # using complex(real=...,imaginary=0) which would give a complex signal | |
| 133 # in endTreatment() | |
| 134 force.real <- FALSE | |
| 135 } | |
| 136 } | |
| 137 | |
| 138 | |
| 139 # Return the formatted data and metadata entries -------------------- | |
| 140 | |
| 141 return(list(start = proc.time(), vec = vec, force.real = force.real, | |
| 142 Original_data = Original_data, Signal_data = Signal_data, Signal_info = Signal_info)) | |
| 143 } | |
| 144 | |
| 145 | |
| 146 # endTreatment ============================================================================== | |
| 147 | |
| 148 endTreatment <- function(name, begin_info, Signal_data) { | |
| 149 end_time = proc.time() # record it as soon as possible | |
| 150 start_time = begin_info[["start"]] | |
| 151 delta_time = end_time - start_time | |
| 152 delta = delta_time[] | |
| 153 cat("End", name, "\n") | |
| 154 cat("It lasted", | |
| 155 round(delta["user.self"], 3), "s user time,", | |
| 156 round(delta["sys.self"] , 3), "s system time and", | |
| 157 round(delta["elapsed"] , 3), "s elapsed time.\n") | |
| 158 if (begin_info[["force.real"]]) { | |
| 159 # The imaginary part is left untouched | |
| 160 i <- complex(real=0, imaginary=1) | |
| 161 Signal_data = Signal_data + i * Im(begin_info[["Original_data"]]) | |
| 162 } | |
| 163 if (begin_info[["vec"]]) { | |
| 164 Signal_data = Signal_data[1,] | |
| 165 } | |
| 166 return(Signal_data) | |
| 167 } | |
| 168 | |
| 169 | |
| 170 # checkArg ============================================================================== | |
| 171 | |
| 172 checkArg <- function(arg, checks, can.be.null=FALSE) { | |
| 173 check.list <- list(bool=c(is.logical, "a boolean"), | |
| 174 int =c(function(x){x%%1==0}, "an integer"), | |
| 175 num =c(is.numeric, "a numeric"), | |
| 176 str =c(is.character, "a string"), | |
| 177 pos =c(function(x){x>0}, "positive"), | |
| 178 pos0=c(function(x){x>=0}, "positive or zero"), | |
| 179 l1 =c(function(x){length(x)==1}, "of length 1") | |
| 180 ) | |
| 181 if (is.null(arg)) { | |
| 182 if (!can.be.null) { | |
| 183 stop(deparse(substitute(arg)), " is null.") | |
| 184 } | |
| 185 } else { | |
| 186 if (is.matrix(arg)) { | |
| 187 stop(deparse(substitute(arg)), " is not scalar.") | |
| 188 } | |
| 189 for (c in checks) { | |
| 190 if (!check.list[[c]][[1]](arg)) { | |
| 191 stop(deparse(substitute(arg)), " is not ", check.list[[c]][[2]], ".") | |
| 192 } | |
| 193 } | |
| 194 } | |
| 195 } | |
| 196 | |
| 197 | |
| 198 # getArg ============================================================================== | |
| 199 | |
| 200 getArg <- function(arg, info, argname, can.be.absent=FALSE) { | |
| 201 if (is.null(arg)) { | |
| 202 start <- paste("impossible to get argument", argname, "it was not given directly and"); | |
| 203 if (!is.matrix(info)) { | |
| 204 stop(paste(start, "the info matrix was not given")) | |
| 205 } | |
| 206 if (!(argname %in% colnames(info))) { | |
| 207 if (can.be.absent) { | |
| 208 return(NULL) | |
| 209 } else { | |
| 210 stop(paste(start, "is not in the info matrix")) | |
| 211 } | |
| 212 } | |
| 213 if (nrow(info) < 1) { | |
| 214 stop(paste(start, "the info matrix has no row")) | |
| 215 } | |
| 216 arg <- info[1,argname] | |
| 217 if (is.na(arg)) { | |
| 218 stop(paste(start, "it is NA in the info matrix")) | |
| 219 } | |
| 220 } | |
| 221 return(arg) | |
| 222 } | |
| 223 | |
| 224 | |
| 225 | |
| 226 # getTitle ============================================================================== | |
| 227 | |
| 228 # Get the name of the signal from the title file or fromt the name of the subdirectory | |
| 229 # Get the name of the signal from the title file or fromt the name of the subdirectory | |
| 230 | |
| 231 getTitle <- function(path, l, subdirs) { | |
| 232 title <- NULL | |
| 233 title_file <- file.path(file.path(file.path(path, "pdata"), "1"), "title") | |
| 234 if (file.exists(title_file)) { | |
| 235 lines <- readLines(title_file, warn = FALSE) | |
| 236 if (length(lines) >= 1) { | |
| 237 first_line <- gsub("^\\s+|\\s+$", "", lines[l]) | |
| 238 if (nchar(first_line) >= 1) { | |
| 239 title <- first_line | |
| 240 } else { | |
| 241 warning(paste("The", l ,"line of the title file is blank for directory ", | |
| 242 path, "and the (sub)dirs names are used instead")) | |
| 243 } | |
| 244 } else { | |
| 245 warning(paste("The title file is empty for directory ", path, "and the (sub)dirs names are used instead")) | |
| 246 } | |
| 247 } else { | |
| 248 warning(paste("Title file doesn't exists for directory ", path, "\n the (sub)dirs names are used instead")) | |
| 249 } | |
| 250 if (is.null(title)) { | |
| 251 if(subdirs) { | |
| 252 separator <- .Platform$file.sep | |
| 253 path_elem <- strsplit(path,separator)[[1]] | |
| 254 title <- paste(path_elem[length(path_elem)-1], path_elem[length(path_elem)], sep = "_") | |
| 255 } else{title <- basename(path)} | |
| 256 } | |
| 257 return(title) | |
| 258 } | |
| 259 | |
| 260 | |
| 261 | |
| 262 | |
| 263 # readParams ============================================================================== | |
| 264 # Read parameter values for Fid_info in the ReadFids function | |
| 265 | |
| 266 readParams <- function(file, paramsName) { | |
| 267 | |
| 268 isDigit <- function(c) { | |
| 269 return(suppressWarnings(!is.na(as.numeric(c)))) | |
| 270 } | |
| 271 lines <- readLines(file) | |
| 272 params <- sapply(paramsName, function(x) NULL) | |
| 273 | |
| 274 for (paramName in paramsName) { | |
| 275 # Find the line with the parameter I add a '$' '=' in the pattern so that for | |
| 276 # example 'TD0' is not found where I look for 'TD' and LOCSW and WBSW when I look | |
| 277 # for 'SW' | |
| 278 pattern <- paste("\\$", paramName, "=", sep = "") | |
| 279 occurences <- grep(pattern, lines) | |
| 280 if (length(occurences) == 0L) { | |
| 281 stop(paste(file, "has no field", pattern)) | |
| 282 } | |
| 283 if (length(occurences) > 1L) { | |
| 284 warning(paste(file, "has more that one field", pattern, " I take the first one")) | |
| 285 } | |
| 286 line <- lines[occurences[1]] | |
| 287 | |
| 288 # Cut beginning and end of the line '##$TD= 65536' -> '65536' | |
| 289 igual = as.numeric(regexpr("=", line)) | |
| 290 | |
| 291 first <- igual | |
| 292 while (first <= nchar(line) & !isDigit(substr(line, first, first))) { | |
| 293 first <- first + 1 | |
| 294 } | |
| 295 last <- nchar(line) | |
| 296 while (last > 0 & !isDigit(substr(line, last, last))) { | |
| 297 last <- last - 1 | |
| 298 } | |
| 299 params[paramName] <- as.numeric(substr(line, first, last)) | |
| 300 } | |
| 301 return(params) | |
| 302 } | |
| 303 | |
| 304 | |
| 305 | |
| 306 # ReadFids ============================================================================== | |
| 307 | |
| 308 ReadFids <- function(path, l = 1, subdirs = FALSE, dirs.names = FALSE) { | |
| 309 | |
| 310 # Data initialisation and checks ---------------------------------------------- | |
| 311 begin_info <- beginTreatment("ReadFids") | |
| 312 checkArg(path, c("str")) | |
| 313 checkArg(l, c("pos")) | |
| 314 if (file.exists(path) == FALSE) { | |
| 315 stop(paste("Invalid path:", path)) | |
| 316 } | |
| 317 | |
| 318 | |
| 319 # Extract the FIDs and their info ---------------------------------------------- | |
| 320 | |
| 321 if (subdirs == FALSE) { | |
| 322 fidDirs <- getDirsContainingFid(path) | |
| 323 n <- length(fidDirs) | |
| 324 if (n == 0L) { | |
| 325 stop(paste("No valid fid in", path)) | |
| 326 } | |
| 327 if (dirs.names) { | |
| 328 separator <- .Platform$file.sep | |
| 329 path_elem <- strsplit(fidDirs,separator) | |
| 330 fidNames <- sapply(path_elem, function(x) x[[length(path_elem[[1]])]]) | |
| 331 }else {fidNames <- sapply(X = fidDirs, FUN = getTitle, l = l, subdirs = subdirs, USE.NAMES = F)} | |
| 332 | |
| 333 for (i in 1:n) { | |
| 334 fidList <- ReadFid(fidDirs[i]) | |
| 335 fid <- fidList[["fid"]] | |
| 336 info <- fidList[["params"]] | |
| 337 m <- length(fid) | |
| 338 if (i == 1) { | |
| 339 Fid_data <- matrix(nrow = n, ncol = m, dimnames = list(fidNames, | |
| 340 info[["DT"]] * (0:(m - 1)))) | |
| 341 Fid_info <- matrix(nrow = n, ncol = length(info), dimnames = list(fidNames, | |
| 342 names(info))) | |
| 343 } | |
| 344 Fid_data[i, ] <- fid | |
| 345 Fid_info[i, ] <- unlist(info) | |
| 346 } | |
| 347 | |
| 348 } else { | |
| 349 maindirs <- dir(path, full.names = TRUE) # subdirectories | |
| 350 Fid_data <- numeric() | |
| 351 Fid_info <- numeric() | |
| 352 | |
| 353 fidDirs <- c() | |
| 354 for (j in maindirs) { | |
| 355 fd <- getDirsContainingFid(j) # recoved FIDs from subdirectories | |
| 356 n <- length(fd) | |
| 357 if (n > 0L) { | |
| 358 fidDirs <- c(fidDirs, fd) | |
| 359 } else {warning(paste("No valid fid in",j ))} | |
| 360 } | |
| 361 | |
| 362 if (dirs.names==TRUE) { | |
| 363 if (length(fidDirs)!= length(dir(path))) { # at least one subdir contains more than 1 FID | |
| 364 separator <- .Platform$file.sep | |
| 365 path_elem <- strsplit(fidDirs,separator) | |
| 366 fidNames <- sapply(path_elem, function(x) paste(x[[length(path_elem[[1]])-1]], | |
| 367 x[[length(path_elem[[1]])]], sep = "_")) | |
| 368 }else {fidNames <- dir(path)} | |
| 369 | |
| 370 } else {fidNames <- sapply(X = fidDirs, FUN = getTitle, l = l, subdirs = subdirs, USE.NAMES = F)} | |
| 371 | |
| 372 for (i in 1:length(fidNames)) { | |
| 373 fidList <- ReadFid(fidDirs[i]) | |
| 374 fid <- fidList[["fid"]] | |
| 375 info <- fidList[["params"]] | |
| 376 m <- length(fid) | |
| 377 if (i == 1) { | |
| 378 Fid_data <- matrix(nrow = length(fidNames), ncol = m, dimnames = list(fidNames, | |
| 379 info[["DT"]] * (0:(m - 1)))) | |
| 380 Fid_info <- matrix(nrow = length(fidNames), ncol = length(info), dimnames = list(fidNames, | |
| 381 names(info))) | |
| 382 } | |
| 383 Fid_data[i, ] <- fid | |
| 384 Fid_info[i, ] <- unlist(info) | |
| 385 } | |
| 386 | |
| 387 | |
| 388 } | |
| 389 | |
| 390 # Check for non-unique IDs ---------------------------------------------- | |
| 391 NonnuniqueIds <- sum(duplicated(row.names(Fid_data))) | |
| 392 cat("dim Fid_data: ", dim(Fid_data), "\n") | |
| 393 cat("IDs: ", rownames(Fid_data), "\n") | |
| 394 cat("non-unique IDs?", NonnuniqueIds, "\n") | |
| 395 if (NonnuniqueIds > 0) { | |
| 396 warning("There are duplicated IDs: ", Fid_data[duplicated(Fid_data)]) | |
| 397 } | |
| 398 | |
| 399 | |
| 400 # Return the results ---------------------------------------------- | |
| 401 return(list(Fid_data = endTreatment("ReadFids", begin_info, Fid_data), Fid_info = Fid_info)) | |
| 402 | |
| 403 } | |
| 404 |
