comparison lib.r @ 12:961089e21ae2 draft default tip

planemo upload commit 1a5357c0e28103a0325b2df858504721f6266049
author lecorguille
date Tue, 09 Oct 2018 06:03:24 -0400
parents 837c6955e4e9
children
comparison
equal deleted inserted replaced
11:837c6955e4e9 12:961089e21ae2
55 variableMetadata=cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))]) 55 variableMetadata=cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))])
56 return(variableMetadata) 56 return(variableMetadata)
57 } 57 }
58 58
59 #The function annotateDiffreport without the corr function which bugs 59 #The function annotateDiffreport without the corr function which bugs
60 annotatediff <- function(xset=xset, listArguments=listArguments, variableMetadataOutput="variableMetadata.tsv", dataMatrixOutput="dataMatrix.tsv") { 60 annotatediff <- function(xset=xset, listArguments=listArguments, variableMetadataOutput="variableMetadata.tsv") {
61 # Resolve the bug with x11, with the function png 61 # Resolve the bug with x11, with the function png
62 options(bitmapType='cairo') 62 options(bitmapType='cairo')
63 63
64 #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped. 64 #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped.
65 res=try(is.null(xset@filled)) 65 res=try(is.null(xset@filled))
105 105
106 # launch annotate 106 # launch annotate
107 xa = do.call("annotate", listArguments4annotate) 107 xa = do.call("annotate", listArguments4annotate)
108 peakList=getPeaklist(xa,intval=listArguments[["intval"]]) 108 peakList=getPeaklist(xa,intval=listArguments[["intval"]])
109 peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name"); 109 peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name");
110
111 # --- dataMatrix ---
112 dataMatrix = peakList[,(make.names(colnames(peakList)) %in% c("name", make.names(sampnames(xa@xcmsSet))))]
113 write.table(dataMatrix, sep="\t", quote=FALSE, row.names=FALSE, file=dataMatrixOutput)
114
115 110
116 # --- Multi condition : diffreport --- 111 # --- Multi condition : diffreport ---
117 diffrepOri=NULL 112 diffrepOri=NULL
118 if (!is.null(listArguments[["runDiffreport"]]) & nlevels(sampclass(xset))>=2) { 113 if (!is.null(listArguments[["runDiffreport"]]) & nlevels(sampclass(xset))>=2) {
119 #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped. 114 #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped.
328 ############################################################ 323 ############################################################
329 ## getEIC 324 ## getEIC
330 getEIC <- function(object, mzrange, rtrange = 200, 325 getEIC <- function(object, mzrange, rtrange = 200,
331 groupidx, sampleidx = sampnames(object), 326 groupidx, sampleidx = sampnames(object),
332 rt = c("corrected", "raw")) { 327 rt = c("corrected", "raw")) {
333 328
334 files <- filepaths(object) 329 files <- filepaths(object)
335 grp <- groups(object) 330 grp <- groups(object)
336 samp <- sampnames(object) 331 samp <- sampnames(object)
337 prof <- profinfo(object) 332 prof <- profinfo(object)
338 333
339 rt <- match.arg(rt) 334 rt <- match.arg(rt)
340 335
341 if (is.numeric(sampleidx)) 336 if (is.numeric(sampleidx))
342 sampleidx <- sampnames(object)[sampleidx] 337 sampleidx <- sampnames(object)[sampleidx]
343 sampidx <- match(sampleidx, sampnames(object)) 338 sampidx <- match(sampleidx, sampnames(object))
344 339
345 if (!missing(groupidx)) { 340 if (!missing(groupidx)) {
346 if (is.numeric(groupidx)) 341 if (is.numeric(groupidx))
347 groupidx <- groupnames(object)[unique(as.integer(groupidx))] 342 groupidx <- groupnames(object)[unique(as.integer(groupidx))]
348 grpidx <- match(groupidx, groupnames(object, template = groupidx)) 343 grpidx <- match(groupidx, groupnames(object, template = groupidx))
349 } 344 }
350 345
351 if (missing(mzrange)) { 346 if (missing(mzrange)) {
352 if (missing(groupidx)) 347 if (missing(groupidx))
353 stop("No m/z range or groups specified") 348 stop("No m/z range or groups specified")
354 if (any(is.na(groupval(object, value = "mz")))) 349 if (any(is.na(groupval(object, value = "mz"))))
355 warning( 350 warning(
367 } else if (all(c("mzmin","mzmax") %in% colnames(mzrange))) 362 } else if (all(c("mzmin","mzmax") %in% colnames(mzrange)))
368 mzrange <- mzrange[,c("mzmin", "mzmax"),drop=FALSE] 363 mzrange <- mzrange[,c("mzmin", "mzmax"),drop=FALSE]
369 else if (is.null(dim(mzrange))) 364 else if (is.null(dim(mzrange)))
370 stop("mzrange must be a matrix") 365 stop("mzrange must be a matrix")
371 colnames(mzrange) <- c("mzmin", "mzmax") 366 colnames(mzrange) <- c("mzmin", "mzmax")
372 367
373 if (length(rtrange) == 1) { 368 if (length(rtrange) == 1) {
374 if (missing(groupidx)) 369 if (missing(groupidx))
375 rtrange <- matrix(rep(range(object@rt[[rt]][sampidx]), nrow(mzrange)), 370 rtrange <- matrix(rep(range(object@rt[[rt]][sampidx]), nrow(mzrange)),
376 ncol = 2, byrow = TRUE) 371 ncol = 2, byrow = TRUE)
377 else { 372 else {
378 rtrange <- retexp(grp[grpidx,c("rtmin","rtmax"),drop=FALSE], rtrange) 373 rtrange <- retexp(grp[grpidx,c("rtmin","rtmax"),drop=FALSE], rtrange)
379 } 374 }
380 } else if (is.null(dim(rtrange))) 375 } else if (is.null(dim(rtrange)))
381 stop("rtrange must be a matrix or single number") 376 stop("rtrange must be a matrix or single number")
382 colnames(rtrange) <- c("rtmin", "rtmax") 377 colnames(rtrange) <- c("rtmin", "rtmax")
383 378
384 ## Ensure that we've got corrected retention time if requested. 379 ## Ensure that we've got corrected retention time if requested.
385 if (is.null(object@rt[[rt]])) 380 if (is.null(object@rt[[rt]]))
386 stop(rt, " retention times not present in 'object'!") 381 stop(rt, " retention times not present in 'object'!")
387 382
388 ## Ensure that the defined retention time range is within the rtrange of the 383 ## Ensure that the defined retention time range is within the rtrange of the
389 ## object: we're using the max minimal rt of all files and the min maximal rt 384 ## object: we're using the max minimal rt of all files and the min maximal rt
390 rtrs <- lapply(object@rt[[rt]], range) 385 rtrs <- lapply(object@rt[[rt]], range)
391 rtr <- c(max(unlist(lapply(rtrs, "[", 1))), 386 rtr <- c(max(unlist(lapply(rtrs, "[", 1))),
392 min(unlist(lapply(rtrs, "[", 2)))) 387 min(unlist(lapply(rtrs, "[", 2))))
404 if (any(lower_rt_outside) | any(upper_rt_outside)) { 399 if (any(lower_rt_outside) | any(upper_rt_outside)) {
405 ## Silently fix these ranges. 400 ## Silently fix these ranges.
406 rtrange[lower_rt_outside, "rtmin"] <- rtr[1] 401 rtrange[lower_rt_outside, "rtmin"] <- rtr[1]
407 rtrange[upper_rt_outside, "rtmax"] <- rtr[2] 402 rtrange[upper_rt_outside, "rtmax"] <- rtr[2]
408 } 403 }
409 404
410 if (missing(groupidx)) 405 if (missing(groupidx))
411 gnames <- character(0) 406 gnames <- character(0)
412 else 407 else
413 gnames <- groupidx 408 gnames <- groupidx
414 409
415 eic <- vector("list", length(sampleidx)) 410 eic <- vector("list", length(sampleidx))
416 names(eic) <- sampleidx 411 names(eic) <- sampleidx
417 412
418 for (i in seq(along = sampidx)) { 413 for (i in seq(along = sampidx)) {
419 414
420 ## cat(sampleidx[i], "") 415 ## cat(sampleidx[i], "")
421 flush.console() 416 flush.console()
422 ## getXcmsRaw takes care of rt correction, susetting to scanrage and other 417 ## getXcmsRaw takes care of rt correction, susetting to scanrage and other
423 ## stuff. 418 ## stuff.
424 lcraw <- getXcmsRaw(object, sampleidx = sampidx[i], rt=rt) 419 lcraw <- getXcmsRaw(object, sampleidx = sampidx[i], rt=rt)
426 eic[[i]] <- currenteic@eic[[1]] 421 eic[[i]] <- currenteic@eic[[1]]
427 rm(lcraw) 422 rm(lcraw)
428 gc() 423 gc()
429 } 424 }
430 ## cat("\n") 425 ## cat("\n")
431 426
432 invisible(new("xcmsEIC", eic = eic, mzrange = mzrange, rtrange = rtrange, 427 invisible(new("xcmsEIC", eic = eic, mzrange = mzrange, rtrange = rtrange,
433 rt = rt, groupnames = gnames)) 428 rt = rt, groupnames = gnames))
434 } 429 }
435 430
436 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 431 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
447 classeic = c(class1,class2), 442 classeic = c(class1,class2),
448 value = c("into","maxo","intb"), 443 value = c("into","maxo","intb"),
449 metlin = FALSE, 444 metlin = FALSE,
450 h = 480, w = 640, mzdec=2, 445 h = 480, w = 640, mzdec=2,
451 missing = numeric(), ...) { 446 missing = numeric(), ...) {
452 447
453 if ( nrow(object@groups)<1 || length(object@groupidx) <1) { 448 if ( nrow(object@groups)<1 || length(object@groupidx) <1) {
454 stop("No group information. Use group().") 449 stop("No group information. Use group().")
455 } 450 }
456 451
457 if (!is.numeric(w) || !is.numeric(h)) 452 if (!is.numeric(w) || !is.numeric(h))
458 stop("'h' and 'w' have to be numeric") 453 stop("'h' and 'w' have to be numeric")
459 ## require(multtest) || stop("Couldn't load multtest") 454 ## require(multtest) || stop("Couldn't load multtest")
460 455
461 value <- match.arg(value) 456 value <- match.arg(value)
462 groupmat <- groups(object) 457 groupmat <- groups(object)
463 if (length(groupmat) == 0) 458 if (length(groupmat) == 0)
464 stop("No group information found") 459 stop("No group information found")
465 samples <- sampnames(object) 460 samples <- sampnames(object)
466 n <- length(samples) 461 n <- length(samples)
467 classlabel <- sampclass(object) 462 classlabel <- sampclass(object)
468 classlabel <- levels(classlabel)[as.vector(unclass(classlabel))] 463 classlabel <- levels(classlabel)[as.vector(unclass(classlabel))]
469 464
470 values <- groupval(object, "medret", value=value) 465 values <- groupval(object, "medret", value=value)
471 indecies <- groupval(object, "medret", value = "index") 466 indecies <- groupval(object, "medret", value = "index")
472 467
473 if (!all(c(class1,class2) %in% classlabel)) 468 if (!all(c(class1,class2) %in% classlabel))
474 stop("Incorrect Class Labels") 469 stop("Incorrect Class Labels")
475 470
476 ## c1 and c2 are column indices of class1 and class2 resp. 471 ## c1 and c2 are column indices of class1 and class2 resp.
477 c1 <- which(classlabel %in% class1) 472 c1 <- which(classlabel %in% class1)
478 c2 <- which(classlabel %in% class2) 473 c2 <- which(classlabel %in% class2)
479 ceic <- which(classlabel %in% classeic) 474 ceic <- which(classlabel %in% classeic)
480 if (length(intersect(c1, c2)) > 0) 475 if (length(intersect(c1, c2)) > 0)
481 stop("Intersecting Classes") 476 stop("Intersecting Classes")
482 477
483 ## Optionally replace NA values with the value provided with missing 478 ## Optionally replace NA values with the value provided with missing
484 if (length(missing)) { 479 if (length(missing)) {
485 if (is.numeric(missing)) { 480 if (is.numeric(missing)) {
486 ## handles NA, Inf and -Inf 481 ## handles NA, Inf and -Inf
487 values[, c(c1, c2)][!is.finite(values[, c(c1, c2)])] <- missing[1] 482 values[, c(c1, c2)][!is.finite(values[, c(c1, c2)])] <- missing[1]
491 ## Check against missing Values 486 ## Check against missing Values
492 if (any(is.na(values[, c(c1, c2)]))) 487 if (any(is.na(values[, c(c1, c2)])))
493 warning("`NA` values in xcmsSet. Use fillPeaks() on the object to fill", 488 warning("`NA` values in xcmsSet. Use fillPeaks() on the object to fill",
494 "-in missing peak values. Note however that this will also ", 489 "-in missing peak values. Note however that this will also ",
495 "insert intensities of 0 for peaks that can not be filled in.") 490 "insert intensities of 0 for peaks that can not be filled in.")
496 491
497 mean1 <- rowMeans(values[,c1,drop=FALSE], na.rm = TRUE) 492 mean1 <- rowMeans(values[,c1,drop=FALSE], na.rm = TRUE)
498 mean2 <- rowMeans(values[,c2,drop=FALSE], na.rm = TRUE) 493 mean2 <- rowMeans(values[,c2,drop=FALSE], na.rm = TRUE)
499 494
500 ## Calculate fold change. 495 ## Calculate fold change.
501 ## For foldchange <1 set fold to 1/fold 496 ## For foldchange <1 set fold to 1/fold
502 ## See tstat to check which was higher 497 ## See tstat to check which was higher
503 fold <- mean2 / mean1 498 fold <- mean2 / mean1
504 fold[!is.na(fold) & fold < 1] <- 1/fold[!is.na(fold) & fold < 1] 499 fold[!is.na(fold) & fold < 1] <- 1/fold[!is.na(fold) & fold < 1]
505 500
506 testval <- values[,c(c1,c2)] 501 testval <- values[,c(c1,c2)]
507 ## Replace eventual infinite values with NA (CAMERA issue #33) 502 ## Replace eventual infinite values with NA (CAMERA issue #33)
508 testval[is.infinite(testval)] <- NA 503 testval[is.infinite(testval)] <- NA
509 testclab <- c(rep(0,length(c1)),rep(1,length(c2))) 504 testclab <- c(rep(0,length(c1)),rep(1,length(c2)))
510 505
511 if (min(length(c1), length(c2)) >= 2) { 506 if (min(length(c1), length(c2)) >= 2) {
512 tstat <- mt.teststat(testval, testclab, ...) 507 tstat <- mt.teststat(testval, testclab, ...)
513 pvalue <- xcms:::pval(testval, testclab, tstat) 508 pvalue <- xcms:::pval(testval, testclab, tstat)
514 } else { 509 } else {
515 message("Too few samples per class, skipping t-test.") 510 message("Too few samples per class, skipping t-test.")
541 twosamp <- twosamp[tsidx,] 536 twosamp <- twosamp[tsidx,]
542 rownames(twosamp) <- 1:nrow(twosamp) 537 rownames(twosamp) <- 1:nrow(twosamp)
543 values<-values[tsidx,] 538 values<-values[tsidx,]
544 } else 539 } else
545 tsidx <- 1:nrow(values) 540 tsidx <- 1:nrow(values)
546 541
547 if (length(filebase)) 542 if (length(filebase))
548 write.table(twosamp, paste(filebase, ".tsv", sep = ""), quote = FALSE, sep = "\t", col.names = NA) 543 write.table(twosamp, paste(filebase, ".tsv", sep = ""), quote = FALSE, sep = "\t", col.names = NA)
549 544
550 if (eicmax > 0) { 545 if (eicmax > 0) {
551 if (length(unique(peaks(object)[,"rt"])) > 1) { 546 if (length(unique(peaks(object)[,"rt"])) > 1) {
552 ## This looks like "normal" LC data 547 ## This looks like "normal" LC data
553 548
554 eicmax <- min(eicmax, length(tsidx)) 549 eicmax <- min(eicmax, length(tsidx))
555 eics <- getEIC(object, rtrange = eicwidth*1.1, sampleidx = ceic, 550 eics <- getEIC(object, rtrange = eicwidth*1.1, sampleidx = ceic,
556 groupidx = tsidx[seq(length = eicmax)]) 551 groupidx = tsidx[seq(length = eicmax)])
557 552
558 if (length(filebase)) { 553 if (length(filebase)) {
559 eicdir <- paste(filebase, "_eic", sep="") 554 eicdir <- paste(filebase, "_eic", sep="")
560 boxdir <- paste(filebase, "_box", sep="") 555 boxdir <- paste(filebase, "_box", sep="")
561 dir.create(eicdir) 556 dir.create(eicdir)
562 dir.create(boxdir) 557 dir.create(boxdir)
570 pdf(file.path(eicdir, "%003d.pdf"), width = w/72, 565 pdf(file.path(eicdir, "%003d.pdf"), width = w/72,
571 height = h/72, onefile = FALSE) 566 height = h/72, onefile = FALSE)
572 } 567 }
573 } 568 }
574 plot(eics, object, rtrange = eicwidth, mzdec=mzdec) 569 plot(eics, object, rtrange = eicwidth, mzdec=mzdec)
575 570
576 if (length(filebase)) 571 if (length(filebase))
577 dev.off() 572 dev.off()
578 } else { 573 } else {
579 ## This looks like a direct-infusion single spectrum 574 ## This looks like a direct-infusion single spectrum
580 if (length(filebase)) { 575 if (length(filebase)) {
594 width=w, height=h) 589 width=w, height=h)
595 pdf(file.path(eicdir, "%003d.pdf"), width = w/72, 590 pdf(file.path(eicdir, "%003d.pdf"), width = w/72,
596 height = h/72, onefile = FALSE) 591 height = h/72, onefile = FALSE)
597 } 592 }
598 } 593 }
599 594
600 plotSpecWindow(object, gidxs = tsidx[seq(length = eicmax)], borderwidth=1) 595 plotSpecWindow(object, gidxs = tsidx[seq(length = eicmax)], borderwidth=1)
601 596
602 if (length(filebase)) 597 if (length(filebase))
603 dev.off() 598 dev.off()
604 } 599 }
605 } 600 }
606 601
607 invisible(twosamp) 602 invisible(twosamp)
608 } 603 }