Mercurial > repos > lecorguille > camera_annotate
comparison lib.r @ 16:a2c49996603e draft
planemo upload commit e440674afa3e58c46100b0ac7c305a6f46ecbbdc
author | lecorguille |
---|---|
date | Wed, 19 Sep 2018 03:22:21 -0400 |
parents | 1c30ff90f3ae |
children | 73d82de36369 |
comparison
equal
deleted
inserted
replaced
15:6139bfcc95cb | 16:a2c49996603e |
---|---|
1 # lib.r | 1 # lib.r |
2 | |
3 # This function retrieve a xset like object | |
4 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
5 getxcmsSetObject <- function(xobject) { | |
6 # XCMS 1.x | |
7 if (class(xobject) == "xcmsSet") | |
8 return (xobject) | |
9 # XCMS 3.x | |
10 if (class(xobject) == "XCMSnExp") { | |
11 # Get the legacy xcmsSet object | |
12 suppressWarnings(xset <- as(xobject, 'xcmsSet')) | |
13 if (is.null(xset@phenoData$sample_group)) | |
14 sampclass(xset) = "." | |
15 else | |
16 sampclass(xset) <- xset@phenoData$sample_group | |
17 if (!is.null(xset@phenoData$sample_name)) | |
18 rownames(xset@phenoData) = xset@phenoData$sample_name | |
19 return (xset) | |
20 } | |
21 } | |
2 | 22 |
3 #@author G. Le Corguille | 23 #@author G. Le Corguille |
4 #The function create a pdf from the different png generated by diffreport | 24 #The function create a pdf from the different png generated by diffreport |
5 diffreport_png2pdf <- function(filebase) { | 25 diffreport_png2pdf <- function(filebase) { |
6 dir.create("pdf") | 26 dir.create("pdf") |
103 for (i in seq(along=x) ) { | 123 for (i in seq(along=x) ) { |
104 y=1:(length(classes)) | 124 y=1:(length(classes)) |
105 for (n in seq(along=y)){ | 125 for (n in seq(along=y)){ |
106 if(i+n <= length(classes)){ | 126 if(i+n <= length(classes)){ |
107 filebase=paste(classes[i],class2=classes[i+n],sep="-vs-") | 127 filebase=paste(classes[i],class2=classes[i+n],sep="-vs-") |
108 | 128 |
109 diffrep=diffreport(object=xset,class1=classes[i],class2=classes[i+n],filebase=filebase,eicmax=listArguments[["eicmax"]],eicwidth=listArguments[["eicwidth"]],sortpval=TRUE,value=listArguments[["value"]],h=listArguments[["h"]],w=listArguments[["w"]],mzdec=listArguments[["mzdec"]]) | 129 diffrep=diffreport(object=xset,class1=classes[i],class2=classes[i+n],filebase=filebase,eicmax=listArguments[["eicmax"]],eicwidth=listArguments[["eicwidth"]],sortpval=TRUE,value=listArguments[["value"]],h=listArguments[["h"]],w=listArguments[["w"]],mzdec=listArguments[["mzdec"]],missing=0) |
110 | 130 |
111 diffrepOri = diffrep | 131 diffrepOri = diffrep |
112 | 132 |
113 # renamming of the column rtmed to rt to fit with camera peaklist function output | 133 # renamming of the column rtmed to rt to fit with camera peaklist function output |
114 colnames(diffrep)[colnames(diffrep)=="rtmed"] <- "rt" | 134 colnames(diffrep)[colnames(diffrep)=="rtmed"] <- "rt" |
115 colnames(diffrep)[colnames(diffrep)=="mzmed"] <- "mz" | 135 colnames(diffrep)[colnames(diffrep)=="mzmed"] <- "mz" |
116 | 136 |
117 # combines results and reorder columns | 137 # combines results and reorder columns |
118 diffrep = merge(peakList, diffrep[,c("name","fold","tstat","pvalue")], by.x="name", by.y="name", sort=F) | 138 diffrep = merge(peakList, diffrep[,c("name","fold","tstat","pvalue")], by.x="name", by.y="name", sort=F) |
119 diffrep = cbind(diffrep[,!(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))],diffrep[,(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))]) | 139 diffrep = cbind(diffrep[,!(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))],diffrep[,(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))]) |
120 | 140 |
121 diffrep = RTSecondToMinute(diffrep, listArguments[["convertRTMinute"]]) | 141 diffrep = RTSecondToMinute(diffrep, listArguments[["convertRTMinute"]]) |
122 diffrep = formatIonIdentifiers(diffrep, numDigitsRT=listArguments[["numDigitsRT"]], numDigitsMZ=listArguments[["numDigitsMZ"]]) | 142 diffrep = formatIonIdentifiers(diffrep, numDigitsRT=listArguments[["numDigitsRT"]], numDigitsMZ=listArguments[["numDigitsMZ"]]) |
123 | 143 |
124 if(listArguments[["sortpval"]]){ | 144 if(listArguments[["sortpval"]]){ |
125 diffrep=diffrep[order(diffrep$pvalue), ] | 145 diffrep=diffrep[order(diffrep$pvalue), ] |
126 } | 146 } |
127 | 147 |
128 dir.create("tabular") | 148 dir.create("tabular") |
129 write.table(diffrep, sep="\t", quote=FALSE, row.names=FALSE, file=paste("tabular/",filebase,"_tsv.tabular",sep="")) | 149 write.table(diffrep, sep="\t", quote=FALSE, row.names=FALSE, file=paste("tabular/",filebase,"_tsv.tabular",sep="")) |
130 | 150 |
131 if (listArguments[["eicmax"]] != 0) { | 151 if (listArguments[["eicmax"]] != 0) { |
132 diffreport_png2pdf(filebase) | 152 diffreport_png2pdf(filebase) |
133 } | 153 } |
134 } | 154 } |
135 } | 155 } |
136 } | 156 } |
137 } | 157 } |
138 | |
139 | 158 |
140 # --- variableMetadata --- | 159 # --- variableMetadata --- |
141 variableMetadata=peakList[,!(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))] | 160 variableMetadata=peakList[,!(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))] |
142 variableMetadata = RTSecondToMinute(variableMetadata, listArguments[["convertRTMinute"]]) | 161 variableMetadata = RTSecondToMinute(variableMetadata, listArguments[["convertRTMinute"]]) |
143 variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=listArguments[["numDigitsRT"]], numDigitsMZ=listArguments[["numDigitsMZ"]]) | 162 variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=listArguments[["numDigitsRT"]], numDigitsMZ=listArguments[["numDigitsMZ"]]) |
300 cat("files_root_directory\t",directory,"\n") | 319 cat("files_root_directory\t",directory,"\n") |
301 | 320 |
302 } | 321 } |
303 return (directory) | 322 return (directory) |
304 } | 323 } |
324 | |
325 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 | |
326 # https://github.com/sneumann/CAMERA/issues/33#issuecomment-405168524 | |
327 # https://github.com/sneumann/xcms/commit/950a3fe794cdb6b0fda88696e31aab3d97a3b7dd | |
328 ############################################################ | |
329 ## getEIC | |
330 getEIC <- function(object, mzrange, rtrange = 200, | |
331 groupidx, sampleidx = sampnames(object), | |
332 rt = c("corrected", "raw")) { | |
333 | |
334 files <- filepaths(object) | |
335 grp <- groups(object) | |
336 samp <- sampnames(object) | |
337 prof <- profinfo(object) | |
338 | |
339 rt <- match.arg(rt) | |
340 | |
341 if (is.numeric(sampleidx)) | |
342 sampleidx <- sampnames(object)[sampleidx] | |
343 sampidx <- match(sampleidx, sampnames(object)) | |
344 | |
345 if (!missing(groupidx)) { | |
346 if (is.numeric(groupidx)) | |
347 groupidx <- groupnames(object)[unique(as.integer(groupidx))] | |
348 grpidx <- match(groupidx, groupnames(object, template = groupidx)) | |
349 } | |
350 | |
351 if (missing(mzrange)) { | |
352 if (missing(groupidx)) | |
353 stop("No m/z range or groups specified") | |
354 if (any(is.na(groupval(object, value = "mz")))) | |
355 warning( | |
356 "`NA` values in xcmsSet. Use fillPeaks() on the object to fill", | |
357 "-in missing peak values. Note however that this will also ", | |
358 "insert intensities of 0 for peaks that can not be filled in.") | |
359 mzmin <- apply(groupval(object, value = "mzmin"), 1, min, na.rm = TRUE) | |
360 mzmax <- apply(groupval(object, value = "mzmax"), 1, max, na.rm = TRUE) | |
361 mzrange <- matrix(c(mzmin[grpidx], mzmax[grpidx]), ncol = 2) | |
362 ## if (any(is.na(groupval(object, value = "mz")))) | |
363 ## stop('Please use fillPeaks() to fill up NA values !') | |
364 ## mzmin <- -rowMax(-groupval(object, value = "mzmin")) | |
365 ## mzmax <- rowMax(groupval(object, value = "mzmax")) | |
366 ## mzrange <- matrix(c(mzmin[grpidx], mzmax[grpidx]), ncol = 2) | |
367 } else if (all(c("mzmin","mzmax") %in% colnames(mzrange))) | |
368 mzrange <- mzrange[,c("mzmin", "mzmax"),drop=FALSE] | |
369 else if (is.null(dim(mzrange))) | |
370 stop("mzrange must be a matrix") | |
371 colnames(mzrange) <- c("mzmin", "mzmax") | |
372 | |
373 if (length(rtrange) == 1) { | |
374 if (missing(groupidx)) | |
375 rtrange <- matrix(rep(range(object@rt[[rt]][sampidx]), nrow(mzrange)), | |
376 ncol = 2, byrow = TRUE) | |
377 else { | |
378 rtrange <- retexp(grp[grpidx,c("rtmin","rtmax"),drop=FALSE], rtrange) | |
379 } | |
380 } else if (is.null(dim(rtrange))) | |
381 stop("rtrange must be a matrix or single number") | |
382 colnames(rtrange) <- c("rtmin", "rtmax") | |
383 | |
384 ## Ensure that we've got corrected retention time if requested. | |
385 if (is.null(object@rt[[rt]])) | |
386 stop(rt, " retention times not present in 'object'!") | |
387 | |
388 ## Ensure that the defined retention time range is within the rtrange of the | |
389 ## object: we're using the max minimal rt of all files and the min maximal rt | |
390 rtrs <- lapply(object@rt[[rt]], range) | |
391 rtr <- c(max(unlist(lapply(rtrs, "[", 1))), | |
392 min(unlist(lapply(rtrs, "[", 2)))) | |
393 ## Check if we've got a range which is completely off: | |
394 if (any(rtrange[, "rtmin"] >= rtr[2] | rtrange[, "rtmax"] <= rtr[1])) { | |
395 outs <- which(rtrange[, "rtmin"] >= rtr[2] | | |
396 rtrange[, "rtmax"] <= rtr[1]) | |
397 stop(length(outs), " of the specified 'rtrange' are completely outside ", | |
398 "of the retention time range of 'object' which is (", rtr[1], ", ", | |
399 rtr[2], "). The first was: (", rtrange[outs[1], "rtmin"], ", ", | |
400 rtrange[outs[1], "rtmax"], "!") | |
401 } | |
402 lower_rt_outside <- rtrange[, "rtmin"] < rtr[1] | |
403 upper_rt_outside <- rtrange[, "rtmax"] > rtr[2] | |
404 if (any(lower_rt_outside) | any(upper_rt_outside)) { | |
405 ## Silently fix these ranges. | |
406 rtrange[lower_rt_outside, "rtmin"] <- rtr[1] | |
407 rtrange[upper_rt_outside, "rtmax"] <- rtr[2] | |
408 } | |
409 | |
410 if (missing(groupidx)) | |
411 gnames <- character(0) | |
412 else | |
413 gnames <- groupidx | |
414 | |
415 eic <- vector("list", length(sampleidx)) | |
416 names(eic) <- sampleidx | |
417 | |
418 for (i in seq(along = sampidx)) { | |
419 | |
420 ## cat(sampleidx[i], "") | |
421 flush.console() | |
422 ## getXcmsRaw takes care of rt correction, susetting to scanrage and other | |
423 ## stuff. | |
424 lcraw <- getXcmsRaw(object, sampleidx = sampidx[i], rt=rt) | |
425 currenteic <- xcms::getEIC(lcraw, mzrange, rtrange, step = prof$step) | |
426 eic[[i]] <- currenteic@eic[[1]] | |
427 rm(lcraw) | |
428 gc() | |
429 } | |
430 ## cat("\n") | |
431 | |
432 invisible(new("xcmsEIC", eic = eic, mzrange = mzrange, rtrange = rtrange, | |
433 rt = rt, groupnames = gnames)) | |
434 } | |
435 | |
436 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 | |
437 # https://github.com/sneumann/CAMERA/issues/33#issuecomment-405168524 | |
438 # https://github.com/sneumann/xcms/commit/950a3fe794cdb6b0fda88696e31aab3d97a3b7dd | |
439 ############################################################ | |
440 ## diffreport | |
441 diffreport = function(object, | |
442 class1 = levels(sampclass(object))[1], | |
443 class2 = levels(sampclass(object))[2], | |
444 filebase = character(), | |
445 eicmax = 0, eicwidth = 200, | |
446 sortpval = TRUE, | |
447 classeic = c(class1,class2), | |
448 value = c("into","maxo","intb"), | |
449 metlin = FALSE, | |
450 h = 480, w = 640, mzdec=2, | |
451 missing = numeric(), ...) { | |
452 | |
453 if ( nrow(object@groups)<1 || length(object@groupidx) <1) { | |
454 stop("No group information. Use group().") | |
455 } | |
456 | |
457 if (!is.numeric(w) || !is.numeric(h)) | |
458 stop("'h' and 'w' have to be numeric") | |
459 ## require(multtest) || stop("Couldn't load multtest") | |
460 | |
461 value <- match.arg(value) | |
462 groupmat <- groups(object) | |
463 if (length(groupmat) == 0) | |
464 stop("No group information found") | |
465 samples <- sampnames(object) | |
466 n <- length(samples) | |
467 classlabel <- sampclass(object) | |
468 classlabel <- levels(classlabel)[as.vector(unclass(classlabel))] | |
469 | |
470 values <- groupval(object, "medret", value=value) | |
471 indecies <- groupval(object, "medret", value = "index") | |
472 | |
473 if (!all(c(class1,class2) %in% classlabel)) | |
474 stop("Incorrect Class Labels") | |
475 | |
476 ## c1 and c2 are column indices of class1 and class2 resp. | |
477 c1 <- which(classlabel %in% class1) | |
478 c2 <- which(classlabel %in% class2) | |
479 ceic <- which(classlabel %in% classeic) | |
480 if (length(intersect(c1, c2)) > 0) | |
481 stop("Intersecting Classes") | |
482 | |
483 ## Optionally replace NA values with the value provided with missing | |
484 if (length(missing)) { | |
485 if (is.numeric(missing)) { | |
486 ## handles NA, Inf and -Inf | |
487 values[, c(c1, c2)][!is.finite(values[, c(c1, c2)])] <- missing[1] | |
488 } else | |
489 stop("'missing' should be numeric") | |
490 } | |
491 ## Check against missing Values | |
492 if (any(is.na(values[, c(c1, c2)]))) | |
493 warning("`NA` values in xcmsSet. Use fillPeaks() on the object to fill", | |
494 "-in missing peak values. Note however that this will also ", | |
495 "insert intensities of 0 for peaks that can not be filled in.") | |
496 | |
497 mean1 <- rowMeans(values[,c1,drop=FALSE], na.rm = TRUE) | |
498 mean2 <- rowMeans(values[,c2,drop=FALSE], na.rm = TRUE) | |
499 | |
500 ## Calculate fold change. | |
501 ## For foldchange <1 set fold to 1/fold | |
502 ## See tstat to check which was higher | |
503 fold <- mean2 / mean1 | |
504 fold[!is.na(fold) & fold < 1] <- 1/fold[!is.na(fold) & fold < 1] | |
505 | |
506 testval <- values[,c(c1,c2)] | |
507 ## Replace eventual infinite values with NA (CAMERA issue #33) | |
508 testval[is.infinite(testval)] <- NA | |
509 testclab <- c(rep(0,length(c1)),rep(1,length(c2))) | |
510 | |
511 if (min(length(c1), length(c2)) >= 2) { | |
512 tstat <- mt.teststat(testval, testclab, ...) | |
513 pvalue <- xcms:::pval(testval, testclab, tstat) | |
514 } else { | |
515 message("Too few samples per class, skipping t-test.") | |
516 tstat <- pvalue <- rep(NA,nrow(testval)) | |
517 } | |
518 stat <- data.frame(fold = fold, tstat = tstat, pvalue = pvalue) | |
519 if (length(levels(sampclass(object))) >2) { | |
520 pvalAnova<-c() | |
521 for(i in 1:nrow(values)){ | |
522 var<-as.numeric(values[i,]) | |
523 ano<-summary(aov(var ~ sampclass(object)) ) | |
524 pvalAnova<-append(pvalAnova, unlist(ano)["Pr(>F)1"]) | |
525 } | |
526 stat<-cbind(stat, anova= pvalAnova) | |
527 } | |
528 if (metlin) { | |
529 neutralmass <- groupmat[,"mzmed"] + ifelse(metlin < 0, 1, -1) | |
530 metlin <- abs(metlin) | |
531 digits <- ceiling(-log10(metlin))+1 | |
532 metlinurl <- | |
533 paste("http://metlin.scripps.edu/simple_search_result.php?mass_min=", | |
534 round(neutralmass - metlin, digits), "&mass_max=", | |
535 round(neutralmass + metlin, digits), sep="") | |
536 values <- cbind(metlin = metlinurl, values) | |
537 } | |
538 twosamp <- cbind(name = groupnames(object), stat, groupmat, values) | |
539 if (sortpval) { | |
540 tsidx <- order(twosamp[,"pvalue"]) | |
541 twosamp <- twosamp[tsidx,] | |
542 rownames(twosamp) <- 1:nrow(twosamp) | |
543 values<-values[tsidx,] | |
544 } else | |
545 tsidx <- 1:nrow(values) | |
546 | |
547 if (length(filebase)) | |
548 write.table(twosamp, paste(filebase, ".tsv", sep = ""), quote = FALSE, sep = "\t", col.names = NA) | |
549 | |
550 if (eicmax > 0) { | |
551 if (length(unique(peaks(object)[,"rt"])) > 1) { | |
552 ## This looks like "normal" LC data | |
553 | |
554 eicmax <- min(eicmax, length(tsidx)) | |
555 eics <- getEIC(object, rtrange = eicwidth*1.1, sampleidx = ceic, | |
556 groupidx = tsidx[seq(length = eicmax)]) | |
557 | |
558 if (length(filebase)) { | |
559 eicdir <- paste(filebase, "_eic", sep="") | |
560 boxdir <- paste(filebase, "_box", sep="") | |
561 dir.create(eicdir) | |
562 dir.create(boxdir) | |
563 if (capabilities("png")){ | |
564 xcms:::xcmsBoxPlot(values[seq(length = eicmax),], | |
565 sampclass(object), dirpath=boxdir, pic="png", width=w, height=h) | |
566 png(file.path(eicdir, "%003d.png"), width = w, height = h) | |
567 } else { | |
568 xcms:::xcmsBoxPlot(values[seq(length = eicmax),], | |
569 sampclass(object), dirpath=boxdir, pic="pdf", width=w, height=h) | |
570 pdf(file.path(eicdir, "%003d.pdf"), width = w/72, | |
571 height = h/72, onefile = FALSE) | |
572 } | |
573 } | |
574 plot(eics, object, rtrange = eicwidth, mzdec=mzdec) | |
575 | |
576 if (length(filebase)) | |
577 dev.off() | |
578 } else { | |
579 ## This looks like a direct-infusion single spectrum | |
580 if (length(filebase)) { | |
581 eicdir <- paste(filebase, "_eic", sep="") | |
582 boxdir <- paste(filebase, "_box", sep="") | |
583 dir.create(eicdir) | |
584 dir.create(boxdir) | |
585 if (capabilities("png")){ | |
586 xcmsBoxPlot(values[seq(length = eicmax),], | |
587 sampclass(object), dirpath=boxdir, pic="png", | |
588 width=w, height=h) | |
589 png(file.path(eicdir, "%003d.png"), width = w, height = h, | |
590 units = "px") | |
591 } else { | |
592 xcmsBoxPlot(values[seq(length = eicmax),], | |
593 sampclass(object), dirpath=boxdir, pic="pdf", | |
594 width=w, height=h) | |
595 pdf(file.path(eicdir, "%003d.pdf"), width = w/72, | |
596 height = h/72, onefile = FALSE) | |
597 } | |
598 } | |
599 | |
600 plotSpecWindow(object, gidxs = tsidx[seq(length = eicmax)], borderwidth=1) | |
601 | |
602 if (length(filebase)) | |
603 dev.off() | |
604 } | |
605 } | |
606 | |
607 invisible(twosamp) | |
608 } |