Mercurial > repos > testtool > geo_data
diff GRsetFromGEO/GRsetFromGEO.R @ 36:b3761b109ca9 draft
Uploaded
author | testtool |
---|---|
date | Mon, 24 Apr 2017 08:50:59 -0400 |
parents | 694382fd220a |
children | c982fdb0e27d |
line wrap: on
line diff
--- a/GRsetFromGEO/GRsetFromGEO.R Fri Apr 14 14:16:46 2017 -0400 +++ b/GRsetFromGEO/GRsetFromGEO.R Mon Apr 24 08:50:59 2017 -0400 @@ -1,60 +1,64 @@ require("minfi", quietly = TRUE) +require("BiocGenerics", quietly = TRUE) +require("data.table", quietly = TRUE) +require("GEOquery", quietly = TRUE) +require("rtracklayer", quietly = TRUE) +require("FDb.InfiniumMethylation.hg19", quietly = TRUE) args <- commandArgs(trailingOnly = TRUE) GSE = args[1] output = args[2] -function (GSE = NULL, path = NULL, array = "IlluminaHumanMethylation450k", - annotation = .default.450k.annotation, what = c("Beta", "M"), - mergeManifest = FALSE, i = 1) -{ - what <- match.arg(what) - if (is.null(GSE) && is.null(path)) - stop("Either GSE or path must be supplied.") - if (!is.null(GSE)) - gset <- GEOquery::getGEO(GSE) - else gset <- GEOquery::getGEO(filename = file.path(path, - list.files(path, pattern = ".soft"))) - if (length(gset) == 0) - stop("Empty list retrieved from GEO.") - if (length(gset) > 1) { - warning("More than one ExpressionSet found:\n", names(gset), - "\nUsing entry ", i) - gset <- gset[[i]] - } - else gset <- gset[[1]] +getAnnotationString <- function(annotation) { + if(length(annotation) == 1) + return(sprintf("%sanno", annotation)) + if(all(c("array", "annotation") %in% names(annotation))) + return(sprintf("%sanno.%s", annotation["array"], annotation["annotation"])) + stop("unable to get the annotation string for this object") +} + +default.450k.annotation <- "ilmn12.hg19" + + + +function (GSE = GSE, array = "IlluminaHumanMethylation450k", annotation = default.450k.annotation, + what = c("Beta", "M"), mergeManifest = FALSE, i = 1){ + + + gset <- getGEO(GSE) + gset <- gset[[1]] + platform <- annotation(gset) - if (platform != "GPL13534") - warning(sprintf("%s is not the platform ID associated with IlluminaHumanMethylation450k. Should be GPL13534.", - platform)) - if (what == "Beta" & (min(exprs(gset)[, 1], na.rm = TRUE) < - 0 | max(exprs(gset)[, 1], na.rm = TRUE) > 1)) - warning("Values outside [0,1] detected. 'what' argument should not be Beta.") - ann <- .getAnnotationString(c(array = array, annotation = annotation)) + + ann <- getAnnotationString(c(array = array, annotation = annotation)) if (!require(ann, character.only = TRUE)) stop(sprintf("cannot load annotation package %s", ann)) + object <- get(ann) + gr <- getLocations(object, mergeManifest = mergeManifest, orderByLocation = TRUE) locusNames <- names(gr) sampleNames(gset) <- gset$title common <- intersect(locusNames, fData(gset)$Name) - if (length(common) == 0) + if (length(common) == 0) { stop("No rowname matches. 'rownames' need to match IlluminaHumanMethylation450k probe names.") - ind1 <- match(common, fData(gset)$Name) - ind2 <- match(common, locusNames) - preprocessing <- c(rg.norm = paste0("See GEO ", GSE, " for details")) - if (what == "Beta") { - out <- GenomicRatioSet(gr = gr[ind2, ], Beta = exprs(gset)[ind1, - , drop = FALSE], M = NULL, CN = NULL, pData = pData(gset), - annotation = c(array = array, annotation = annotation), - preprocessMethod = preprocessing) + ind1 <- match(common, fData(gset)$Name) + ind2 <- match(common, locusNames) + preprocessing <- c(rg.norm = paste0("See GEO ", GSE, " for details")) + if (what == "Beta") { + out <- GenomicRatioSet(gr = gr[ind2, ], Beta = exprs(gset)[ind1, + , drop = FALSE], M = NULL, CN = NULL, pData = pData(gset), + annotation = c(array = array, annotation = annotation), + preprocessMethod = preprocessing) + } + else { + out <- GenomicRatioSet(gr = gr[ind2, ], Beta = NULL, + M = exprs(gset)[ind1, , drop = FALSE], CN = NULL, + pData = pData(gset), annotation = c(array = array, + annotation = annotation), preprocessMethod = preprocessing) + } + return(out) } - else { - out <- GenomicRatioSet(gr = gr[ind2, ], Beta = NULL, - M = exprs(gset)[ind1, , drop = FALSE], CN = NULL, - pData = pData(gset), annotation = c(array = array, - annotation = annotation), preprocessMethod = preprocessing) - } - save(out,output) + save(out,file = output) }