Mercurial > repos > testtool > annotate_peak
view annotate_peak/.Rhistory @ 18:6a9b9694acbf draft
Uploaded
author | testtool |
---|---|
date | Mon, 20 Mar 2017 06:51:22 -0400 |
parents | 53df7871db21 |
children |
line wrap: on
line source
TAB <- read.csv("input.csv") mysamples <- lapply(TAB$ID,function(x)getGEO(x)) input <- function(TAB) { if(is(TAB, "csv")){ TAB <- read.csv("input.csv")} else{ print("error in data file") }} input() TAB <- read.csv("input.csv")} TAB <- read.csv("input.csv") test_func <- function( clusterSize=2, cutoff=0.2, platform_id='HM450', genome_id='hg19') { args = commandArgs(trailingOnly=TRUE) methyl_file = args[1] ChiPseq_file = args[2] output_file = args[3] options(warn=-1) TAB=read.csv(methyl_file) ChiPseq=import(ChiPseq_file) if(is.null(TAB)){ stop("Must specify input files") }else{ mysamples <- lapply(TAB$ID,function(x) getGEO(x)) } wrappedFunction <- function(test_func) s0 <- lapply(mysamples,Table) id_ref<-lapply(s0,function(x)x$ID_REF) if(length(unique(id_ref)) != 1) { stop("Error different ID_REF for samples") } else if (is.null((unlist(unique(id_ref))))) { stop("NO GSM data avaliable") } else { values<-do.call("cbind",lapply(s0,function(x)x$VALUE)) colnames(values)=TAB$ID rownames(values)=id_ref[[1]] cg <- rownames(values) probe <- c(cg) hm450.hg19 <- getPlatform(platform=platform_id, genome=genome_id) probe.info <- hm450.hg19[probe] f <- data.table(probe=names(probe.info),CHR=as.data.frame(probe.info@seqnames)$value, BP=as.numeric(probe.info@elementMetadata$probeStart)) designMatrix <- model.matrix(~ TAB$Phenotype) DMR <- bumphunter(values, design = designMatrix, pos=f$BP,cutoff=cutoff,chr=f$CHR) MAT <- DMR$table[which(DMR$table$L>=clusterSize),] METH <- GRanges(seqnames=MAT$chr, ranges=IRanges (start=MAT$start, end=MAT$end), value_pos=MAT$value) peaks<-findOverlapsOfPeaks(probe.info,ChiPseq,maxgap=5000) p <- peaks$peaklist$`probe.info///ChiPseq` peakAnno <- annotatePeak(p, file=peakAnno) output_file <- annotatePeak(p, file=output_file) }} test_func <- function( clusterSize=2, cutoff=0.2, platform_id='HM450', genome_id='hg19') { args = commandArgs(trailingOnly=TRUE) methyl_file = args[1] ChiPseq_file = args[2] output_file = args[3] options(warn=-1) TAB=read.csv(methyl_file) ChiPseq=import(ChiPseq_file) if(is.null(TAB)){ stop("Must specify input files") }else{ mysamples <- lapply(TAB$ID,function(x) getGEO(x)) } wrappedFunction <- function(test_func) s0 <- lapply(mysamples,Table) id_ref<-lapply(s0,function(x)x$ID_REF) if(length(unique(id_ref)) != 1) { stop("Error different ID_REF for samples") } else if (is.null((unlist(unique(id_ref))))) { stop("NO GSM data avaliable") } else { values<-do.call("cbind",lapply(s0,function(x)x$VALUE)) colnames(values)=TAB$ID rownames(values)=id_ref[[1]] cg <- rownames(values) probe <- c(cg) hm450.hg19 <- getPlatform(platform=platform_id, genome=genome_id) probe.info <- hm450.hg19[probe] f <- data.table(probe=names(probe.info),CHR=as.data.frame(probe.info@seqnames)$value, BP=as.numeric(probe.info@elementMetadata$probeStart)) designMatrix <- model.matrix(~ TAB$Phenotype) DMR <- bumphunter(values, design = designMatrix, pos=f$BP,cutoff=cutoff,chr=f$CHR) MAT <- DMR$table[which(DMR$table$L>=clusterSize),] METH <- GRanges(seqnames=MAT$chr, ranges=IRanges (start=MAT$start, end=MAT$end), value_pos=MAT$value) peaks<-findOverlapsOfPeaks(probe.info,ChiPseq,maxgap=5000) p <- peaks$peaklist$`probe.info///ChiPseq` peakAnno <- annotatePeak(p, file=peakAnno) output_file <- annotatePeak(p, file=output_file) }} ) a<-c(3.2,3.7,3.6,3.9,3.7,3.5,3.8,4,3.5) a<-c(3.2,3.7,3.6,3.9,3.7,3.5,3.8,4,3.5) b<-c(2.9,2.7,2,6,2.7,2.5,2.5,3.2,3.1,3.3,2.8) a-b file<-read.csv("~/Documents/SS2.csv") head(file) e file t.test(file$WBT,file$WBA) t.test(file$WBT,file$WBA,paired=TRUE) mysamples <- lapply(TAB$ID,function(x) TablegetGEO(x)) require("GEOquery",quietly = TRUE) require("BiocGenerics",quietly = TRUE) args <- commandArgs(trailingOnly = TRUE) csv_file = args[1] #csv_file <- ("test-data/input.csv") TAB=read.csv(csv_file) if(is.null(TAB)){ stop("Must specify input files") }else{ mysamples <- lapply(TAB$ID,function(x) TablegetGEO(x)) } csv_file <- ("test-data/input.csv") TAB=read.csv(csv_file) TAB csv_file<-("test-data/input.csv") TAB = read.csv(csv_file) TAB ??bumphunter ??toGranges require("ChIPseeker", quietly = TRUE) require("ChIPpeakAnno", quietly = TRUE) options(warn = -1) args <- commandArgs(trailingOnly = TRUE) DMR = args[1] annoPeakTable = args[2] DMRInfo = read.table( DMR, header = FALSE, sep = "\t", stringsAsFactors = FALSE, quote = "" ) peaks <- GRanges(seqnames = DMRInfo[, 1], ranges = IRanges (start = DMRInfo[, 2], end = DMRInfo[, 3])) peaks[1:2] DMR <- ("test-data/DMR.bed") DMRInfo = read.table( DMR, header = FALSE, sep = "\t", stringsAsFactors = FALSE, quote = "" ) peaks <- GRanges(seqnames = DMRInfo[, 1], ranges = IRanges (start = DMRInfo[, 2], end = DMRInfo[, 3])) source("https://bioconductor.org/biocLite.R") biocLite("EnsDb.Hsapiens.v75") peaks <- toGRanges(seqnames = DMRInfo[, 1], ranges = IRanges (start = DMRInfo[, 2], end = DMRInfo[, 3])) peaks <- toGRanges(DMRInfo) peaks <- toGRanges(seqnames = DMRInfo[, 1], ranges = IRanges (start = DMRInfo[, 2], end = DMRInfo[, 3])) DMR <- ("test-data/DMR.bed") DMRInfo = read.table( DMR, header = FALSE, sep = "\t", stringsAsFactors = FALSE, quote = "" ) peaks <- toGRanges(seqnames = DMRInfo[, 1], ranges = IRanges (start = DMRInfo[, 2], end = DMRInfo[, 3])) peaks <- toGRanges(seqnames = DMRInfo[, 1], format=c("BED"), feature=c("gene"), header=FALSE, (start = DMRInfo[, 2], end = DMRInfo[, 3])) peaks <- toGRanges(seqnames = DMRInfo[, 1], format=c("BED") feature=c("gene") header=FALSE,(start = DMRInfo[, 2], end = DMRInfo[, 3])) peaks <- toGRanges(seqnames = DMRInfo[, 1], format=c("BED"(start = DMRInfo[, 2], end = DMRInfo[, 3])) peaks <- toGRanges(seqnames = DMRInfo[, 1], format=c("BED"(start = DMRInfo[, 2], end = DMRInfo[, 3])) class(DMRInfo) toGRanges(DMRInfo, seqnames = DMRInfo[, 1],(start = DMRInfo[, 2], end = DMRInfo[, 3])) toGRanges(DMRInfo, seqnames = DMRInfo[, 1],(start = DMRInfo[, 2] end = DMRInfo[, 3])) toGRanges(DMRInfo, seqnames = DMRInfo[, 1],start = DMRInfo[, 2], end = DMRInfo[, 3]) toGRanges(DMRInfo, seqnames = DMRInfo[, 1](start = DMRInfo[, 2], end = DMRInfo[, 3])) toGRanges(DMRInfo, colNames = DMRInfo[, 1](start = DMRInfo[, 2], end = DMRInfo[, 3])) peaks <- toGRanges(DMRInfo) peaks <- GRanges(seqnames = DMRInfo[, 1], ranges = IRanges (start = DMRInfo[, 2], end = DMRInfo[, 3])) ??fread