Mercurial > repos > testtool > annotate_peak
view annotatePeak/.Rhistory @ 2:be66730c5c3b draft
Uploaded
author | testtool |
---|---|
date | Sat, 25 Feb 2017 11:29:01 -0500 |
parents | 92387cb81962 |
children | 7beb1a8f7cb0 |
line wrap: on
line source
TAB <- read.csv("input.csv") mysamples <- lapply(TAB$ID,function(x)getGEO(x)) input <- function(TAB) { if(is(TAB, "csv")){ TAB <- read.csv("input.csv")} else{ print("error in data file") }} input() TAB <- read.csv("input.csv")} TAB <- read.csv("input.csv") test_func <- function( clusterSize=2, cutoff=0.2, platform_id='HM450', genome_id='hg19') { args = commandArgs(trailingOnly=TRUE) methyl_file = args[1] ChiPseq_file = args[2] output_file = args[3] options(warn=-1) TAB=read.csv(methyl_file) ChiPseq=import(ChiPseq_file) if(is.null(TAB)){ stop("Must specify input files") }else{ mysamples <- lapply(TAB$ID,function(x) getGEO(x)) } wrappedFunction <- function(test_func) s0 <- lapply(mysamples,Table) id_ref<-lapply(s0,function(x)x$ID_REF) if(length(unique(id_ref)) != 1) { stop("Error different ID_REF for samples") } else if (is.null((unlist(unique(id_ref))))) { stop("NO GSM data avaliable") } else { values<-do.call("cbind",lapply(s0,function(x)x$VALUE)) colnames(values)=TAB$ID rownames(values)=id_ref[[1]] cg <- rownames(values) probe <- c(cg) hm450.hg19 <- getPlatform(platform=platform_id, genome=genome_id) probe.info <- hm450.hg19[probe] f <- data.table(probe=names(probe.info),CHR=as.data.frame(probe.info@seqnames)$value, BP=as.numeric(probe.info@elementMetadata$probeStart)) designMatrix <- model.matrix(~ TAB$Phenotype) DMR <- bumphunter(values, design = designMatrix, pos=f$BP,cutoff=cutoff,chr=f$CHR) MAT <- DMR$table[which(DMR$table$L>=clusterSize),] METH <- GRanges(seqnames=MAT$chr, ranges=IRanges (start=MAT$start, end=MAT$end), value_pos=MAT$value) peaks<-findOverlapsOfPeaks(probe.info,ChiPseq,maxgap=5000) p <- peaks$peaklist$`probe.info///ChiPseq` peakAnno <- annotatePeak(p, file=peakAnno) output_file <- annotatePeak(p, file=output_file) }} test_func <- function( clusterSize=2, cutoff=0.2, platform_id='HM450', genome_id='hg19') { args = commandArgs(trailingOnly=TRUE) methyl_file = args[1] ChiPseq_file = args[2] output_file = args[3] options(warn=-1) TAB=read.csv(methyl_file) ChiPseq=import(ChiPseq_file) if(is.null(TAB)){ stop("Must specify input files") }else{ mysamples <- lapply(TAB$ID,function(x) getGEO(x)) } wrappedFunction <- function(test_func) s0 <- lapply(mysamples,Table) id_ref<-lapply(s0,function(x)x$ID_REF) if(length(unique(id_ref)) != 1) { stop("Error different ID_REF for samples") } else if (is.null((unlist(unique(id_ref))))) { stop("NO GSM data avaliable") } else { values<-do.call("cbind",lapply(s0,function(x)x$VALUE)) colnames(values)=TAB$ID rownames(values)=id_ref[[1]] cg <- rownames(values) probe <- c(cg) hm450.hg19 <- getPlatform(platform=platform_id, genome=genome_id) probe.info <- hm450.hg19[probe] f <- data.table(probe=names(probe.info),CHR=as.data.frame(probe.info@seqnames)$value, BP=as.numeric(probe.info@elementMetadata$probeStart)) designMatrix <- model.matrix(~ TAB$Phenotype) DMR <- bumphunter(values, design = designMatrix, pos=f$BP,cutoff=cutoff,chr=f$CHR) MAT <- DMR$table[which(DMR$table$L>=clusterSize),] METH <- GRanges(seqnames=MAT$chr, ranges=IRanges (start=MAT$start, end=MAT$end), value_pos=MAT$value) peaks<-findOverlapsOfPeaks(probe.info,ChiPseq,maxgap=5000) p <- peaks$peaklist$`probe.info///ChiPseq` peakAnno <- annotatePeak(p, file=peakAnno) output_file <- annotatePeak(p, file=output_file) }} ) a<-c(3.2,3.7,3.6,3.9,3.7,3.5,3.8,4,3.5) a<-c(3.2,3.7,3.6,3.9,3.7,3.5,3.8,4,3.5) b<-c(2.9,2.7,2,6,2.7,2.5,2.5,3.2,3.1,3.3,2.8) a-b file<-read.csv("~/Documents/SS2.csv") head(file) e file t.test(file$WBT,file$WBA) t.test(file$WBT,file$WBA,paired=TRUE) mysamples <- lapply(TAB$ID,function(x) TablegetGEO(x)) require("GEOquery",quietly = TRUE) require("BiocGenerics",quietly = TRUE) args <- commandArgs(trailingOnly = TRUE) csv_file = args[1] #csv_file <- ("test-data/input.csv") TAB=read.csv(csv_file) if(is.null(TAB)){ stop("Must specify input files") }else{ mysamples <- lapply(TAB$ID,function(x) TablegetGEO(x)) } csv_file <- ("test-data/input.csv") TAB=read.csv(csv_file) figure = args[3] upsetplot(anno, vennpie=TRUE) dev.copy(png,'fig.png') png(upsetplot(anno, vennpie=TRUE)) upsetplot(anno, vennpie=TRUE) require("ChIPseeker",quietly = TRUE) require("ChIPpeakAnno",quietly = TRUE) upsetplot(anno, vennpie=TRUE) figure<-('test-data/figure.png') anno <- annotatePeak(peaks,level="gene", annoDb="org.Hs.eg.db" ) input=read.table(input,header = FALSE, sep="\t",stringsAsFactors=FALSE, quote="") input<-("test-data/bump.bed") input=read.table(input,header = FALSE, sep="\t",stringsAsFactors=FALSE, quote="") peaks <- GRanges(seqnames=input[,1],ranges=IRanges (start=input[,2],end= input[,3])) anno <- annotatePeak(peaks,level="gene", annoDb="org.Hs.eg.db" ) peakAnno_genes <- as.data.frame(anno) upsetplot(anno, vennpie=TRUE) figure<-('test-data/figure.png') png(figure) dev.off() png(figure) upsetplot(anno, vennpie=TRUE) org.Hs.eg.db ??org.Hs.eg.db require("ChIPseeker", quietly = TRUE) require("ChIPpeakAnno", quietly = TRUE) DMRInfo = read.table( DMR, header = FALSE, sep = "\t", stringsAsFactors = FALSE, quote = "" ) DMRPeaks <- GRanges(seqnames = DMRInfo[, 1], ranges = IRanges (start = DMRInfo[, 2], end = DMRInfo[, 3])) annotatePeak <- as.data.frame(annotatePeak(DMRPeaks, level = "gene", annoDb = "org.Hs.eg.db")) ??org.Hs.eg.db