view annotate_peak/.Rhistory @ 18:6a9b9694acbf draft

Uploaded
author testtool
date Mon, 20 Mar 2017 06:51:22 -0400
parents 53df7871db21
children
line wrap: on
line source

TAB <- read.csv("input.csv")
mysamples <- lapply(TAB$ID,function(x)getGEO(x))
input <- function(TAB) { if(is(TAB, "csv")){
TAB <- read.csv("input.csv")}
else{
print("error in data file")
}}
input()
TAB <- read.csv("input.csv")}
TAB <- read.csv("input.csv")
test_func <- function(
clusterSize=2,
cutoff=0.2,
platform_id='HM450',
genome_id='hg19')
{
args = commandArgs(trailingOnly=TRUE)
methyl_file = args[1]
ChiPseq_file = args[2]
output_file = args[3]
options(warn=-1)
TAB=read.csv(methyl_file)
ChiPseq=import(ChiPseq_file)
if(is.null(TAB)){
stop("Must specify input files")
}else{
mysamples <- lapply(TAB$ID,function(x) getGEO(x))
}
wrappedFunction <- function(test_func)
s0 <- lapply(mysamples,Table)
id_ref<-lapply(s0,function(x)x$ID_REF)
if(length(unique(id_ref)) != 1) {
stop("Error different ID_REF for samples")
} else if (is.null((unlist(unique(id_ref))))) {
stop("NO GSM data avaliable")
} else {
values<-do.call("cbind",lapply(s0,function(x)x$VALUE))
colnames(values)=TAB$ID
rownames(values)=id_ref[[1]]
cg <-  rownames(values)
probe <- c(cg)
hm450.hg19 <- getPlatform(platform=platform_id, genome=genome_id)
probe.info <- hm450.hg19[probe]
f <- data.table(probe=names(probe.info),CHR=as.data.frame(probe.info@seqnames)$value,
BP=as.numeric(probe.info@elementMetadata$probeStart))
designMatrix <- model.matrix(~ TAB$Phenotype)
DMR <- bumphunter(values, design = designMatrix,
pos=f$BP,cutoff=cutoff,chr=f$CHR)
MAT <- DMR$table[which(DMR$table$L>=clusterSize),]
METH <- GRanges(seqnames=MAT$chr,
ranges=IRanges
(start=MAT$start,
end=MAT$end),
value_pos=MAT$value)
peaks<-findOverlapsOfPeaks(probe.info,ChiPseq,maxgap=5000)
p <- peaks$peaklist$`probe.info///ChiPseq`
peakAnno <- annotatePeak(p, file=peakAnno)
output_file <- annotatePeak(p, file=output_file)
}}
test_func <- function(
clusterSize=2,
cutoff=0.2,
platform_id='HM450',
genome_id='hg19')
{
args = commandArgs(trailingOnly=TRUE)
methyl_file = args[1]
ChiPseq_file = args[2]
output_file = args[3]
options(warn=-1)
TAB=read.csv(methyl_file)
ChiPseq=import(ChiPseq_file)
if(is.null(TAB)){
stop("Must specify input files")
}else{
mysamples <- lapply(TAB$ID,function(x) getGEO(x))
}
wrappedFunction <- function(test_func)
s0 <- lapply(mysamples,Table)
id_ref<-lapply(s0,function(x)x$ID_REF)
if(length(unique(id_ref)) != 1) {
stop("Error different ID_REF for samples")
} else if (is.null((unlist(unique(id_ref))))) {
stop("NO GSM data avaliable")
} else {
values<-do.call("cbind",lapply(s0,function(x)x$VALUE))
colnames(values)=TAB$ID
rownames(values)=id_ref[[1]]
cg <-  rownames(values)
probe <- c(cg)
hm450.hg19 <- getPlatform(platform=platform_id, genome=genome_id)
probe.info <- hm450.hg19[probe]
f <- data.table(probe=names(probe.info),CHR=as.data.frame(probe.info@seqnames)$value,
BP=as.numeric(probe.info@elementMetadata$probeStart))
designMatrix <- model.matrix(~ TAB$Phenotype)
DMR <- bumphunter(values, design = designMatrix,
pos=f$BP,cutoff=cutoff,chr=f$CHR)
MAT <- DMR$table[which(DMR$table$L>=clusterSize),]
METH <- GRanges(seqnames=MAT$chr,
ranges=IRanges
(start=MAT$start,
end=MAT$end),
value_pos=MAT$value)
peaks<-findOverlapsOfPeaks(probe.info,ChiPseq,maxgap=5000)
p <- peaks$peaklist$`probe.info///ChiPseq`
peakAnno <- annotatePeak(p, file=peakAnno)
output_file <- annotatePeak(p, file=output_file)
}}
)
a<-c(3.2,3.7,3.6,3.9,3.7,3.5,3.8,4,3.5)
a<-c(3.2,3.7,3.6,3.9,3.7,3.5,3.8,4,3.5)
b<-c(2.9,2.7,2,6,2.7,2.5,2.5,3.2,3.1,3.3,2.8)
a-b
file<-read.csv("~/Documents/SS2.csv")
head(file)
e
file
t.test(file$WBT,file$WBA)
t.test(file$WBT,file$WBA,paired=TRUE)
mysamples <- lapply(TAB$ID,function(x) TablegetGEO(x))
require("GEOquery",quietly = TRUE)
require("BiocGenerics",quietly = TRUE)
args <- commandArgs(trailingOnly = TRUE)
csv_file = args[1]
#csv_file <- ("test-data/input.csv")
TAB=read.csv(csv_file)
if(is.null(TAB)){
stop("Must specify input files")
}else{
mysamples <- lapply(TAB$ID,function(x) TablegetGEO(x))
}
csv_file <- ("test-data/input.csv")
TAB=read.csv(csv_file)
TAB
csv_file<-("test-data/input.csv")
TAB = read.csv(csv_file)
TAB
??bumphunter
??toGranges
require("ChIPseeker", quietly = TRUE)
require("ChIPpeakAnno", quietly = TRUE)
options(warn = -1)
args <- commandArgs(trailingOnly = TRUE)
DMR = args[1]
annoPeakTable = args[2]
DMRInfo = read.table(
DMR,
header = FALSE,
sep = "\t",
stringsAsFactors = FALSE,
quote = ""
)
peaks <- GRanges(seqnames = DMRInfo[, 1],
ranges = IRanges
(start = DMRInfo[, 2], end = DMRInfo[, 3]))
peaks[1:2]
DMR <- ("test-data/DMR.bed")
DMRInfo = read.table(
DMR,
header = FALSE,
sep = "\t",
stringsAsFactors = FALSE,
quote = ""
)
peaks <- GRanges(seqnames = DMRInfo[, 1],
ranges = IRanges
(start = DMRInfo[, 2], end = DMRInfo[, 3]))
source("https://bioconductor.org/biocLite.R")
biocLite("EnsDb.Hsapiens.v75")
peaks <- toGRanges(seqnames = DMRInfo[, 1],
ranges = IRanges
(start = DMRInfo[, 2], end = DMRInfo[, 3]))
peaks <- toGRanges(DMRInfo)
peaks <- toGRanges(seqnames = DMRInfo[, 1],
ranges = IRanges
(start = DMRInfo[, 2], end = DMRInfo[, 3]))
DMR <- ("test-data/DMR.bed")
DMRInfo = read.table(
DMR,
header = FALSE,
sep = "\t",
stringsAsFactors = FALSE,
quote = ""
)
peaks <- toGRanges(seqnames = DMRInfo[, 1],
ranges = IRanges
(start = DMRInfo[, 2], end = DMRInfo[, 3]))
peaks <- toGRanges(seqnames = DMRInfo[, 1], format=c("BED"), feature=c("gene"), header=FALSE,
(start = DMRInfo[, 2], end = DMRInfo[, 3]))
peaks <- toGRanges(seqnames = DMRInfo[, 1], format=c("BED")
feature=c("gene") header=FALSE,(start = DMRInfo[, 2], end = DMRInfo[, 3]))
peaks <- toGRanges(seqnames = DMRInfo[, 1], format=c("BED"(start = DMRInfo[, 2], end = DMRInfo[, 3]))
peaks <- toGRanges(seqnames = DMRInfo[, 1], format=c("BED"(start = DMRInfo[, 2], end = DMRInfo[, 3]))
class(DMRInfo)
toGRanges(DMRInfo, seqnames = DMRInfo[, 1],(start = DMRInfo[, 2], end = DMRInfo[, 3]))
toGRanges(DMRInfo, seqnames = DMRInfo[, 1],(start = DMRInfo[, 2] end = DMRInfo[, 3]))
toGRanges(DMRInfo, seqnames = DMRInfo[, 1],start = DMRInfo[, 2], end = DMRInfo[, 3])
toGRanges(DMRInfo, seqnames = DMRInfo[, 1](start = DMRInfo[, 2], end = DMRInfo[, 3]))
toGRanges(DMRInfo, colNames = DMRInfo[, 1](start = DMRInfo[, 2], end = DMRInfo[, 3]))
peaks <- toGRanges(DMRInfo)
peaks <- GRanges(seqnames = DMRInfo[, 1],
ranges = IRanges
(start = DMRInfo[, 2], end = DMRInfo[, 3]))
??fread