# HG changeset patch # User mvdbeek # Date 1524234440 14400 # Node ID 1b5bd3b955ed8564495ff851bcbac44d2c52d09e planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/damidseq_polII_gene_call commit 2e97fdaeafdafe2c63588de17777cd32a798adb1-dirty diff -r 000000000000 -r 1b5bd3b955ed damid_polII_gene_call.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/damid_polII_gene_call.xml Fri Apr 20 10:27:20 2018 -0400 @@ -0,0 +1,50 @@ + + from DamIDseq data + + R + + + + + + + + + + + + + + + + + + + + + + + + + + 10.1093/bioinformatics/btv386 + + diff -r 000000000000 -r 1b5bd3b955ed polii.gene.call --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/polii.gene.call Fri Apr 20 10:27:20 2018 -0400 @@ -0,0 +1,372 @@ +#!/usr/bin/env Rscript + +# polii.gene.call.r +# Copyright © 2014-15, Owen Marshall + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or (at +# your option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 +# USA + +### FDR calcs ### +# Method based on original perl scripts by Tony Southall (TDS) as published in +# Southall et al. (2013). Dev Cell, 26(1), 101–12. doi:10.1016/j.devcel.2013.05.020 +# +# Significant modifications to the original methodology include: +# * taking a linear regression of log data rather than trial-and-error curve fitting of non-log data +# * using a linear regression for the final intercept value rather than using the average intercept value for all conditions +# -- both of these should increase the accuracy of the final FDR value. + +version <- "1.0.2" +cat(paste("polii.gene.call v",version,"\n", sep="")) + +library(tools) + +### Read CLI options +input.args <- commandArgs(trailingOnly = TRUE) + +in.files <- vector() +read.ops <- function (x) { + for (op in x) { + if (any(grepl("^--",op))) { + op <- gsub("^--","",op) + y <- unlist(strsplit(op,"=")) + + if (y[1] == "help") { + cat(paste("Usage: Rscript polii.gene.call.r --genes.file=[some_genes_file.gff] [list of .gatc.bedgraph or .gatc.gff ratio files to process]\n\n", sep="")) + + cat("Options:\n") + for (n in names(op.args)) { + cat(paste(" ",n,"=",op.args[[n]],"\n",sep="")) + } + cat("\n") + quit("no",1) + } + + if (!is.null(op.args[[ y[1] ]])) { + op.args[[ y[1] ]] <<- y[2] + } else { + cat("Error: Option",y[1],"not recognised ...\n") + quit("no",1) + } + } else { + in.files <<- c(in.files,op) + } + } +} + +write.ops <- function () { + out.df <- data.frame() + for (n in names(op.args)) { + v <<- as.character(op.args[[n]]) + df.line <- data.frame( + option=n, + value=v + ) + out.df <- rbind(out.df, df.line) + } + write.table(out.df,"input.args.single.txt",row.names=F) +} + +op.args <- list( + "genes.file" = "/mnt/data/Genomes/dmel_release/DmR6/DmR6.genes.gff", + "iter" = 50000, + "fdr" = 0.01 +) + +read.ops(input.args) + +if (length(in.files) == 0) { + cat("Usage: Rscript polii.gene.call.r [list of .gatc.gff ratio files to process]\n\n") + quit("no",1) +} + +write.ops() + +### save random seed for future reproducibility +dump.random <- runif(1) +my.seed <- .Random.seed +write.table(my.seed,".randomseed") + +### read genes file +cat("Reading genes data file ...\n") +genes.file=op.args[["genes.file"]] +genes <- read.table(genes.file, comment.char="#", sep="\t", quote="", fill=T) +names(genes) <- c('chr','source','type','start','end','score','strand','c','details') + +# only subset if there is a type termed "gene" +if (any(genes$type == 'gene')) { + genes <- subset(genes, type=='gene') +} + +genes$name <- sapply(genes$details, FUN = function (x) {regmatches(x,gregexpr("(?<=Name=).*?(?=;)", x, perl=T))} ) +genes <- genes[,c('chr','start','end','strand','name')] +genes$chr <- gsub("^chr","",genes$chr,perl=T) + +if (nrow(genes) == 0) { + cat("Error: unable to extract gene information from genes file\n\n") + quit("no",1) +} + +### functions +read.gff <- function (x,name="score") { + fn.ext <- file_ext(x) + + if (grepl("gff",ignore.case=T,fn.ext)) { + temp.data <- read.table(x,row.names=NULL) + if (ncol(temp.data) > 5) { + # GFF + trim.data <- temp.data[,c(1,4,5,6)] + } else { + cat("Error: file does not appear to be in GFF format\n\n") + quit("no",1) + } + } else if (grepl("bed",ignore.case=T,fn.ext)) { + temp.data <- read.table(x,row.names=NULL,skip=1) + if (ncol(temp.data) == 4) { + # bedgraph + trim.data <- temp.data + } else { + cat("Error: file does not appear to be in bedGraph format\n\n") + quit("no",1) + } + } else { + cat("Error: input file does not appear to be in bedGraph or GFF format ...\n\n") + quit("no",1) + } + + names(trim.data) <- c("chr","start","end",name) + + trim.data$chr <- gsub("^chr","",trim.data$chr,perl=T) + + return(trim.data) +} + +gene.exp <- function (input.df, buffer=0, iter=50000, debug=F) { + + avg.exp <- data.frame(input.df[1,c(4:(length(names(input.df))))]) + avg <- vector(length=(length(names(input.df)) - 4)) + avg.exp <- avg.exp[0,] + + ### FDR calcs ### + # Method based off perl scripts by Tony Southall (TDS) as published in + # Southall et al. (2013). Dev Cell, 26(1), 101–12. doi:10.1016/j.devcel.2013.05.020 + # + # Significant modifications to the original methodology include: + # * taking a linear regression of log data rather than trial-and-error curve fitting of non-log data + # * using a linear regression for the final intercept value rather than using the average intercept value for all conditions + # -- both of these should increase the accuracy of the final FDR value. + + input.len <- length(input.df[,1]) + frag.samp <- c(1,2,3,4,6,8,10,12,15) + thres.samp <- c(0.1,0.2,0.3,0.4,0.5,0.65,0.8,1.0,1.5,2.0) + + rand <- list() + + for (thres in thres.samp) { + + cat(paste(" Calculating FDR for threshold",thres,"\n",sep=" ")) + + # init vars + for (f in frag.samp) { + # List names are, e.g. frag 1, thres 0.2: rand[[thres.1.0.2]] + rand[[paste("thres.",f,".",thres,sep="")]] <- 0; + } + + for (i in 1:iter) { + if (i %% 200 == 0) {cat(paste(" iter",i,"\r"))} + # get random sample for different fragment lengths + + rand.samp <- list() + + for (f in frag.samp) { + # Using the fourth column as we're only calculating FDR for one sample ... + rand.samp[[paste("rand.",f,sep="")]] <- mean(input.df[runif(f,1,input.len),4]) + } + + # count number of times exp > thres + for (f in frag.samp) { + if (rand.samp[[paste("rand.",f,sep="")]] > thres) {rand[[paste("thres.",f,".",thres,sep="")]] <- rand[[paste("thres.",f,".",thres,sep="")]] + 1} + } + } + } + + rand.fdr <- list() + + for (thres in thres.samp) { + for (f in frag.samp) { + rand.fdr[[paste("thres.",f,".",thres,sep="")]] <- rand[[paste("thres.",f,".",thres,sep="")]]/iter + } + } + + cat("Fitting curves ...\n") + + # curve fit: fdr vs thresholds + var.thres <- list() + + for (thres in thres.samp) { + for (f in frag.samp) { + var.thres[[paste("frags.",f,sep="")]] <- append(var.thres[[paste("frags.",f,sep="")]], rand.fdr[[paste("thres.",f,".",thres,sep="")]]) + } + } + + inf.log.lm <- function (v) { + non.inf <- log(v) != -Inf + ret <- lm(log(v)[non.inf] ~ thres.samp[non.inf]) + return(ret) + } + + # The relationship is exponential, so we need log data for a linear regression + # (in R, linear regression is: y = lm$coefficients[[2]]x + lm$coefficients[[1]] ... ) + var.lm <- list() + for (f in frag.samp) { + var.lm[[paste("frags.",f,sep="")]] <- inf.log.lm(var.thres[[paste("frags.",f,sep="")]]) + } + + # ... and now we do a linear regression on the slopes and intercepts of our previous regressions + # (This is the clever bit, and it actually seems to work. The correlation of slope to fragment size is linear ... + # By doing this on the slope and intercept, we can now predict the FDR for any number of fragments with any expression.) + slope <- vector() + for (f in frag.samp) { + slope <- append(slope, var.lm[[paste("frags.",f,sep="")]]$coefficients[[2]]) + } + + # slope regression predicts the average slope + slope.lm <- lm(slope ~ frag.samp) + + # TDS used an average intercept value for the intercept, however ... + inter <- vector() + for (f in frag.samp) { + inter <- append(inter, var.lm[[paste("frags.",f,sep="")]]$coefficients[[1]]) + } + + # ... there's actually quite a bit of variation of the intercept with real data, + # so we're going to do a lin regression on the intercept instead. + # + # (I'm not convinced it's a true linear relationship. But it's close to linear, + # and will certainly perform better than taking the mean intercept ...) + # + # If you're interested, set the debug flag to TRUE and take a look at the plots generated below ... + inter.lm <- lm(inter ~ frag.samp) + + if (debug == T) { + # plots for debugging/checking + plot.debug <- function (y,x,l,name="Debug plot") { + plot(y ~ x) + abline(l) + + lsum <- summary(l) + r2 <- lsum$r.squared + + legend("topleft",legend=r2,bty='n') + title(name) + + dev.copy(png,paste(name,".png",sep=""));dev.off() + } + + plot.debug(slope,frag.samp,slope.lm,name="correlation of slope") + plot.debug(inter,frag.samp,inter.lm,name="correlation of intercepts") + } + + # ok, so putting that all together ... + fdr <- function (frags, expr) { + inter.test <- inter.lm$coefficients[[2]] * frags + inter.lm$coefficients[[1]] + slope.test <- slope.lm$coefficients[[2]] * frags + slope.lm$coefficients[[1]] + + fdr.out <- exp(slope.test * expr + inter.test) + return(fdr.out) + } + + ### Gene expression values ### + cat("Calculating gene values ...\n") + + count <- 0 + + # unroll chromosomes for speed: + for (chromo in unique(genes$chr)) { + input.chr <- subset(input.df, chr==chromo) + genes.chr <- subset(genes, chr==chromo) + for (i in 1:length(genes.chr$name)) { + # Roll through each gene + + # Note: the original script calculated expression values for a table of proteins, + # here we just limit ourselves to the FDR for the first column past "chr", "start" and "end" + # so if you're thinking of looking at chromatin, for example, put PolII as column 4 in your table! + + gene.start <- genes.chr[i,"start"] - buffer + gene.end <- genes.chr[i,"end"] + buffer + + gene.start <- ifelse(gene.start < 1, 1, gene.start) + + # Create data frames for all gatc fragments covering current gene + exp <- data.frame(input.chr[ (input.chr$start <= gene.end) + & (input.chr$end >= gene.start) + ,] ) + + gatc.num <- length(exp[,1]) + + # skip if no gatc fragments cover gene :( + if (gatc.num == 0) {next} + + # trim to gene boundaries ... + exp$start[1] <- gene.start + exp$end[length(exp[,1])] <- gene.end + + # gene length covered by gatc fragments + len <- sum(exp$end-exp$start) + + # calculate weighted score for each column (representing different proteins) + for (j in 4:length(names(input.chr))) { + avg[j] <- (sum((exp$end-exp$start)*exp[j]))/len + } + + # make data.frame of averages (to be appended to avg.exp) + df <- cbind(avg[1]) + for (k in 2:length(avg)) { + df <- cbind(df,avg[k]) + } + df <- cbind(df,gatc.num) + + # only fdr for first column for now ... + gene.fdr <- fdr(gatc.num,avg[4]) + df <- cbind(df, gene.fdr) + + # append current gene to list + avg.exp <- rbind(avg.exp,data.frame(name=as.character(genes.chr[i,"name"]), df)) + count <- count+1 + if (count %% 50 == 0) {cat(paste(count,"genes averaged ...\r"))} + } + } + + avg.exp <- avg.exp[,c(1,5:(length(names(avg.exp))))] + names(avg.exp) <- c("name",names(input.df)[c(4:(length(names(input.df))))],"gatc.num","FDR") + + return(avg.exp) +} + +for (name in in.files) { + cat(paste("\nNow working on",name,"...\n")) + bname <- basename(name) + fname <- sub("\\..*","",bname,perl=T) + + polii <- read.gff(name,"polii") + + polii.exp <- gene.exp(polii, iter=as.numeric(op.args[["iter"]])) + + out <- subset(polii.exp,FDR < op.args[["fdr"]]) + + write.table(polii.exp,paste(fname,"genes.details.csv",sep="."),row.names=F,col.names=T,quote=T,sep="\t") + write.table(out$name, paste(fname,"genes",sep="."),row.names=F,col.names=F,quote=F) +} + +cat("\nAll done.\n\n") diff -r 000000000000 -r 1b5bd3b955ed test-data/details.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/details.csv Fri Apr 20 10:27:20 2018 -0400 @@ -0,0 +1,11 @@ +"name" "polii" "gatc.num" "FDR" +"CR46254" 0.356507105200981 5 0.791940813158031 +"CR45339" 1.20628998889217 2 0.260660674330585 +"CR45340" 1.93867996741526 5 0.0511077462101616 +"dbr" 1.2951093442343 22 0.0152079814861417 +"CR44987" 2.49935540607138 24 1.97089127262964e-05 +"galectin" 2.38923790079731 21 9.84816956841954e-05 +"CG11374" 0.00926960982128576 5 1.4451260959191 +"net" 0.663390756192582 14 0.394156836924802 +"Zir" -0.337727067030228 30 181.086928253756 +"CG11377" 0.263311791595758 9 1.16681003833418 diff -r 000000000000 -r 1b5bd3b955ed test-data/genes.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genes.csv Fri Apr 20 10:27:20 2018 -0400 @@ -0,0 +1,5 @@ +CR46254 +CR45339 +CR45340 +dbr +net diff -r 000000000000 -r 1b5bd3b955ed test-data/genes.gff --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/genes.gff Fri Apr 20 10:27:20 2018 -0400 @@ -0,0 +1,16 @@ +2L FlyBase gene 54817 55767 . + . ID=FBgn0267987;Name=CR46254;Ontology_term=SO:0000011,SO:0000087;Dbxref=FlyBase_Annotation_IDs:CR46254;derived_computed_cyto=21B1-21B1 +2L FlyBase ncRNA 54817 55767 . + . ID=FBtr0347585;Name=CR46254-RA;Parent=FBgn0267987;Dbxref=FlyBase_Annotation_IDs:CR46254-RA;score_text=Weakly Supported;score=0 +2L FlyBase gene 65999 66242 . + . ID=FBgn0266878;Name=CR45339;Ontology_term=SO:0000011,SO:0000087;Dbxref=FlyBase_Annotation_IDs:CR45339,EntrezGene:19834983,GenomeRNAi:19834983;derived_computed_cyto=21B1-21B1 +2L FlyBase ncRNA 65999 66242 . + . ID=FBtr0345732;Name=CR45339-RA;Parent=FBgn0266878;Dbxref=FlyBase_Annotation_IDs:CR45339-RA,REFSEQ:NR_123995;score_text=Weakly Supported;score=0 +2L FlyBase gene 66318 66524 . + . ID=FBgn0266879;Name=CR45340;Ontology_term=SO:0000011,SO:0000087;Dbxref=FlyBase_Annotation_IDs:CR45340,EntrezGene:19835538,GenomeRNAi:19835538;derived_computed_cyto=21B1-21B1 +2L FlyBase ncRNA 66318 66524 . + . ID=FBtr0345733;Name=CR45340-RA;Parent=FBgn0266879;Dbxref=FlyBase_Annotation_IDs:CR45340-RA,REFSEQ:NR_123996;score_text=Weakly Supported;score=0 +2L FlyBase gene 66482 71390 . + . ID=FBgn0067779;Name=dbr;fullname=debra;Alias=FBgn0028472,FBgn0043386,CG11371,Dbr,EP456,BcDNA:LD26519,EP0456,Debra;Ontology_term=SO:0000010,SO:0000087,GO:0005771,GO:0007616,GO:0005634,GO:0008270;Dbxref=FlyBase:FBan0011371,FlyBase_Annotation_IDs:CG11371,GB_protein:AAF51565,GB:AA942362,GB:AF184228,GB_protein:AAD55739,GB:AI109558,GB:AI402147,GB:AQ026007,GB:AQ073408,GB:AW942847,GB:AY047576,GB_protein:AAK77308,GB:CL527762,GB:CZ468654,GB:CZ470443,GB:CZ470758,GB:CZ471260,GB:CZ474676,GB:CZ474677,GB:CZ477845,GB:CZ482562,GB:CZ487746,UniProt/TrEMBL:Q9V474,INTERPRO:IPR012934,OrthoDB7_Drosophila:EOG79H5RG,GB_protein:AFH03489,GB_protein:AFH03487,GB_protein:AFH03486,GB_protein:AFH03488,EntrezGene:33161,UniProt/TrEMBL:Q961T5,BIOGRID:59424,FlyAtlas:CG11371-RB,InterologFinder:33161,GenomeRNAi:33161,INTERACTIVEFLY:/genebrief/debra.htm;gbunit=AE014134;derived_computed_cyto=21B1-21B1 +2L FlyBase gene 71039 73836 . - . ID=FBgn0266322;Name=CR44987;Ontology_term=SO:0000011,SO:0000087,SO:0000077;Dbxref=EntrezGene:19835103,FlyBase_Annotation_IDs:CR44987,GenomeRNAi:19835103;derived_computed_cyto=21B1-21B1 +2L FlyBase ncRNA 71039 73836 . - . ID=FBtr0344052;Name=CR44987-RA;Parent=FBgn0266322;Dbxref=REFSEQ:NR_123998,FlyBase_Annotation_IDs:CR44987-RA;score_text=Weakly Supported;score=0 +2L FlyBase ncRNA 71039 73836 . - . ID=FBtr0344053;Name=CR44987-RB;Parent=FBgn0266322;Dbxref=REFSEQ:NR_123997,FlyBase_Annotation_IDs:CR44987-RB;score_text=Weakly Supported;score=0 +2L FlyBase gene 71757 76211 . + . ID=FBgn0031213;Name=galectin;fullname=galectin;Alias=CG11372,Dmgal;Ontology_term=SO:0000010,SO:0000087,GO:0016936,GO:0005829,GO:0015143,GO:0008039,GO:0030246;Dbxref=FlyBase:FBan0011372,FlyBase_Annotation_IDs:CG11372,GB_protein:AAF51564,GB_protein:ACZ94130,GB_protein:ACZ94129,GB:AF338142,GB_protein:AAL87743,GB:AY071294,GB_protein:AAL48916,GB:BI236456,GB:CZ467438,GB:CZ467439,GB:CZ468006,GB:CZ481733,GB:CZ481734,GB:CZ485874,GB:CZ485875,UniProt/TrEMBL:Q9VPI6,INTERPRO:IPR001079,INTERPRO:IPR013320,GB_protein:AGB92325,UniProt/TrEMBL:C0PUZ0,UniProt/TrEMBL:M9NCX9,OrthoDB7_Drosophila:EOG7XQ9R2,OrthoDB7_Diptera:EOG76TN7W,GB_protein:AFH03490,EntrezGene:33162,UniProt/TrEMBL:E1JHP9,UniProt/TrEMBL:E1JHQ0,BDGP_clone:RE32514,OrthoDB7_Insecta:EOG76QSHF,OrthoDB7_Arthropoda:EOG7WTGG5,OrthoDB7_Metazoa:EOG7S7SFG,InterologFinder:33162,BIOGRID:59425,FlyAtlas:CG11372-RA,GenomeRNAi:33162;gbunit=AE014134;derived_computed_cyto=21B1-21B1 +2L FlyBase gene 76348 77783 . + . ID=FBgn0031214;Name=CG11374;Ontology_term=SO:0000010,SO:0000087,GO:0016936,GO:0015143,GO:0030246;Dbxref=FlyBase:FBan0011374,FlyBase_Annotation_IDs:CG11374,UniProt/TrEMBL:Q9VPI7,INTERPRO:IPR001079,INTERPRO:IPR013320,OrthoDB7_Drosophila:EOG735GGT,GB_protein:AAF51563,EntrezGene:33163,OrthoDB7_Insecta:EOG76QSHF,OrthoDB7_Arthropoda:EOG7WTGG5,OrthoDB7_Metazoa:EOG7S7SFG,InterologFinder:33163,FlyAtlas:CG11374-RA,GenomeRNAi:33163;gbunit=AE014134;derived_computed_cyto=21B1-21B1 +2L FlyBase gene 82421 87387 . - . ID=FBgn0002931;Name=net;fullname=net;Alias=FBgn0031215,CG11450,Shout,Group IId,shout,bHLHc42;Ontology_term=SO:0000010,SO:0000087,GO:0005634,GO:0003700,GO:0008586,GO:0006355,GO:0007474,GO:0000122,GO:0046983;Dbxref=FlyBase:FBan0011450,FlyBase_Annotation_IDs:CG11450,GB_protein:AAF51562,GB:AF303350,GB_protein:AAK14073,GB:CZ478919,GB:CZ478940,UniProt/TrEMBL:Q9VPI8,INTERPRO:IPR011598,GB_protein:AGB92326,OrthoDB7_Drosophila:EOG774925,OrthoDB7_Diptera:EOG786TM8,EntrezGene:45339,BDGP_clone:PC00159,UniProt/TrEMBL:Q7KH24,OrthoDB7_Insecta:EOG73VJM7,OrthoDB7_Arthropoda:EOG7SRH34,OrthoDB7_Metazoa:EOG73NG5F,BIOGRID:69604,FlyAtlas:CG11450-RA,InterologFinder:45339,GenomeRNAi:45339,INTERACTIVEFLY:/gene/net1.htm;gbunit=AE014134;derived_computed_cyto=21B1-21B1 +2L FlyBase gene 94739 102086 . + . ID=FBgn0031216;Name=Zir;fullname=Zizimin-related;Alias=FBgn0025528,anon-EST:Posey54,Dm zir,CG11376,FBgn 31216;Ontology_term=SO:0000010,SO:0000087,GO:0035010,GO:0006909,GO:0005089,GO:0035023,GO:0035011,GO:0005575;Dbxref=FlyBase:FBan0011376,FlyBase_Annotation_IDs:CG11376,GB_protein:AAF51561,GB:AA391952,GB:AF083301,GB:AW942162,GB:AZ877629,GB:BT015273,GB_protein:AAT94502,GB:CZ468078,GB:CZ468665,GB:CZ469343,GB:CZ470525,GB:CZ470937,GB:CZ480810,GB:CZ480811,GB:CZ483732,UniProt/TrEMBL:Q9VPI9,INTERPRO:IPR027007,INTERPRO:IPR027357,INTERPRO:IPR026791,OrthoDB7_Drosophila:EOG7DP1D5,OrthoDB7_Diptera:EOG78133C,EntrezGene:33165,INTERPRO:IPR021816,INTERPRO:IPR010703,BDGP_clone:LD10883,OrthoDB7_Insecta:EOG71CSK0,OrthoDB7_Arthropoda:EOG735G9K,OrthoDB7_Metazoa:EOG7P8P7G,BIOGRID:59427,InterologFinder:33165,FlyAtlas:CG11376-RA,Fly-FISH:CG11376,GenomeRNAi:33165;gbunit=AE014134;derived_computed_cyto=21B1-21B2 +2L FlyBase gene 102380 104142 . + . ID=FBgn0031217;Name=CG11377;Alias=CT31762;Ontology_term=SO:0000010,SO:0000087,GO:0005509,SO:0000548;Dbxref=FlyBase:FBan0011377,FlyBase_Annotation_IDs:CG11377,GB_protein:AAF51560,GB:AY071548,GB_protein:AAL49170,GB:BI371522,UniProt/TrEMBL:Q9VPJ0,INTERPRO:IPR000742,INTERPRO:IPR002049,INTERPRO:IPR006212,INTERPRO:IPR009030,INTERPRO:IPR013032,INTERPRO:IPR018097,INTERPRO:IPR021852,INTERPRO:IPR001881,GB_protein:AGB92327,UniProt/TrEMBL:M9PDP6,OrthoDB7_Drosophila:EOG799F97,OrthoDB7_Diptera:EOG7QK9VM,EntrezGene:33166,OrthoDB7_Insecta:EOG7GR8VQ,OrthoDB7_Arthropoda:EOG7PSCW4,OrthoDB7_Metazoa:EOG7Z3F4B,InterologFinder:33166,BIOGRID:59428,FlyAtlas:CG11377-RA,Fly-FISH:CG11377,GenomeRNAi:33166;gbunit=AE014134;derived_computed_cyto=21B2-21B2 + diff -r 000000000000 -r 1b5bd3b955ed test-data/polII.bed --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/polII.bed Fri Apr 20 10:27:20 2018 -0400 @@ -0,0 +1,111 @@ +2L 54816 55594 0.0552525047833769 +2L 54816 55594 0.0552525047833769 +2L 55594 55635 0.818636112867944 +2L 55594 55635 0.818636112867944 +2L 55635 55767 1.09961409723048 +2L 55635 55767 1.09961409723048 +2L 65998 66242 1.20628998889217 +2L 65998 66242 1.20628998889217 +2L 66317 66422 1.20628998889217 +2L 66317 66422 1.20628998889217 +2L 66422 66524 2.5583945646271 +2L 66422 66524 2.5583945646271 +2L 66481 67274 2.5583945646271 +2L 67274 67324 1.98680486782346 +2L 67324 67817 2.08837880305762 +2L 67817 67905 1.86112283695141 +2L 67905 68330 1.62486655160691 +2L 68330 68393 1.54900967010231 +2L 68393 68886 1.11119688964443 +2L 68886 69103 0.869617644974253 +2L 69103 69241 0.387235732197361 +2L 69241 69301 0.418133867661119 +2L 69301 69792 0.899479539841806 +2L 69792 69796 0.823796983227429 +2L 69796 69820 0.823796983227429 +2L 69820 69878 0.549942264108176 +2L 69878 69959 0.761892304255089 +2L 69959 70111 1.1531457850107 +2L 70111 71390 0.846518854258207 +2L 71038 71492 0.846518854258207 +2L 71038 71492 0.846518854258207 +2L 71038 71492 0.846518854258207 +2L 71492 71990 3.39665608645984 +2L 71492 71990 3.39665608645984 +2L 71492 71990 3.39665608645984 +2L 71756 71990 3.39665608645984 +2L 71990 72652 3.40816353628327 +2L 71990 72652 3.40816353628327 +2L 71990 72652 3.40816353628327 +2L 71990 72652 3.40816353628327 +2L 72652 72787 2.8478823465353 +2L 72652 72787 2.8478823465353 +2L 72652 72787 2.8478823465353 +2L 72652 72787 2.8478823465353 +2L 72787 73420 2.9137462183582 +2L 72787 73420 2.9137462183582 +2L 72787 73420 2.9137462183582 +2L 72787 73420 2.9137462183582 +2L 73420 73836 1.07916756002746 +2L 73420 73836 1.07916756002746 +2L 73420 73836 1.07916756002746 +2L 73420 75584 1.07916756002746 +2L 75584 76211 0.351389501360189 +2L 76347 76557 0.351389501360189 +2L 76557 76602 0.111868083546615 +2L 76602 76694 -0.103143189574513 +2L 76694 77646 -0.0832154089110144 +2L 77646 77783 0.171807761122692 +2L 82420 82686 0.950799825059922 +2L 82686 82987 0.860805501917355 +2L 82987 83358 0.9016755036993 +2L 83358 83622 0.4912523962063 +2L 83622 83828 0.574814522065537 +2L 83828 84509 0.950829410021384 +2L 84509 84651 1.13276692007025 +2L 84651 84867 1.40640395753805 +2L 84867 85682 0.810578385233812 +2L 85682 85687 0.675864965432377 +2L 85687 86075 0.661825886242909 +2L 86075 86215 0.349192348167705 +2L 86215 86316 0.0920730976226941 +2L 86316 87387 0.102318953834602 +2L 94738 95132 -0.106500647321927 +2L 95132 95199 -0.0638569168395719 +2L 95199 95277 0.217427011118734 +2L 95277 95514 0.267705566854591 +2L 95514 95662 0.0612754941064784 +2L 95662 95731 -0.429957911575681 +2L 95731 95761 -0.556019144100863 +2L 95761 96385 -0.725477533205267 +2L 96385 96412 -0.655603418929584 +2L 96412 96491 -0.409536719736943 +2L 96491 97369 -0.226244771136778 +2L 97369 97583 -0.308700411197029 +2L 97583 97642 0.0394679913725759 +2L 97642 97825 0.217457643667941 +2L 97825 98118 0.0793574230565764 +2L 98118 98244 -0.00404328552099076 +2L 98244 98316 -0.285051719568665 +2L 98316 98529 -0.52337489352199 +2L 98529 98813 -0.853641568435678 +2L 98813 99183 -1.20840278715919 +2L 99183 99266 -0.57988161546026 +2L 99266 99666 -0.0718062889470329 +2L 99666 99974 -0.313463689842528 +2L 99974 100074 -0.962054132059592 +2L 100074 100636 -1.19450144164682 +2L 100636 100841 -0.134973412111241 +2L 100841 101514 0.0638744109411819 +2L 101514 101553 -0.0533119771867642 +2L 101553 102022 -0.0462385325318383 +2L 102379 102687 -0.0746051708557362 +2L 102022 102086 -0.0746051708557362 +2L 102687 102730 -0.0466007641328778 +2L 102730 102832 0.129772291996865 +2L 102832 103057 0.328567918234615 +2L 103057 103345 0.287560490314009 +2L 103345 103349 0.278800162489859 +2L 103349 103506 -0.00904364838917334 +2L 103506 103683 0.126271941123851 +2L 103683 104142 0.646700520857224