Mercurial > repos > xuebing > sharplabtool
diff intervalOverlap.py @ 14:76e1b1b21cce default tip
Deleted selected files
author | xuebing |
---|---|
date | Tue, 13 Mar 2012 19:05:10 -0400 |
parents | 292186c14b08 |
children |
line wrap: on
line diff
--- a/intervalOverlap.py Sat Mar 10 08:17:36 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,82 +0,0 @@ -''' -find overlap and test signifiance -''' - -import os,sys - -def lineCount(filename): - i = 0 - with open(filename) as f: - for i, l in enumerate(f): - pass - return i + 1 - -def intersect(fileA,fileB,outfile,fraction,reciprocal): - # return fileA intervals that overlap with interval in fileB - cmd = 'intersectBed -a '+fileA+' -b '+fileB + ' --wo -f '+fraction +' '+ reciprocal + '>'+outfile - #print cmd - os.system(cmd) - -def parseIntersect(filename): - # get number of overlapped A and B - nA = 0 - nB = 0 - return nA,nb - -def shuffle(fileA,fileB,genomefile,fraction,reciprocal,N): - # shuffle fileA N times, return the distribution of overlaps - nOverlap = [] - for i in range(N): - # shuffle fileA using shuffleBed - #cmd = 'shuffleBed -i '+fileA+' -g '+genomefile +'>fileA.shuffled' - # using random_interval.py - cmd = 'python /Users/xuebing/galaxy-dist/tools/mytools/random_interval.py '+fileA+' fileA.shuffled across '+genomefile - os.system(cmd) - intersect('fileA.shuffled',fileB,'tmp',fraction,reciprocal) - nOverlap.append(lineCount('tmp')) - os.system('rm tmp') - os.system('rm fileA.shuffled') - return nOverlap - -def main(): - fileA = sys.argv[1] - fileB = sys.argv[2] - outfile = sys.argv[3] - outplot = sys.argv[4] - N = int(sys.argv[5]) # times to shuffle - genomefile = sys.argv[6] - fraction = sys.argv[7] - if len(sys.argv) == 9: - reciprocal = sys.argv[8] # can only be '-r' - else: - reciprocal = '' - - print sys.argv - - # intersect on real data - intersect(fileA,fileB,outfile,fraction,reciprocal) - # number of overlaps - nOverlapReal = lineCount(outfile) - - print 'number of intervals in inputA that overlap with intervals in inputB:',nOverlapReal - - # shuffle fileA to estimate background - nOverlapNull = shuffle(fileA,fileB,genomefile,fraction,reciprocal,N) - - # plot histogram - rscript = open('tmp.r','w') - rscript.write("x0 <- "+str(nOverlapReal)+"\n") - rscript.write("x <- c("+','.join(map(str,nOverlapNull))+")\n") - rscript.write("library(MASS)\n") - rscript.write("\n") - rscript.write("pv <- min((1+sum(x>=x0))/length(x),(1+sum(x<=x0))/length(x))\n") - rscript.write("title <- paste('actual:chance = ',x0,':',round(mean(x)),' = ',format(x0/mean(x),digits=1,nsmall=2),', p-value < ',pv,sep='')\n") - rscript.write("pdf('"+outplot+"')\n") - rscript.write("h <- hist(x,breaks=50,xlab='number of overlaps',ylab='frequency',main=title)\n") - rscript.write("plot(h$mids,h$counts,type='h',xlim=c(min(h$mids,x0),max(x0,h$mids)),ylim=c(0,max(h$counts)),xlab='number of overlaps',ylab='frequency',main=title)\n") - rscript.write("points(x0,0,col='red')\n") - rscript.write("dev.off()") - rscript.close() - os.system("R --vanilla < tmp.r") - os.system('rm tmp.r') -main()