diff metaintv_ext.py @ 14:76e1b1b21cce default tip

Deleted selected files
author xuebing
date Tue, 13 Mar 2012 19:05:10 -0400
parents 292186c14b08
children
line wrap: on
line diff
--- a/metaintv_ext.py	Sat Mar 10 08:17:36 2012 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,128 +0,0 @@
-'''
-get binned score of intervals,allow extension
-'''
-
-import os,sys,numpy
-import string, random
-
-from resize import *
-
-from bx.bbi.bigwig_file import BigWigFile
-
def binning(x,n):
    '''
    Average x into n consecutive bins.

    x: 1-D sequence of numbers (list or numpy array)
    n: number of bins

    Returns a float numpy array of length n with the mean of each bin.
    An empty x gives all zeros. Bins do not overlap as long as x has at
    least n elements; when x is shorter than n, neighbouring bins reuse
    the same element so that no bin is ever empty.
    '''
    x = numpy.asarray(x,dtype=float)
    y = numpy.zeros(n,dtype=float)
    size = len(x)
    if size == 0:
        return y
    for k in range(n):
        # integer arithmetic keeps the bin edges exact (no float rounding),
        # and the last edge always lands on len(x)
        i = k*size//n
        # force at least one element per bin instead of widening every
        # bin by one, which made adjacent bins share an element
        j = max((k+1)*size//n, i+1)
        y[k] = x[i:j].mean()
    return y
-
def getBinnedScore(bwfile,intvfile,nbin):
    '''
    Get the binned average signal over a set of intervals.

    bwfile:   path to a bigWig file
    intvfile: path to a tab-delimited interval file with columns
              chrom,start,end,name,score,strand (strand is optional;
              intervals without it are treated as plus strand)
    nbin:     number of bins each interval is reduced to

    Returns (avg,err,N):
      avg: per-bin mean over all intervals that had data
      err: per-bin standard error of the mean (std / sqrt(N))
      N:   number of intervals used
    If no interval could be used, avg and err are filled with nan.
    '''
    avg = numpy.zeros(nbin)
    sqr = numpy.zeros(nbin)
    N = 0
    # bigWig is a binary format, so open it in binary mode; the with-block
    # closes both files even if parsing fails half way
    with open(bwfile,'rb') as fbw, open(intvfile) as fin:
        bw = BigWigFile(file=fbw)
        for line in fin:
            if not line.strip():
                # skip blank lines instead of failing on the missing columns
                continue
            #chrom,start,end,name,score,strand
            flds = line.strip().split('\t')
            #get the score at base resolution as an array
            scores = bw.get_as_array(flds[0],int(flds[1]),int(flds[2]))
            # identity test: '== None' on a numpy array compares elementwise
            # and cannot be used as a condition
            if scores is None:
                # presumably the region is absent from the bigWig - TODO confirm
                print('not found:\t%s %s' % (N,line))
                continue
            N = N + 1
            # reverse if on minus strand
            if len(flds) > 5 and flds[5] == '-':
                scores = scores[::-1]
            # no score = 0
            scores = numpy.nan_to_num(scores)
            # bin the data
            binned = binning(scores,nbin)
            avg = avg + binned
            sqr = sqr + binned**2
    if N == 0:
        # nothing to average: report nan explicitly rather than dividing by 0
        nodata = numpy.zeros(nbin) + numpy.nan
        return nodata,nodata.copy(),N
    # compute avg and standard error
    avg = avg / N
    # rounding can push the variance a hair below zero; clip so sqrt is defined
    var = numpy.maximum(sqr/N - avg**2, 0)
    err = (var**0.5)/(N**0.5)
    return avg,err,N
-
def getExtendedBinScore(bwfile,intvfile,nbins,exts):
    '''
    Get the binned average signal over left flank, center and right flank
    of each interval, concatenated in that order.

    bwfile:   path to a bigWig file
    intvfile: path to the interval file
    nbins: n1,n2,n3     number of bins for left flank, center, right flank
    exts:  l1,l2,l3,l4  the left flank is start-l1 .. start+l2, the center
                        is start+l2 .. end-l3 and the right flank is
                        end-l3 .. end+l4 (expressions handed to resize in
                        'stranded' mode)

    A flank is skipped when both of its extensions are <= 0.
    Returns (avg,err): the concatenated per-bin averages and errors as
    returned by getBinnedScore.
    '''
    import tempfile
    avg = []
    err = []
    # mkstemp gives a unique, safely created scratch file; the previous
    # random 8-character name in the working directory could collide
    handle,tmpfile = tempfile.mkstemp(prefix='metaintv_',suffix='.tmp')
    os.close(handle)
    try:
        if exts[0]>0 or exts[1]>0:
            print('make left extension')
            resize(intvfile,tmpfile,'start-'+str(exts[0]),'start+'+str(exts[1]),'stranded')
            print('compute binned average')
            avg,err,N = getBinnedScore(bwfile,tmpfile,nbins[0])
            print('regions used: %s' % N)
        print('make center region')
        resize(intvfile,tmpfile,'start+'+str(exts[1]),'end-'+str(exts[2]),'stranded')
        print('compute binned average')
        avg1,err1,N = getBinnedScore(bwfile,tmpfile,nbins[1])
        print('regions used: %s' % N)
        avg = numpy.concatenate((avg,avg1))
        err = numpy.concatenate((err,err1))
        if exts[2]>0 or exts[3]>0:
            print('make right region')
            resize(intvfile,tmpfile,'end-'+str(exts[2]),'end+'+str(exts[3]),'stranded')
            print('compute binned average')
            avg2,err2,N = getBinnedScore(bwfile,tmpfile,nbins[2])
            print('regions used: %s' % N)
            avg = numpy.concatenate((avg,avg2))
            err = numpy.concatenate((err,err2))
    finally:
        # always clean up the scratch file, also when a step above raised
        if os.path.exists(tmpfile):
            os.remove(tmpfile)
    return avg,err
-
def axisBreaks(nbins,exts):
    '''
    Locate the interval start and end on the bin axis of the profile.

    nbins: n1,n2,n3     bins for left flank, center, right flank
    exts:  l1,l2,l3,l4  flank extensions

    Returns (start,end) as integers. A flank whose extensions are both
    <= 0 is not part of the profile, so it contributes no bins and no
    offset; this also avoids dividing by a zero-length flank.
    '''
    leftbins = 0
    start = 0
    if exts[0]>0 or exts[1]>0:
        leftbins = int(nbins[0])
        span = int(exts[0])+int(exts[1])
        if span != 0:
            start = int(exts[0])*leftbins//span
    end = leftbins+int(nbins[1])
    if exts[2]>0 or exts[3]>0:
        span = int(exts[2])+int(exts[3])
        if span != 0:
            end = end+int(exts[2])*int(nbins[2])//span
    return start,end

def rQuote(text):
    '''
    Return text as a single-quoted R string literal, escaping backslashes
    and single quotes so that file names cannot break out of the literal.
    '''
    return "'"+str(text).replace("\\","\\\\").replace("'","\\'")+"'"

def writeRScript(rscript,outfile,outplot,start,end,exts):
    '''
    Write the R commands that plot the profile stored in outfile to the
    PDF outplot. rscript is an open, writable text file.
    '''
    rscript.write("options(warn=-1)\n")
    rscript.write("x <- read.table("+rQuote(outfile)+")\n")
    rscript.write("pdf("+rQuote(outplot)+")\n")
    rscript.write("avg <- x[1,]\n")
    rscript.write("err <- x[2,]\n")
    rscript.write("ylim=c(min(avg-err),max(avg+err))\n")
    rscript.write("xticks <- seq(ncol(x))\n")
    rscript.write("plot(xticks,avg,xlab='',ylab='average coverage',type='l',lwd=0,ylim=ylim,xaxt='n')\n")
    rscript.write("axis(1, at=c(min(xticks),"+str(start)+","+str(end)+",max(xticks)),labels=c(-"+str(exts[0])+",0,0,"+str(exts[3])+"), las=2)\n")
    rscript.write("polygon(c(xticks,rev(xticks)),c(avg+err,rev(avg-err)),col='lightgreen',border=NA)\n")
    rscript.write("lines(xticks,avg,type='l',lwd=1)\n")
    rscript.write("lines(c(min(xticks),max(xticks)),c(0,0),lwd=2)\n")
    rscript.write("lines(c("+str(start)+","+str(end)+"),c(0,0),lwd=10)\n")
    rscript.write("dev.off()\n")

def main(argv=None):
    '''
    Command line entry point.

    usage: prog bwfile intvfile exts nbins outfile outplot
      exts:    l1,l2,l3,l4 (comma separated integers)
      nbins:   n1,n2,n3    (comma separated integers)
      outfile: text file receiving one line of averages and one of errors
      outplot: PDF file receiving the plot (drawn by R)
    '''
    import subprocess
    import tempfile
    if argv is None:
        argv = sys.argv
    if len(argv) != 7:
        sys.exit('usage: %s bwfile intvfile exts nbins outfile outplot' % argv[0])
    prog,bwfile,intvfile,exts,nbins,outfile,outplot = argv
    # int() rejects malformed numbers instead of silently dropping them
    nbins = numpy.array([int(v) for v in nbins.split(',')])
    exts = numpy.array([int(v) for v in exts.split(',')])
    if len(nbins) != 3 or len(exts) != 4:
        sys.exit('nbins needs 3 values (n1,n2,n3) and exts needs 4 (l1,l2,l3,l4)')
    avg, err = getExtendedBinScore(bwfile,intvfile,nbins,exts)
    print('save data')
    with open(outfile,'w') as out:
        numpy.savetxt(out, avg, fmt='%.6f', delimiter=' ', newline=' ')
        out.write('\n')
        numpy.savetxt(out, err, fmt='%.6f', delimiter=' ', newline=' ')
        out.write('\n')

    print('plot')
    start,end = axisBreaks(nbins,exts)
    # a unique script file keeps concurrent runs in one directory from
    # overwriting each other's tmp.r
    handle,scriptname = tempfile.mkstemp(prefix='metaintv_',suffix='.r')
    try:
        rscript = os.fdopen(handle,'w')
        try:
            writeRScript(rscript,outfile,outplot,start,end,exts)
        finally:
            rscript.close()
        # feed the script on stdin without going through a shell
        with open(scriptname) as script:
            status = subprocess.call(['R','--vanilla','--slave'],stdin=script)
        if status != 0:
            sys.stderr.write('R exited with status %s\n' % status)
    finally:
        if os.path.exists(scriptname):
            os.remove(scriptname)

if __name__ == '__main__':
    main()
-