Mercurial > repos > xuebing > sharplab_interval_analysis
view metaintv2.py @ 22:869c7664e584
Uploaded
author | xuebing |
---|---|
date | Sat, 31 Mar 2012 11:52:14 -0400 |
parents | 16ba480adf96 |
children |
line wrap: on
line source
''' get binned score of intervals,allow extension

Command line usage:

    metaintv2.py bwfile intvfile exts nbins outfile outplot

bwfile   : bigWig file holding the per-base scores
intvfile : tab-delimited intervals (chrom,start,end,name,score,strand)
exts     : comma-separated extension lengths l1,l2,l3,l4
nbins    : comma-separated bin counts n1,n2,n3 (left,center,right region)
outfile  : text output, line 1 = binned averages, line 2 = their errors
outplot  : PDF drawn by R from outfile
'''

import os
import subprocess
import sys
import tempfile

import numpy

# NOTE: bx-python (BigWigFile) and the local resize module are imported inside
# the functions that use them, so that binning() stays importable on its own.


def binning(x,n):
    ''' average x into n bins

    x: 1-d sequence of numbers (anything numpy.asarray accepts)
    n: number of bins; coerced with int() so a numpy float is accepted

    Returns a float array of length n. An empty x gives all zeros.
    '''
    n = int(n)
    x = numpy.asarray(x)
    y = numpy.zeros(n,dtype=float)
    if len(x) == 0:
        return y
    step = float(len(x))/n
    for k in range(n):
        i = int(step*k)
        # the +1 keeps every slice non-empty when len(x) < n; as a side
        # effect neighbouring bins share one element
        j = int(step*(k+1)) + 1
        y[k] = x[i:j].mean()
    return y


def getBinnedScore(bwfile,intvfile,nbin):
    ''' get binned average and standard error over all intervals

    bwfile  : path of the bigWig file
    intvfile: path of the tab-delimited interval file; columns 1-3 are
              chrom,start,end and column 6, when present, is the strand
    nbin    : number of bins each interval is reduced to

    Returns (avg,err), two arrays of length nbin: the per-bin mean over the
    intervals and the standard deviation divided by sqrt(number of intervals).
    Intervals for which the bigWig returns None are reported on stdout and
    skipped. If no interval could be scored both arrays are all NaN.
    '''
    from bx.bbi.bigwig_file import BigWigFile

    nbin = int(nbin)
    avg = numpy.zeros(nbin)
    sqr = numpy.zeros(nbin)
    N = 0
    # bigWig is a binary format, so open it in binary mode
    with open(bwfile,'rb') as fbw:
        bw = BigWigFile(file=fbw)
        with open(intvfile) as fin:
            for line in fin:
                if not line.strip():
                    continue
                #chrom,start,end,name,score,strand
                flds = line.strip().split('\t')
                #get the score at base resolution as an array
                scores = bw.get_as_array(flds[0],int(flds[1]),int(flds[2]))
                # 'is None': '== None' on an array compares element-wise
                if scores is None:
                    print('not found:\t' + line)
                    continue
                N = N + 1
                # reverse if on minus strand (no strand column = plus strand)
                if len(flds) > 5 and flds[5] == '-':
                    scores = scores[::-1]
                # no score = 0
                scores = numpy.nan_to_num(scores)
                # bin the data
                binned = binning(scores,nbin)
                avg = avg + binned
                sqr = sqr + binned**2
    if N == 0:
        # nothing scored: same NaN result the 0/0 division used to produce
        return numpy.zeros(nbin) + numpy.nan, numpy.zeros(nbin) + numpy.nan
    # compute avg and std
    avg = avg / float(N)
    # rounding can push the variance slightly below zero; clamp before sqrt
    var = numpy.maximum(sqr/float(N) - avg**2, 0.0)
    err = (var**0.5)/(N**0.5)
    return avg,err


def getExtendedBinScore(bwfile,intvfile,nbins,exts):
    ''' binned score over left extension, center region and right region

    nbins: n1,n2,n3      bin counts for left, center and right region
    exts: l1,l2,l3,l4    left = [start-l1,start+l2],
                         center = [start+l2,end-l3],
                         right = [end-l3,end+l4]

    Each region is written by resize() to intvfile+'.tmp' (removed again
    before returning) and scored with getBinnedScore(). Returns (avg,err),
    the three regions concatenated in left,center,right order.
    '''
    from resize import resize

    if len(nbins) < 3 or len(exts) < 4:
        raise ValueError('need 3 bin counts and 4 extension lengths')

    tmpfile = intvfile + '.tmp'
    regions = (
        # make left extension
        ('start-'+str(exts[0]),'start+'+str(exts[1]),nbins[0]),
        # make center region
        ('start+'+str(exts[1]),'end-'+str(exts[2]),nbins[1]),
        # make right region
        ('end-'+str(exts[2]),'end+'+str(exts[3]),nbins[2]),
    )
    avgs = []
    errs = []
    try:
        for newstart,newend,nbin in regions:
            resize(intvfile,tmpfile,newstart,newend,'stranded')
            # compute binned average
            avg,err = getBinnedScore(bwfile,tmpfile,nbin)
            avgs.append(avg)
            errs.append(err)
    finally:
        if os.path.exists(tmpfile):
            os.remove(tmpfile)
    return numpy.concatenate(avgs),numpy.concatenate(errs)


def _rstring(text):
    ''' text as a single-quoted R string literal (escapes \\ and ') '''
    return "'" + text.replace("\\","\\\\").replace("'","\\'") + "'"


def _plot(outfile,outplot):
    ''' run R to draw the avg +/- err band stored in outfile into the PDF outplot

    Returns the exit status of R.
    '''
    lines = [
        "options(warn=-1)\n",
        "x <- read.table("+_rstring(outfile)+")\n",
        "pdf("+_rstring(outplot)+")\n",
        "avg <- x[1,]\n",
        "err <- x[2,]\n",
        "print(x)\n",
        "ylim=c(min(avg-err),max(avg+err))\n",
        "xticks <- seq(ncol(x))\n",
        "plot(xticks,avg,ylab='average coverage',type='l',lwd=0,ylim=ylim)\n",
        "polygon(c(xticks,rev(xticks)),c(avg+err,rev(avg-err)),col='lightgreen',border=NA)\n",
        "lines(xticks,avg,type='l',lwd=1)\n",
        "dev.off()\n",
    ]
    # unique temporary script instead of a fixed 'tmp.r' in the working directory
    rscript = tempfile.NamedTemporaryFile(mode='w',suffix='.r',delete=False)
    try:
        try:
            rscript.writelines(lines)
        finally:
            rscript.close()
        with open(rscript.name) as rin:
            return subprocess.call(['R','--vanilla'],stdin=rin)
    finally:
        os.remove(rscript.name)


def main(argv):
    ''' command line entry point; argv is the full sys.argv list

    Returns the exit status of the R plotting step.
    '''
    print(argv)
    if len(argv) != 7:
        sys.exit('usage: ' + argv[0] + ' bwfile intvfile exts nbins outfile outplot')
    # argv[0] is the script name, the six arguments follow it
    bwfile,intvfile,exts,nbins,outfile,outplot = argv[1:]
    # bin counts are used as array sizes and loop bounds, so they must be ints
    nbins = numpy.fromstring(nbins,dtype=int,sep=',')
    # NOTE(review): exts is still parsed as floats, so resize() receives
    # expressions such as 'start-1000.0' -- confirm resize() accepts that
    exts = numpy.fromstring(exts,sep=',')
    avg, err = getExtendedBinScore(bwfile,intvfile,nbins,exts)
    with open(outfile,'w') as out:
        numpy.savetxt(out, avg, fmt='%.6f', delimiter=' ', newline=' ')
        out.write('\n')
        numpy.savetxt(out, err, fmt='%.6f', delimiter=' ', newline=' ')
        out.write('\n')
    # plot
    return _plot(outfile,outplot)


if __name__ == '__main__':
    sys.exit(main(sys.argv))