Mercurial > repos > xuebing > sharplab_interval_analysis
view binnedAverage.py @ 20:16ba480adf96
Uploaded
author | xuebing |
---|---|
date | Sat, 31 Mar 2012 08:31:22 -0400 |
parents | |
children |
line wrap: on
line source
''' get binned score of intervals,allow extension

Usage: binnedAverage.py bwfile intvfile nbin outfile outplot
'''

import os
import random
import string
import subprocess
import sys
import tempfile

import numpy

# NOTE(review): nothing in this file appears to use a name from resize;
# the star import is kept untouched -- confirm before removing it.
from resize import *
from bx.bbi.bigwig_file import BigWigFile


def binning(x,n):
    ''' Average x into n bins.

    x: 1-D sequence of numbers (converted with numpy.asarray).
    n: number of bins.

    Returns a float array of length n holding the mean of each bin;
    all zeros when x is empty.
    '''
    y = numpy.zeros(n,dtype=float)
    x = numpy.asarray(x)
    if len(x) == 0:
        return y
    step = float(len(x))/n
    for k in range(n):
        i = int(step*k)
        # The "+ 1" makes neighbouring bins share one element and keeps the
        # slice non-empty when len(x) < n (j is always at least i + 1).
        # Kept as-is so that existing outputs do not change.
        j = int(step*(k+1)) + 1
        y[k] = x[i:j].mean()
    return y


def _rQuote(text):
    ''' Return text as a single-quoted R string literal. '''
    return "'" + text.replace('\\','\\\\').replace("'","\\'") + "'"


def _plotBinnedAverage(outfile,outplot,nbin):
    ''' Plot the per-bin average +/- standard error of outfile into outplot.

    Writes an R script to a temporary file, feeds it to "R --vanilla" on
    stdin and removes the temporary file afterwards.
    '''
    script = [
        "options(warn=-1)",
        "x <- read.table("+_rQuote(outfile)+",sep='\t')",
        # the binned values are the last nbin columns of the table
        "x <- x[,(ncol(x)+1-"+str(nbin)+"):ncol(x)]",
        "pdf("+_rQuote(outplot)+")",
        "avg <- apply(x,2,mean)",
        "err <- apply(x,2,sd)/sqrt(nrow(x))",
        "print(avg)",
        "print(err)",
        "ylim=c(min(avg-err),max(avg+err))",
        "xticks <- seq(ncol(x))",
        "plot(xticks,avg,xlab='',ylab='average',type='l',lwd=0,ylim=ylim)",
        "polygon(c(xticks,rev(xticks)),c(avg+err,rev(avg-err)),col='lightgreen',border=NA)",
        "lines(xticks,avg,type='l',lwd=1)",
        "dev.off()",
    ]
    fd,tmp = tempfile.mkstemp(suffix='.R')
    try:
        with os.fdopen(fd,'w') as rscript:
            rscript.write('\n'.join(script)+'\n')
        with open(tmp) as rin:
            try:
                # argument list + stdin redirection: no shell is involved
                subprocess.call(['R','--vanilla'],stdin=rin)
            except OSError as err:
                # plotting stays best-effort: a missing R does not abort the run
                sys.stderr.write('could not run R: '+str(err)+'\n')
    finally:
        os.remove(tmp)


def getBinnedScore(bwfile,intvfile,outfile,outplot,nbin):
    ''' get binned average and std

    bwfile:   path of the bigWig file holding the scores.
    intvfile: path of a tab-separated interval file with columns
              chrom,start,end,name,score,strand; a missing strand column
              is treated as plus strand, blank lines are skipped.
    outfile:  path of the output table; every interval line is copied and
              followed by nbin tab-separated binned scores.
    outplot:  path of the PDF written by R (only when nbin > 1).
    nbin:     number of bins per interval.
    '''
    zeros = '\t'.join(['0']*nbin)
    # bigWig is a binary format, so read it in binary mode
    with open(bwfile,'rb') as fbw:
        bw = BigWigFile(file=fbw)
        with open(intvfile) as fin:
            with open(outfile,'w') as out:
                for line in fin:
                    line = line.strip()
                    if not line:
                        continue
                    flds = line.split('\t')
                    # get the score at base resolution as an array
                    scores = bw.get_as_array(flds[0],int(flds[1]),int(flds[2]))
                    # "is None": comparing an array with == is elementwise
                    if scores is None:
                        print('not found:\t' + line)
                        out.write(line+'\t'+zeros+'\n')
                        continue
                    # reverse if on minus strand
                    if len(flds) > 5 and flds[5] == '-':
                        scores = scores[::-1]
                    # no score = 0
                    scores = numpy.nan_to_num(scores)
                    # bin the data
                    binned = binning(scores,nbin)
                    out.write(line+'\t'+'\t'.join([str(v) for v in binned])+'\n')
    # plot
    if nbin > 1:
        _plotBinnedAverage(outfile,outplot,nbin)


def main(argv):
    ''' Command-line entry point; argv has the layout of sys.argv. '''
    print(argv)
    if len(argv) != 6:
        sys.exit('usage: binnedAverage.py bwfile intvfile nbin outfile outplot')
    prog,bwfile,intvfile,nbin,outfile,outplot = argv
    getBinnedScore(bwfile,intvfile,outfile,outplot,int(nbin))


if __name__ == '__main__':
    main(sys.argv)