Mercurial > repos > xuebing > sharplabtool
diff binnedAverage.py @ 14:76e1b1b21cce default tip
Deleted selected files
author | xuebing |
---|---|
date | Tue, 13 Mar 2012 19:05:10 -0400 |
parents | 292186c14b08 |
children |
line wrap: on
line diff
# Recovered from the removed side of this changeset's diff:
#   --- a/binnedAverage.py Sat Mar 10 08:17:36 2012 -0500
#   +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
#   @@ -1,77 +0,0 @@
'''
get binned score of intervals,allow extension
'''

import os
import random
import string
import subprocess
import sys

import numpy

from resize import *

from bx.bbi.bigwig_file import BigWigFile


def binning(x,n):
    '''
    Average x into n bins.

    x must support len(), slicing, and .mean() on its slices (the caller
    passes a numpy array). Returns a numpy float array of length n; it is
    all zeros when x is empty.

    Bin k averages x[int(step*k) : int(step*(k+1)) + 1] with step = len(x)/n.
    The "+ 1" keeps every slice non-empty when len(x) < n.
    NOTE(review): it also makes adjacent bins share one element; kept as-is
    so that numerical output does not change - confirm whether intended.
    '''
    y = numpy.zeros(n,dtype=float)
    # nothing to average (or no bins requested, which would divide by zero)
    if n == 0 or len(x) == 0:
        return y
    step = float(len(x))/n
    for k in range(n):
        i = int(step*k)
        j = int(step*(k+1)) + 1
        y[k] = x[i:j].mean()
    return y


def _r_string(text):
    '''Return text as an R single-quoted string literal (escapes \\ and \').'''
    return "'" + text.replace("\\","\\\\").replace("'","\\'") + "'"


def _plot_binned_average(outfile,outplot,nbin):
    '''
    Run R to plot the per-bin mean +/- standard error of the last nbin
    columns of outfile into the PDF outplot.

    The script is fed to "R --vanilla" on stdin, so no temporary script file
    is created and no shell is involved. If R cannot be started, a message is
    written to stderr and the plot is skipped.
    '''
    rscript = "\n".join([
        "options(warn=-1)",
        "x <- read.table("+_r_string(outfile)+",sep='\t')",
        "x <- x[,(ncol(x)+1-"+str(nbin)+"):ncol(x)]",
        "pdf("+_r_string(outplot)+")",
        "avg <- apply(x,2,mean)",
        "err <- apply(x,2,sd)/sqrt(nrow(x))",
        "print(avg)",
        "print(err)",
        "ylim=c(min(avg-err),max(avg+err))",
        "xticks <- seq(ncol(x))",
        "plot(xticks,avg,xlab='',ylab='average',type='l',lwd=0,ylim=ylim)",
        "polygon(c(xticks,rev(xticks)),c(avg+err,rev(avg-err)),col='lightgreen',border=NA)",
        "lines(xticks,avg,type='l',lwd=1)",
        "dev.off()",
    ]) + "\n"
    try:
        proc = subprocess.Popen(['R','--vanilla'],stdin=subprocess.PIPE,
                                universal_newlines=True)
    except OSError as err:
        # the os.system-based version did not abort when R was unavailable
        sys.stderr.write('could not run R: %s\n' % err)
        return
    proc.communicate(rscript)


def getBinnedScore(bwfile,intvfile,outfile,outplot,nbin):
    '''
    get binned average and std

    bwfile   -- path of the bigWig file holding the scores
    intvfile -- path of a tab-separated interval file; columns are
                chrom,start,end,name,score,strand (only chrom, start, end
                and, when present, strand are read)
    outfile  -- path of the output table: each input line followed by nbin
                tab-separated bin averages
    outplot  -- path of the PDF plot, written by R only when nbin > 1
    nbin     -- number of bins per interval

    Intervals for which the bigWig lookup returns None are reported on
    stdout as 'not found' and written with all-zero bins. Blank lines in
    intvfile are skipped.
    '''
    zeros = '\t'.join(['0']*nbin)
    # bigWig is a binary format, so read it in binary mode; the with-blocks
    # close all three files even if a line fails to parse
    with open(bwfile,'rb') as fbw:
        bw = BigWigFile(file=fbw)
        with open(intvfile) as fin:
            with open(outfile,'w') as out:
                for line in fin:
                    #chrom,start,end,name,score,strand
                    line = line.strip()
                    if not line:
                        continue
                    flds = line.split('\t')
                    #get the score at base resolution as an array
                    scores = bw.get_as_array(flds[0],int(flds[1]),int(flds[2]))
                    # identity test: "== None" on a numpy array compares elementwise
                    if scores is None:
                        sys.stdout.write('not found:\t ' + line + '\n')
                        out.write(line+'\t'+zeros+'\n')
                        continue
                    # reverse if on minus strand (no strand column = not minus)
                    if len(flds) > 5 and flds[5] == '-':
                        scores = scores[::-1]
                    # no score = 0
                    scores = numpy.nan_to_num(scores)
                    # bin the data
                    binned = binning(scores,nbin)
                    out.write(line+'\t'+'\t'.join(map(str,binned))+'\n')
    # plot
    if nbin > 1:
        _plot_binned_average(outfile,outplot,nbin)


if __name__ == '__main__':
    # usage: binnedAverage.py bwfile intvfile nbin outfile outplot
    print(sys.argv)
    prog,bwfile,intvfile,nbin,outfile,outplot = sys.argv
    getBinnedScore(bwfile,intvfile,outfile,outplot,int(nbin))