Mercurial > repos > xuebing > sharplabtool
diff metaintv_ext.py @ 14:76e1b1b21cce default tip
Deleted selected files
author | xuebing |
---|---|
date | Tue, 13 Mar 2012 19:05:10 -0400 |
parents | 292186c14b08 |
children |
line wrap: on
line diff
--- a/metaintv_ext.py Sat Mar 10 08:17:36 2012 -0500
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,128 +0,0 @@
-'''
-get binned score of intervals,allow extension
-'''
-
-import os,sys,numpy
-import string, random
-
-from resize import *
-
-from bx.bbi.bigwig_file import BigWigFile
-
-def binning(x,n):
-    # make n bin of x
-    y = numpy.zeros(n,dtype=float)
-    if len(x) == 0:
-        return y
-    step = float(len(x))/n
-    for k in range(n):
-        i = int(step*k)
-        j = int(step*(k+1)) + 1
-        y[k] = x[i:j].mean()
-        #print i,j,k,y[k]
-    return y
-
-def getBinnedScore(bwfile,intvfile,nbin):
-    '''
-    get binned average and std
-    '''
-    fbw = open(bwfile)
-    bw = BigWigFile(file=fbw)
-    fin = open(intvfile)
-    avg = numpy.zeros(nbin)
-    sqr = numpy.zeros(nbin)
-    N = 0
-    for line in fin:
-        #print N
-        #chrom,start,end,name,score,strand
-        flds = line.strip().split('\t')
-        #get the score at base resolution as an array
-        scores = bw.get_as_array(flds[0],int(flds[1]),int(flds[2]))
-        if scores == None:
-            print 'not found:\t',N,line
-            continue
-        N = N + 1
-        #print line,scores
-        # reverse if on minus strand
-        if flds[5] == '-':
-            scores = scores[::-1]
-        # no score = 0
-        scores = numpy.nan_to_num(scores)
-        # bin the data
-        binned = binning(scores,nbin)
-        avg = avg + binned
-        sqr = sqr + binned**2
-    # compute avg and std
-    avg = avg / N
-    err = ((sqr/N-avg**2)**0.5)/(N**0.5)
-    return avg,err,N
-
-def getExtendedBinScore(bwfile,intvfile,nbins,exts):
-    '''
-    nbins: n1,n2,n3
-    exts: l1,l2,l3,l4
-    '''
-    avg = []
-    err = []
-    tmpfile = "".join(random.sample(string.letters+string.digits, 8))
-    if exts[0]>0 or exts[1]>0:
-        print 'make left extension'
-        resize(intvfile,tmpfile,'start-'+str(exts[0]),'start+'+str(exts[1]),'stranded')
-        print 'compute binned average'
-        avg,err,N = getBinnedScore(bwfile,tmpfile,nbins[0])
-        print 'regions used:',N
-    print 'make center region'
-    resize(intvfile,tmpfile,'start+'+str(exts[1]),'end-'+str(exts[2]),'stranded')
-    print 'compute binned average'
-    avg1,err1,N = getBinnedScore(bwfile,tmpfile,nbins[1])
-    print 'regions used:',N
-    avg = numpy.concatenate((avg,avg1))
-    err = numpy.concatenate((err,err1))
-    if exts[2]>0 or exts[3]>0:
-        print 'make right region'
-        resize(intvfile,tmpfile,'end-'+str(exts[2]),'end+'+str(exts[3]),'stranded')
-        print 'compute binned average'
-        avg2,err2,N = getBinnedScore(bwfile,tmpfile,nbins[2])
-        print 'regions used:',N
-        avg = numpy.concatenate((avg,avg2))
-        err = numpy.concatenate((err,err2))
-    os.system('rm '+tmpfile)
-    return avg,err
-
-prog,bwfile,intvfile,exts,nbins,outfile,outplot = sys.argv
-nbins = numpy.fromstring(nbins,dtype=int,sep=',')
-exts = numpy.fromstring(exts,dtype=int,sep=',')
-avg, err = getExtendedBinScore(bwfile,intvfile,nbins,exts)
-print 'save data'
-out = open(outfile,'w')
-numpy.savetxt(out, avg, fmt='%.6f', delimiter=' ', newline=' ')
-out.write('\n')
-numpy.savetxt(out, err, fmt='%.6f', delimiter=' ', newline=' ')
-out.write('\n')
-out.close()
-
-print 'plot'
-start = exts[0]*nbins[0]/(exts[0]+exts[1])
-end = nbins[0]+nbins[1]+exts[2]*nbins[2]/(exts[2]+exts[3])
-#print start,end
-rscript = open("tmp.r","w")
-rscript.write("options(warn=-1)\n")
-rscript.write("x <- read.table('"+outfile+"')\n")
-rscript.write("pdf('"+outplot+"')\n")
-rscript.write("avg <- x[1,]\n")
-rscript.write("err <- x[2,]\n")
-#rscript.write("print(x)\n")
-rscript.write("ylim=c(min(avg-err),max(avg+err))\n")
-rscript.write("xticks <- seq(ncol(x))\n")
-#rscript.write("print(xticks)\n")
-rscript.write("plot(xticks,avg,xlab='',ylab='average coverage',type='l',lwd=0,ylim=ylim,xaxt='n')\n")
-rscript.write("axis(1, at=c(min(xticks),"+str(start)+","+str(end)+",max(xticks)),labels=c(-"+str(exts[0])+",0,0,"+str(exts[3])+"), las=2)\n")
-rscript.write("polygon(c(xticks,rev(xticks)),c(avg+err,rev(avg-err)),col='lightgreen',border=NA)\n")
-rscript.write("lines(xticks,avg,type='l',lwd=1)\n")
-rscript.write("lines(c(min(xticks),max(xticks)),c(0,0),lwd=2)\n")
-rscript.write("lines(c("+str(start)+","+str(end)+"),c(0,0),lwd=10)\n")
-rscript.write("dev.off()\n")
-rscript.close()
-os.system("R --vanilla --slave < tmp.r")
-os.system("rm tmp.r")
-