Mercurial > repos > xuebing > sharplab_interval_analysis
comparison metaintv_ext.py @ 20:16ba480adf96
Uploaded
| author | xuebing |
|---|---|
| date | Sat, 31 Mar 2012 08:31:22 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 19:d325683ec368 | 20:16ba480adf96 |
|---|---|
| 1 ''' | |
| 2 get binned score of intervals,allow extension | |
| 3 ''' | |
| 4 | |
| 5 import os,sys,numpy | |
| 6 import string, random | |
| 7 | |
| 8 from resize import * | |
| 9 | |
| 10 from bx.bbi.bigwig_file import BigWigFile | |
| 11 | |
def binning(x,n):
    '''
    Average a 1-D sequence of values into n consecutive bins.

    x: values to bin (anything numpy.asarray can turn into a 1-D float array)
    n: number of bins

    Returns a float array of length n holding the mean of each bin.
    An empty x gives all zeros. When x has fewer values than bins,
    neighbouring bins reuse the same value so that no bin is empty.
    '''
    y = numpy.zeros(n,dtype=float)
    # accept plain lists as well as arrays; slices need .mean()
    x = numpy.asarray(x,dtype=float)
    m = len(x)
    if m == 0:
        return y
    for k in range(n):
        # integer arithmetic keeps the edges exact, so the last bin always
        # ends at m and adjacent bins do not share a value
        i = (k*m)//n
        # every bin gets at least one value (matters only when m < n)
        j = max(((k+1)*m)//n, i+1)
        y[k] = x[i:j].mean()
    return y
| 24 | |
def getBinnedScore(bwfile,intvfile,nbin):
    '''
    Get the binned average and standard error of a bigWig signal over a
    set of intervals.

    bwfile:   path of the bigWig file holding the scores
    intvfile: path of a tab-delimited interval file with columns
              chrom,start,end,name,score,strand
    nbin:     number of bins each interval is reduced to

    Returns (avg,err,N): avg and err are arrays of length nbin, N is the
    number of intervals for which scores were found. err is the standard
    deviation across intervals divided by sqrt(N). If no interval could
    be used, avg and err are all zeros and N is 0.
    '''
    avg = numpy.zeros(nbin)
    sqr = numpy.zeros(nbin)
    N = 0
    # bigWig is a binary format, so read it in binary mode; 'with' closes
    # both files even if parsing fails half way
    with open(bwfile,'rb') as fbw:
        bw = BigWigFile(file=fbw)
        with open(intvfile) as fin:
            for line in fin:
                if not line.strip():
                    # tolerate blank lines (e.g. a trailing newline)
                    continue
                #chrom,start,end,name,score,strand
                flds = line.strip().split('\t')
                # get the score at base resolution as an array
                scores = bw.get_as_array(flds[0],int(flds[1]),int(flds[2]))
                # 'is None', not '== None': comparing an array with ==
                # is elementwise and has no single truth value
                if scores is None:
                    print('not found:\t %s %s' % (N,line))
                    continue
                N = N + 1
                # reverse if on minus strand; a row without a strand
                # column is left in its original orientation
                if len(flds) > 5 and flds[5] == '-':
                    scores = scores[::-1]
                # no score = 0
                scores = numpy.nan_to_num(scores)
                # bin the data and accumulate sum and sum of squares
                binned = binning(scores,nbin)
                avg = avg + binned
                sqr = sqr + binned**2
    if N == 0:
        # nothing to average; avoid dividing by zero
        return avg,numpy.zeros(nbin),N
    # compute avg and std error
    avg = avg / N
    # rounding can push the variance a hair below zero; clamp so the
    # square root does not turn it into nan
    var = numpy.maximum(sqr/N-avg**2,0)
    err = (var**0.5)/(N**0.5)
    return avg,err,N
| 59 | |
def getExtendedBinScore(bwfile,intvfile,nbins,exts):
    '''
    Get the binned average and standard error over the left flank, the
    center and the right flank of each interval, joined into one profile.

    bwfile:   path of the bigWig file holding the scores
    intvfile: path of the interval file
    nbins:    n1,n2,n3 - number of bins for left flank, center, right flank
    exts:     l1,l2,l3,l4 - the left flank runs from start-l1 to start+l2,
              the center from start+l2 to end-l3, the right flank from
              end-l3 to end+l4

    A flank is skipped when both of its extensions are 0, so it then
    contributes no bins. Returns (avg,err) as arrays.
    '''
    import tempfile

    # (message, new start, new end, number of bins) for each part to compute
    segments = []
    if exts[0]>0 or exts[1]>0:
        segments.append(('make left extension','start-'+str(exts[0]),'start+'+str(exts[1]),nbins[0]))
    segments.append(('make center region','start+'+str(exts[1]),'end-'+str(exts[2]),nbins[1]))
    if exts[2]>0 or exts[3]>0:
        segments.append(('make right region','end-'+str(exts[2]),'end+'+str(exts[3]),nbins[2]))

    avg = []
    err = []
    # mkstemp guarantees a name nobody else is using; the file stays in the
    # working directory, where the original scratch file was written.
    # NOTE(review): assumes resize() overwrites an existing output file - confirm
    fd,tmpfile = tempfile.mkstemp(prefix='metaintv_',suffix='.tmp',dir=os.getcwd())
    os.close(fd)
    try:
        for message,newstart,newend,nbin in segments:
            print(message)
            resize(intvfile,tmpfile,newstart,newend,'stranded')
            print('compute binned average')
            segavg,segerr,N = getBinnedScore(bwfile,tmpfile,nbin)
            print('regions used: %s' % N)
            avg = numpy.concatenate((avg,segavg))
            err = numpy.concatenate((err,segerr))
    finally:
        # remove the scratch file even when a step above failed
        try:
            os.remove(tmpfile)
        except OSError:
            pass
    return avg,err
| 91 | |
# Command line driver: compute the extended binned profile, save it as two
# whitespace-separated rows (average, standard error) and plot it with R.
import subprocess
import tempfile

if len(sys.argv) != 7:
    sys.exit('usage: '+sys.argv[0]+' bwfile intvfile exts nbins outfile outplot')
prog,bwfile,intvfile,exts,nbins,outfile,outplot = sys.argv
# nbins is 'n1,n2,n3' and exts is 'l1,l2,l3,l4'
nbins = numpy.fromstring(nbins,dtype=int,sep=',')
exts = numpy.fromstring(exts,dtype=int,sep=',')
if len(nbins) != 3 or len(exts) != 4:
    sys.exit('nbins must have 3 and exts 4 comma-separated integers')
avg, err = getExtendedBinScore(bwfile,intvfile,nbins,exts)
print('save data')
with open(outfile,'w') as out:
    numpy.savetxt(out, avg, fmt='%.6f', delimiter=' ', newline=' ')
    out.write('\n')
    numpy.savetxt(out, err, fmt='%.6f', delimiter=' ', newline=' ')
    out.write('\n')

print('plot')
# Positions (in bins) of the interval start and end along the profile.
# A flank whose two extensions are both 0 is skipped by
# getExtendedBinScore, so it contributes no bins and must not be counted
# here; this also avoids dividing by zero.
leftlen = exts[0]+exts[1]
rightlen = exts[2]+exts[3]
if leftlen > 0:
    leftbins = int(nbins[0])
    start = int(exts[0]*nbins[0]//leftlen)
else:
    leftbins = 0
    start = 0
end = leftbins+int(nbins[1])
if rightlen > 0:
    end = end+int(exts[2]*nbins[2]//rightlen)
# write the R script to a uniquely named scratch file so that concurrent
# runs in the same directory cannot overwrite each other's script
fd,rfile = tempfile.mkstemp(suffix='.r')
try:
    rscript = os.fdopen(fd,'w')
    try:
        rscript.write("options(warn=-1)\n")
        rscript.write("x <- read.table('"+outfile+"')\n")
        rscript.write("pdf('"+outplot+"')\n")
        rscript.write("avg <- x[1,]\n")
        rscript.write("err <- x[2,]\n")
        rscript.write("ylim=c(min(avg-err),max(avg+err))\n")
        rscript.write("xticks <- seq(ncol(x))\n")
        rscript.write("plot(xticks,avg,xlab='',ylab='average coverage',type='l',lwd=0,ylim=ylim,xaxt='n')\n")
        rscript.write("axis(1, at=c(min(xticks),"+str(start)+","+str(end)+",max(xticks)),labels=c(-"+str(exts[0])+",0,0,"+str(exts[3])+"), las=2)\n")
        rscript.write("polygon(c(xticks,rev(xticks)),c(avg+err,rev(avg-err)),col='lightgreen',border=NA)\n")
        rscript.write("lines(xticks,avg,type='l',lwd=1)\n")
        rscript.write("lines(c(min(xticks),max(xticks)),c(0,0),lwd=2)\n")
        rscript.write("lines(c("+str(start)+","+str(end)+"),c(0,0),lwd=10)\n")
        rscript.write("dev.off()\n")
    finally:
        rscript.close()
    # feed the script to R on stdin without going through a shell
    with open(rfile) as rin:
        subprocess.call(['R','--vanilla','--slave'],stdin=rin)
finally:
    os.remove(rfile)
| 128 |
