Mercurial > repos > xuebing > sharplab_interval_analysis
comparison binnedAverage.py @ 20:16ba480adf96
Uploaded
| author | xuebing | 
|---|---|
| date | Sat, 31 Mar 2012 08:31:22 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| 19:d325683ec368 | 20:16ba480adf96 | 
|---|---|
| 1 ''' | |
| 2 get binned score of intervals,allow extension | |
| 3 ''' | |
| 4 | |
| 5 import os,sys,numpy,random,string | |
| 6 | |
| 7 from resize import * | |
| 8 | |
| 9 from bx.bbi.bigwig_file import BigWigFile | |
| 10 | |
def binning(x,n):
    '''
    average x into n consecutive bins

    x: 1-D sequence of numbers (list or numpy array)
    n: number of bins
    returns a float numpy array of length n; all zeros when x is empty

    bin k covers x[k*len(x)//n : (k+1)*len(x)//n], so the bins do not
    overlap and together cover every element exactly once. When x has
    fewer elements than bins, a bin that would be empty takes the single
    element at its start position instead, so no bin is ever nan.
    '''
    y = numpy.zeros(n,dtype=float)
    # accept plain lists as well as arrays; slices need .mean()
    x = numpy.asarray(x,dtype=float)
    size = len(x)
    if size == 0:
        return y
    for k in range(n):
        # integer arithmetic keeps the boundaries exact; float products
        # such as (len/n)*n can round down and drop the last element
        i = (k*size)//n
        # at least one element per bin, without borrowing from the next
        # bin when the bin is already non-empty
        j = max(((k+1)*size)//n, i+1)
        y[k] = x[i:j].mean()
    return y
| 23 | |
def _writeBinnedScores(bw,fin,out,nbin):
    # write one row per interval line of fin: the line itself followed by
    # its nbin binned scores read from bw
    zeros = '\t'.join(['0']*nbin)
    for line in fin:
        #chrom,start,end,name,score,strand
        line = line.strip()
        if not line:
            # a blank line has no coordinates to look up
            continue
        flds = line.split('\t')
        #get the score at base resolution as an array
        scores = bw.get_as_array(flds[0],int(flds[1]),int(flds[2]))
        # 'is None', not '== None': on a numpy array '==' compares
        # elementwise and the result cannot be used as an 'if' condition
        if scores is None:
            # single string so the output is the same under the print
            # statement and the print function
            print('not found:\t '+line)
            out.write(line+'\t'+zeros+'\n')
            continue
        # reverse if on minus strand; lines without a strand column are
        # left in file orientation
        if len(flds) > 5 and flds[5] == '-':
            scores = scores[::-1]
        # no score = 0
        scores = numpy.nan_to_num(scores)
        # bin the data
        binned = binning(scores,nbin)
        out.write(line+'\t'+'\t'.join(map(str,binned))+'\n')

def _plotBinnedAverage(outfile,outplot,nbin):
    # plot mean +/- standard error of the last nbin columns of outfile
    # into the PDF outplot by piping a generated script into R
    import subprocess,tempfile
    rlines = [
        "options(warn=-1)\n",
        "x <- read.table('"+outfile+"',sep='\t')\n",
        "x <- x[,(ncol(x)+1-"+str(nbin)+"):ncol(x)]\n",
        "pdf('"+outplot+"')\n",
        "avg <- apply(x,2,mean)\n",
        "err <- apply(x,2,sd)/sqrt(nrow(x))\n",
        "print(avg)\n",
        "print(err)\n",
        "ylim=c(min(avg-err),max(avg+err))\n",
        "xticks <- seq(ncol(x))\n",
        "plot(xticks,avg,xlab='',ylab='average',type='l',lwd=0,ylim=ylim)\n",
        "polygon(c(xticks,rev(xticks)),c(avg+err,rev(avg-err)),col='lightgreen',border=NA)\n",
        "lines(xticks,avg,type='l',lwd=1)\n",
        "dev.off()\n",
    ]
    # mkstemp gives a unique file in the temp directory instead of a
    # guessable random name in the current working directory
    fd,tmp = tempfile.mkstemp(suffix='.R')
    try:
        with os.fdopen(fd,'w') as rscript:
            rscript.write(''.join(rlines))
        with open(tmp) as rin:
            try:
                # argument list + stdin redirect: no shell involved
                subprocess.call(['R','--vanilla'],stdin=rin)
            except OSError as err:
                # the score table is already written; a missing R only
                # costs the plot
                sys.stderr.write('could not run R: '+str(err)+'\n')
    finally:
        os.remove(tmp)

def getBinnedScore(bwfile,intvfile,outfile,outplot,nbin):
    '''
    get binned scores of every interval and plot their average

    bwfile:   path of the bigWig file the scores are read from
    intvfile: path of a tab-delimited interval file
              (chrom,start,end,name,score,strand); the strand column is
              optional, blank lines are skipped
    outfile:  path of the table to write: each interval line followed by
              nbin tab-separated binned scores (all '0' when the bigWig
              lookup returns nothing)
    outplot:  path of the PDF drawn by R from outfile; only produced
              when nbin > 1
    nbin:     number of bins per interval
    '''
    # bigWig is a binary format: read it in binary mode; 'with' closes
    # all three files even when a line fails to parse
    with open(bwfile,'rb') as fbw:
        bw = BigWigFile(file=fbw)
        with open(intvfile) as fin:
            with open(outfile,'w') as out:
                _writeBinnedScores(bw,fin,out,nbin)
    # plot
    if nbin > 1:
        _plotBinnedAverage(outfile,outplot,nbin)
| 74 | |
if __name__ == '__main__':
    # command line: prog bwfile intvfile nbin outfile outplot
    # (guarded so importing this module does not start a run)
    print(sys.argv)
    if len(sys.argv) != 6:
        sys.exit('usage: '+sys.argv[0]+' bwfile intvfile nbin outfile outplot')
    prog,bwfile,intvfile,nbin,outfile,outplot = sys.argv
    getBinnedScore(bwfile,intvfile,outfile,outplot,int(nbin))
