Mercurial > repos > xuebing > sharplab_interval_analysis
comparison interval/metaintv2.py @ 18:9bbb37e8683f
Uploaded
author | xuebing |
---|---|
date | Sat, 31 Mar 2012 08:24:32 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
17:688d26323b28 | 18:9bbb37e8683f |
---|---|
1 ''' | |
2 get binned score of intervals,allow extension | |
3 ''' | |
4 | |
5 import os,sys,numpy | |
6 | |
7 from resize import * | |
8 | |
9 from bx.bbi.bigwig_file import BigWigFile | |
10 | |
def binning(x,n):
    '''
    Average the values of x into n consecutive bins.

    x: sequence of numbers (list, tuple or numpy array)
    n: number of bins; coerced with int() because the command-line
       driver hands over numpy floating-point values
    returns: float numpy array of length n holding the mean of each bin;
             all zeros when x is empty
    '''
    n = int(n)
    # accept plain sequences as well as arrays; arrays pass through untouched
    x = numpy.asarray(x)
    y = numpy.zeros(n,dtype=float)
    if n == 0 or len(x) == 0:
        # nothing to average (also avoids dividing by n == 0 below)
        return y
    step = float(len(x))/n
    for k in range(n):
        i = int(step*k)
        # The +1 keeps the slice non-empty when len(x) < n. As a side
        # effect every bin but the last also includes the first element
        # of the following bin; kept as is so results stay unchanged.
        j = int(step*(k+1)) + 1
        y[k] = x[i:j].mean()
    return y
23 | |
def getBinnedScore(bwfile,intvfile,nbin):
    '''
    get binned average and standard error over all intervals

    bwfile:   path of the bigWig file holding the per-base scores
    intvfile: path of a tab-separated interval file with columns
              chrom,start,end,name,score,strand (strand is optional;
              intervals without it are treated as plus strand)
    nbin:     number of bins each interval is divided into
    returns:  (avg,err) - two arrays of length nbin with the mean binned
              score and its standard error across the intervals that
              were found in the bigWig file. Both are all NaN when no
              interval was found.
    '''
    nbin = int(nbin)
    avg = numpy.zeros(nbin)
    sqr = numpy.zeros(nbin)
    N = 0
    # bigWig is a binary format, so open it in binary mode; the with
    # blocks make sure both files are closed even if parsing fails
    with open(bwfile,'rb') as fbw:
        bw = BigWigFile(file=fbw)
        with open(intvfile) as fin:
            for line in fin:
                if not line.strip():
                    # ignore blank lines
                    continue
                #chrom,start,end,name,score,strand
                flds = line.strip().split('\t')
                #get the score at base resolution as an array
                scores = bw.get_as_array(flds[0],int(flds[1]),int(flds[2]))
                # identity test: '== None' on an array compares elementwise
                if scores is None:
                    sys.stdout.write('not found:\t' + line + '\n')
                    continue
                N = N + 1
                # reverse if on minus strand
                if len(flds) > 5 and flds[5] == '-':
                    scores = scores[::-1]
                # no score = 0
                scores = numpy.nan_to_num(scores)
                # bin the data
                binned = binning(scores,nbin)
                avg = avg + binned
                sqr = sqr + binned**2
    if N == 0:
        # nothing to average: report NaN explicitly instead of dividing by 0
        avg = numpy.zeros(nbin)
        avg.fill(numpy.nan)
        return avg,avg.copy()
    # compute avg and std
    avg = avg / N
    # rounding can push the variance slightly below zero; clip it so the
    # square root does not turn it into NaN
    var = numpy.maximum(sqr/N-avg**2,0.0)
    err = (var**0.5)/(N**0.5)
    return avg,err
57 | |
def getExtendedBinScore(bwfile,intvfile,nbins,exts):
    '''
    get binned average and standard error over three adjacent regions of
    every interval and join them into one profile

    nbins: n1,n2,n3     number of bins for the left, center and right region
    exts:  l1,l2,l3,l4  offsets defining the regions:
             left   = start-l1 .. start+l2
             center = start+l2 .. end-l3
             right  = end-l3   .. end+l4
    returns: (avg,err) - the per-region results of getBinnedScore
             concatenated in left,center,right order

    Each region is produced by resize() (imported from the resize module,
    not visible here; presumably it writes the resized, strand-aware
    intervals to the path given as its second argument - confirm there).
    '''
    tmpfile = intvfile+'.tmp'
    regions = (
        # (new start, new end, number of bins)
        ('start-'+str(exts[0]),'start+'+str(exts[1]),nbins[0]),  # left
        ('start+'+str(exts[1]),'end-'+str(exts[2]),nbins[1]),    # center
        ('end-'+str(exts[2]),'end+'+str(exts[3]),nbins[2]),      # right
    )
    avgs = []
    errs = []
    try:
        for newstart,newend,nbin in regions:
            # the same temporary file is reused for every region
            resize(intvfile,tmpfile,newstart,newend,'stranded')
            # compute binned average
            avg,err = getBinnedScore(bwfile,tmpfile,nbin)
            avgs.append(avg)
            errs.append(err)
    finally:
        # do not leave the temporary interval file behind
        if os.path.exists(tmpfile):
            os.remove(tmpfile)

    return numpy.concatenate(avgs),numpy.concatenate(errs)
81 | |
# command line driver:
#   bwfile intvfile exts nbins outfile outplot
# exts and nbins are comma-separated lists (l1,l2,l3,l4 and n1,n2,n3)
print(sys.argv)
if len(sys.argv) != 7:
    sys.exit('usage: ' + sys.argv[0] + ' bwfile intvfile exts nbins outfile outplot')
# sys.argv[0] is the script itself, so only unpack the real arguments
bwfile,intvfile,exts,nbins,outfile,outplot = sys.argv[1:]
# bin counts must be integers; the extensions keep their original parsing
avg, err = getExtendedBinScore(bwfile,intvfile,numpy.fromstring(nbins,dtype=int,sep=','),numpy.fromstring(exts,sep=','))
# write the averages on the first line and the standard errors on the second
with open(outfile,'w') as out:
    numpy.savetxt(out, avg, fmt='%.6f', delimiter=' ', newline=' ')
    out.write('\n')
    numpy.savetxt(out, err, fmt='%.6f', delimiter=' ', newline=' ')
    out.write('\n')

# plot: generate a small R script that draws the average profile with a
# shaded +/- error band into the pdf file outplot
rlines = [
    "options(warn=-1)\n",
    "x <- read.table('"+outfile+"')\n",
    "pdf('"+outplot+"')\n",
    "avg <- x[1,]\n",
    "err <- x[2,]\n",
    "print(x)\n",
    "ylim=c(min(avg-err),max(avg+err))\n",
    "xticks <- seq(ncol(x))\n",
    "plot(xticks,avg,ylab='average coverage',type='l',lwd=0,ylim=ylim)\n",
    "polygon(c(xticks,rev(xticks)),c(avg+err,rev(avg-err)),col='lightgreen',border=NA)\n",
    "lines(xticks,avg,type='l',lwd=1)\n",
    "dev.off()\n",
]
try:
    with open("tmp.r","w") as rscript:
        rscript.writelines(rlines)
    os.system("R --vanilla < tmp.r")
finally:
    # remove the generated script without relying on a shell 'rm'
    if os.path.exists("tmp.r"):
        os.remove("tmp.r")
109 |