annotate tools/mytools/getGenomicScore.py @ 2:c2a356708570

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:42 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 import random,string,os,sys
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 def getScore(intvfile,outfile,summary_type,bwfilepath,nbin,strand,outplot,span):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 f = open(intvfile)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 tmpsh = "".join(random.sample(string.letters+string.digits, 8))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 tmpout = "".join(random.sample(string.letters+string.digits, 8))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 tmp = open(tmpsh,'w')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 if os.path.isdir(bwfilepath):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 for line in f:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 flds = line.strip().split('\t')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 cmd = 'bigWigSummary -type='+summary_type+' '+bwfilepath+'/'+flds[0]+'.bw '+flds[0]+' '+flds[1]+' '+flds[2]+' '+nbin+' >> '+tmpout+' 2>>'+tmpout+'\n'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 tmp.write(cmd)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 for line in f:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 flds = line.strip().split('\t')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 cmd = 'bigWigSummary -type='+summary_type+' '+bwfilepath+' '+flds[0]+' '+flds[1]+' '+flds[2]+' '+nbin+' >> '+tmpout+' 2>>'+tmpout+'\n'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 tmp.write(cmd)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 f.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 # remove blank lines
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 tmp.write("sed '/^$/d' "+tmpout+'>'+tmpout+".1\n")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 tmp.write("sed '/^Can/d' "+tmpout+".1 >"+tmpout+".2\n")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 # set n/a to 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 tmp.write("sed 's/n\/a/0/g' "+tmpout+".2 >"+tmpout+".3\n")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 # replace text with 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 zeros = ''.join(['0\t']*int(nbin))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 tmp.write("sed 's/^[a-zA-Z]/"+zeros+"/' "+tmpout+".3 >"+tmpout+".4\n")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29 # cut the first nbin columns
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 tmp.write("cut -f 1-"+nbin+" "+tmpout+".4 > "+tmpout+".5\n")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 tmp.write("paste "+intvfile+" "+tmpout+".5 >"+outfile+"\n")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 tmp.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 os.system('chmod +x '+tmpsh)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 os.system('./'+tmpsh)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 #os.system('rm '+tmpout+'*')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 #os.system('rm '+tmpsh)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 # strandness: need to reverse bins for - strand
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 if nbin > 1 and strand > 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 strand = strand - 1 # python is 0 based
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 os.system('mv '+outfile+' '+tmpout)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 f = open(tmpout)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43 out = open(outfile,'w')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 for line in f:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 flds=line.strip().split('\t')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 if flds[strand] == '+':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 out.write(line)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 scores = flds[-int(nbin):]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 scores.reverse()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 flds = flds[:-int(nbin)]+scores
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 out.write('\t'.join(flds)+'\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 os.system('rm '+tmpout)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 f.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 out.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 # plot
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 if int(nbin) > 1:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 rscript = open(tmpsh,"w")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 rscript.write("options(warn=-1)\n")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 rscript.write("x <- read.table('"+outfile+"',sep='\t')\n")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 rscript.write("x <- x[,(ncol(x)+1-"+nbin+"):ncol(x)]\n")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62 rscript.write("pdf('"+outplot+"')\n")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 rscript.write("avg <- apply(x,2,mean)\n")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64 rscript.write("err <- apply(x,2,sd)/sqrt(nrow(x))\n")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65 rscript.write("ylim=c(min(avg-err),max(avg+err))\n")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66 rscript.write("xticks <- seq(ncol(x))-(1+ncol(x))/2\n")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 if span >= 0.1:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 rscript.write("avg = loess(avg~xticks,span="+str(span)+")$fitted\n")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 rscript.write("err = loess(err~xticks,span="+str(span)+")$fitted\n")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 rscript.write("par(cex=1.5)\n")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 rscript.write("plot(xticks,avg,ylab='average conservation score',xlab='relative position (bin)',type='l',lwd=0,ylim=ylim)\n")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 rscript.write("polygon(c(xticks,rev(xticks)),c(avg+err,rev(avg-err)),col='slateblue1',border=NA)\n")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 rscript.write("lines(xticks,avg,type='l',lwd=1)\n")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74 rscript.write("dev.off()\n")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 rscript.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 os.system("R --vanilla < "+tmpsh)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 os.system("rm "+tmpsh)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 getScore(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],int(sys.argv[6]),sys.argv[7],float(sys.argv[8]))