Mercurial > repos > xuebing > sharplabtool
comparison mytools/getGenomicScore.py @ 9:87eb5c5ddfe9
Uploaded
| author | xuebing |
|---|---|
| date | Fri, 09 Mar 2012 20:01:43 -0500 |
| parents | f0dc65e7f6c0 |
| children |
comparison
equal
deleted
inserted
replaced
| 8:361ec1c0479d | 9:87eb5c5ddfe9 |
|---|---|
| 1 | |
| 2 import random,string,os,sys | |
| 3 | |
| 4 | |
| 5 def getScore(intvfile,outfile,summary_type,bwfilepath,nbin,strand,outplot,span): | |
| 6 f = open(intvfile) | |
| 7 tmpsh = "".join(random.sample(string.letters+string.digits, 8)) | |
| 8 tmpout = "".join(random.sample(string.letters+string.digits, 8)) | |
| 9 tmp = open(tmpsh,'w') | |
| 10 if os.path.isdir(bwfilepath): | |
| 11 for line in f: | |
| 12 flds = line.strip().split('\t') | |
| 13 cmd = 'bigWigSummary -type='+summary_type+' '+bwfilepath+'/'+flds[0]+'.bw '+flds[0]+' '+flds[1]+' '+flds[2]+' '+nbin+' >> '+tmpout+' 2>>'+tmpout+'\n' | |
| 14 tmp.write(cmd) | |
| 15 else: | |
| 16 for line in f: | |
| 17 flds = line.strip().split('\t') | |
| 18 cmd = 'bigWigSummary -type='+summary_type+' '+bwfilepath+' '+flds[0]+' '+flds[1]+' '+flds[2]+' '+nbin+' >> '+tmpout+' 2>>'+tmpout+'\n' | |
| 19 tmp.write(cmd) | |
| 20 f.close() | |
| 21 # remove blank lines | |
| 22 tmp.write("sed '/^$/d' "+tmpout+'>'+tmpout+".1\n") | |
| 23 tmp.write("sed '/^Can/d' "+tmpout+".1 >"+tmpout+".2\n") | |
| 24 # set n/a to 0 | |
| 25 tmp.write("sed 's/n\/a/0/g' "+tmpout+".2 >"+tmpout+".3\n") | |
| 26 # replace text with 0 | |
| 27 zeros = ''.join(['0\t']*int(nbin)) | |
| 28 tmp.write("sed 's/^[a-zA-Z]/"+zeros+"/' "+tmpout+".3 >"+tmpout+".4\n") | |
| 29 # cut the first nbin columns | |
| 30 tmp.write("cut -f 1-"+nbin+" "+tmpout+".4 > "+tmpout+".5\n") | |
| 31 tmp.write("paste "+intvfile+" "+tmpout+".5 >"+outfile+"\n") | |
| 32 tmp.close() | |
| 33 os.system('chmod +x '+tmpsh) | |
| 34 os.system('./'+tmpsh) | |
| 35 #os.system('rm '+tmpout+'*') | |
| 36 #os.system('rm '+tmpsh) | |
| 37 | |
| 38 # strandness: need to reverse bins for - strand | |
| 39 if nbin > 1 and strand > 0: | |
| 40 strand = strand - 1 # python is 0 based | |
| 41 os.system('mv '+outfile+' '+tmpout) | |
| 42 f = open(tmpout) | |
| 43 out = open(outfile,'w') | |
| 44 for line in f: | |
| 45 flds=line.strip().split('\t') | |
| 46 if flds[strand] == '+': | |
| 47 out.write(line) | |
| 48 else: | |
| 49 scores = flds[-int(nbin):] | |
| 50 scores.reverse() | |
| 51 flds = flds[:-int(nbin)]+scores | |
| 52 out.write('\t'.join(flds)+'\n') | |
| 53 os.system('rm '+tmpout) | |
| 54 f.close() | |
| 55 out.close() | |
| 56 # plot | |
| 57 if int(nbin) > 1: | |
| 58 rscript = open(tmpsh,"w") | |
| 59 rscript.write("options(warn=-1)\n") | |
| 60 rscript.write("x <- read.table('"+outfile+"',sep='\t')\n") | |
| 61 rscript.write("x <- x[,(ncol(x)+1-"+nbin+"):ncol(x)]\n") | |
| 62 rscript.write("pdf('"+outplot+"')\n") | |
| 63 rscript.write("avg <- apply(x,2,mean)\n") | |
| 64 rscript.write("err <- apply(x,2,sd)/sqrt(nrow(x))\n") | |
| 65 rscript.write("ylim=c(min(avg-err),max(avg+err))\n") | |
| 66 rscript.write("xticks <- seq(ncol(x))-(1+ncol(x))/2\n") | |
| 67 if span >= 0.1: | |
| 68 rscript.write("avg = loess(avg~xticks,span="+str(span)+")$fitted\n") | |
| 69 rscript.write("err = loess(err~xticks,span="+str(span)+")$fitted\n") | |
| 70 rscript.write("par(cex=1.5)\n") | |
| 71 rscript.write("plot(xticks,avg,ylab='average conservation score',xlab='relative position (bin)',type='l',lwd=0,ylim=ylim)\n") | |
| 72 rscript.write("polygon(c(xticks,rev(xticks)),c(avg+err,rev(avg-err)),col='slateblue1',border=NA)\n") | |
| 73 rscript.write("lines(xticks,avg,type='l',lwd=1)\n") | |
| 74 rscript.write("dev.off()\n") | |
| 75 rscript.close() | |
| 76 os.system("R --vanilla < "+tmpsh) | |
| 77 os.system("rm "+tmpsh) | |
| 78 | |
| 79 getScore(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],int(sys.argv[6]),sys.argv[7],float(sys.argv[8])) |
