Mercurial > repos > xuebing > sharplabtool
diff getGenomicScore.py @ 14:76e1b1b21cce default tip
Deleted selected files
author | xuebing |
---|---|
date | Tue, 13 Mar 2012 19:05:10 -0400 |
parents | 292186c14b08 |
children |
line wrap: on
line diff
--- a/getGenomicScore.py Sat Mar 10 08:17:36 2012 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,79 +0,0 @@ - -import random,string,os,sys - - -def getScore(intvfile,outfile,summary_type,bwfilepath,nbin,strand,outplot,span): - f = open(intvfile) - tmpsh = "".join(random.sample(string.letters+string.digits, 8)) - tmpout = "".join(random.sample(string.letters+string.digits, 8)) - tmp = open(tmpsh,'w') - if os.path.isdir(bwfilepath): - for line in f: - flds = line.strip().split('\t') - cmd = 'bigWigSummary -type='+summary_type+' '+bwfilepath+'/'+flds[0]+'.bw '+flds[0]+' '+flds[1]+' '+flds[2]+' '+nbin+' >> '+tmpout+' 2>>'+tmpout+'\n' - tmp.write(cmd) - else: - for line in f: - flds = line.strip().split('\t') - cmd = 'bigWigSummary -type='+summary_type+' '+bwfilepath+' '+flds[0]+' '+flds[1]+' '+flds[2]+' '+nbin+' >> '+tmpout+' 2>>'+tmpout+'\n' - tmp.write(cmd) - f.close() - # remove blank lines - tmp.write("sed '/^$/d' "+tmpout+'>'+tmpout+".1\n") - tmp.write("sed '/^Can/d' "+tmpout+".1 >"+tmpout+".2\n") - # set n/a to 0 - tmp.write("sed 's/n\/a/0/g' "+tmpout+".2 >"+tmpout+".3\n") - # replace text with 0 - zeros = ''.join(['0\t']*int(nbin)) - tmp.write("sed 's/^[a-zA-Z]/"+zeros+"/' "+tmpout+".3 >"+tmpout+".4\n") - # cut the first nbin columns - tmp.write("cut -f 1-"+nbin+" "+tmpout+".4 > "+tmpout+".5\n") - tmp.write("paste "+intvfile+" "+tmpout+".5 >"+outfile+"\n") - tmp.close() - os.system('chmod +x '+tmpsh) - os.system('./'+tmpsh) - #os.system('rm '+tmpout+'*') - #os.system('rm '+tmpsh) - - # strandness: need to reverse bins for - strand - if nbin > 1 and strand > 0: - strand = strand - 1 # python is 0 based - os.system('mv '+outfile+' '+tmpout) - f = open(tmpout) - out = open(outfile,'w') - for line in f: - flds=line.strip().split('\t') - if flds[strand] == '+': - out.write(line) - else: - scores = flds[-int(nbin):] - scores.reverse() - flds = flds[:-int(nbin)]+scores - out.write('\t'.join(flds)+'\n') - os.system('rm '+tmpout) - f.close() - out.close() - # plot - if int(nbin) > 1: - rscript = open(tmpsh,"w") - rscript.write("options(warn=-1)\n") - rscript.write("x <- read.table('"+outfile+"',sep='\t')\n") - rscript.write("x <- x[,(ncol(x)+1-"+nbin+"):ncol(x)]\n") - rscript.write("pdf('"+outplot+"')\n") - rscript.write("avg <- apply(x,2,mean)\n") - rscript.write("err <- apply(x,2,sd)/sqrt(nrow(x))\n") - rscript.write("ylim=c(min(avg-err),max(avg+err))\n") - rscript.write("xticks <- seq(ncol(x))-(1+ncol(x))/2\n") - if span >= 0.1: - rscript.write("avg = loess(avg~xticks,span="+str(span)+")$fitted\n") - rscript.write("err = loess(err~xticks,span="+str(span)+")$fitted\n") - rscript.write("par(cex=1.5)\n") - rscript.write("plot(xticks,avg,ylab='average conservation score',xlab='relative position (bin)',type='l',lwd=0,ylim=ylim)\n") - rscript.write("polygon(c(xticks,rev(xticks)),c(avg+err,rev(avg-err)),col='slateblue1',border=NA)\n") - rscript.write("lines(xticks,avg,type='l',lwd=1)\n") - rscript.write("dev.off()\n") - rscript.close() - os.system("R --vanilla < "+tmpsh) - os.system("rm "+tmpsh) - -getScore(sys.argv[1],sys.argv[2],sys.argv[3],sys.argv[4],sys.argv[5],int(sys.argv[6]),sys.argv[7],float(sys.argv[8]))