Mercurial > repos > xuebing > sharplab_interval_analysis
comparison align2database.py @ 20:16ba480adf96
Uploaded
| author | xuebing |
|---|---|
| date | Sat, 31 Mar 2012 08:31:22 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 19:d325683ec368 | 20:16ba480adf96 |
|---|---|
| 1 ''' | |
| 2 align mulitple bed to one bed | |
| 3 python align_multiple.py hmChIP_mm9_peak_bed/mm9-GSE19999_PolII_P25_all.cod.bed hmChIP_mm9_peak_bed/ test.txt test.pdf 100 5000 | |
| 4 ''' | |
| 5 | |
| 6 import os,sys,random | |
| 7 def main(): | |
| 8 queryfile = sys.argv[1] | |
| 9 inpath = sys.argv[2] | |
| 10 outputdata = sys.argv[3] | |
| 11 outputerr = sys.argv[4] | |
| 12 barplotpdf = sys.argv[5] | |
| 13 min_feat = sys.argv[6] # min features overlap | |
| 14 windowsize = sys.argv[7] | |
| 15 anchor = sys.argv[8] | |
| 16 span = sys.argv[9] # loess smooth parameter | |
| 17 | |
| 18 inpath = inpath.rstrip('/') | |
| 19 #os.system('rm '+inpath+'/*tmp*') | |
| 20 | |
| 21 infiles = os.listdir(inpath) | |
| 22 | |
| 23 #print len(infiles),' files\n' | |
| 24 i = 0 | |
| 25 for infile in infiles: | |
| 26 if 'tmp' in infile: | |
| 27 #os.system('rm '+inpath+'/'+infile) | |
| 28 continue | |
| 29 i = i +1 | |
| 30 print i,infile | |
| 31 output = infile.split('/')[-1]+'-to-'+queryfile.split('/')[-1]#'.summary' | |
| 32 if anchor == 'database': | |
| 33 command = 'python /Users/xuebing/galaxy-dist/tools/mytools/alignr.py -b '+inpath+'/'+infile+' -a '+queryfile+' -o '+output+' --summary-only -q -w '+windowsize | |
| 34 else: | |
| 35 command = 'python /Users/xuebing/galaxy-dist/tools/mytools/alignr.py -a '+inpath+'/'+infile+' -b '+queryfile+' -o '+output+' --summary-only -q -w '+windowsize | |
| 36 #print command+'\n' | |
| 37 os.system(command) | |
| 38 print 'start visualization...' | |
| 39 # visualize | |
| 40 rscriptfile = 'f-'+str(random.random())+'.r' | |
| 41 r = open(rscriptfile,'w') | |
| 42 r.write("files <- dir('.','summary',full.name=T)\n") | |
| 43 #r.write("print(files)\n") | |
| 44 r.write("x <- read.table(files[1])\n") | |
| 45 r.write("err <- read.table(gsub('summary','standarderror',files[1]))\n") | |
| 46 r.write("for (filename in files[2:length(files)]){\n") | |
| 47 r.write(" x <- rbind(x,read.table(filename))\n") | |
| 48 r.write(" err <- rbind(err,read.table(gsub('summary','standarderror',filename)))\n") | |
| 49 r.write("}\n") | |
| 50 r.write("x <- x[x[,2] > "+min_feat+",]\n") | |
| 51 r.write("err <- err[err[,2] > "+min_feat+",]\n") | |
| 52 r.write("name <- as.character(x[,1])\n") | |
| 53 r.write("nfeat <- x[,2]\n") | |
| 54 r.write("x <- x[,3:ncol(x)]\n") | |
| 55 r.write("err <- err[,3:ncol(err)]\n") | |
| 56 r.write("for (i in 1:nrow(x)) {\n") | |
| 57 r.write(" name[i] <- strsplit(name[i],'-to-')[[1]][1]\n") | |
| 58 r.write("}\n") | |
| 59 # remove rows that have no variation, which cause problem in heatmap. This is the case when align to itself | |
| 60 r.write("toremove <- seq(nrow(x))\n") | |
| 61 r.write("for (i in 1:nrow(x)){\n") | |
| 62 r.write(" toremove[i] <- all(x[i,] == x[i,1])\n") | |
| 63 r.write("}\n") | |
| 64 r.write("x <- x[!toremove,]\n") | |
| 65 r.write("err <- err[!toremove,]\n") | |
| 66 r.write("name <- name[!toremove]\n") | |
| 67 r.write("nfeat <- nfeat[!toremove]\n") | |
| 68 r.write("write.table(cbind(name,nfeat,x),file='"+outputdata+"',sep ='\\t',quote=F,row.names=F,col.names=F)\n") | |
| 69 r.write("write.table(cbind(name,nfeat,err),file='"+outputerr+"',sep ='\\t',quote=F,row.names=F,col.names=F)\n") | |
| 70 | |
| 71 r.write("pdf('"+barplotpdf+"')\n") | |
| 72 r.write("heatmap(t(scale(t(as.matrix(x,nc=ncol(x))))),Colv=NA,labRow=name,labCol=NA,margins=c(1,8),cexRow=0.02+1/log(nrow(x)),col=heat.colors(1000))\n") | |
| 73 | |
| 74 if windowsize != '0' : | |
| 75 r.write("xticks <- seq(-"+windowsize+","+windowsize+",length.out=100)\n") | |
| 76 r.write("xlab <- 'relative position (bp)'\n") | |
| 77 else: | |
| 78 r.write("xticks <- seq(100)\n") | |
| 79 r.write("xlab <- 'relative position (bin)'\n") | |
| 80 | |
| 81 #r.write("plot(xticks,colSums(t(scale(t(as.matrix(x,nc=ncol(x)))))),xlab='relative position (bp)',type='l',lwd=2,main='total signal')\n") | |
| 82 r.write("for (i in 1:nrow(x)) {\n") | |
| 83 r.write(" avg <- x[i,]/nfeat[i]\n") | |
| 84 #r.write(" maxv <- max(avg)\n") | |
| 85 #r.write(" minv <- min(avg)\n") | |
| 86 #r.write(" medv <- median(avg)\n") | |
| 87 #r.write(" if (maxv < "+fold+"*medv | minv*"+fold+">medv){next}\n") | |
| 88 #smooth | |
| 89 if float(span) >= 0.1: | |
| 90 r.write(" avg = loess(as.numeric(avg)~xticks,span="+span+")$fitted\n") | |
| 91 r.write(" err[i,] = loess(as.numeric(err[i,])~xticks,span="+span+")$fitted\n") | |
| 92 r.write(" par(cex=1.5)\n") | |
| 93 r.write(" plot(xticks,avg,ylab='average coverage',main=paste(name[i],'\n n=',nfeat[i],sep=''),xlab=xlab,type='l',lwd=1,ylim=c(min(avg-err[i,]),max(avg+err[i,])))\n") | |
| 94 r.write(" polygon(c(xticks,rev(xticks)),c(avg+err[i,],rev(avg-err[i,])),col='slateblue1',border=NA)\n") | |
| 95 r.write(" lines(xticks,avg,type='l',lwd=1)\n") | |
| 96 r.write("}\n") | |
| 97 r.write("dev.off()\n") | |
| 98 r.close() | |
| 99 os.system("R --vanilla < "+rscriptfile) | |
| 100 os.system('rm '+rscriptfile) | |
| 101 os.system('rm *.summary') | |
| 102 os.system('rm *.standarderror') | |
| 103 | |
| 104 main() |
