annotate tools/rgenetics/rgManQQ.py @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/local/bin/python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 # This is a truly ghastly hack
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 # all of the heavy data cleaning lifting is done in R which is a really dumb place IMHO
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 # Making a new file seems a waste but it would be far easier to set everything up in python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 # seems to work so I'm leaving it alone
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 # sigh. Should really move this gig to rpy - writing a robust R script is hard.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 # updated to compress pdf using gs since millions of points = horsechoker pdfs and pdfs are good
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 # updated july 20 to fix sort order - R unique() sorts into strict collating order
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 # so need to sort after unique to revert to lexicographic order for x axis on Manhattan
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 # rgmanqq updated july 19 to deal with x,y and mt
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 # lots of fixes
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 # ross lazarus
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 import sys,math,shutil,subprocess,os,time,tempfile,string
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 from os.path import abspath
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 from rgutils import timenow, RRun, galhtmlprefix, galhtmlpostfix, galhtmlattr
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 progname = os.path.split(sys.argv[0])[1]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 myversion = 'V000.1 March 2010'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 verbose = False
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 debug = False
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 rcode="""
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 # generalised so 3 core fields passed as parameters ross lazarus March 24 2010 for rgenetics
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 # Originally created as qqman with the following
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 # attribution:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 #--------------
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 # Stephen Turner
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 # http://StephenTurner.us/
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 # http://GettingGeneticsDone.blogspot.com/
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 # Last updated: 19 July 2011 by Ross Lazarus
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 # R code for making manhattan plots and QQ plots from plink output files.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 # With GWAS data this can take a lot of memory. Recommended for use on
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 # 64bit machines only, for now.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 #
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37 library(ggplot2)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 coloursTouse = c('firebrick','darkblue','goldenrod','darkgreen')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 # not too ugly but need a colour expert please...
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43 DrawManhattan = function(pvals=Null,chrom=Null,offset=Null,title=NULL, max.y="max",suggestiveline=0, genomewide=T, size.x.labels=9,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 size.y.labels=10, annotate=F, SNPlist=NULL,grey=0) {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 if (annotate & is.null(SNPlist)) stop("You requested annotation but provided no SNPlist!")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 genomewideline=NULL # was genomewideline=-log10(5e-8)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 n = length(pvals)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 if (genomewide) { # use bonferroni since might be only a small region?
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 genomewideline = -log10(0.05/n) }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 offset = as.integer(offset)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 if (n > 1000000) { offset = offset/10000 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 else if (n > 10000) { offset = offset/1000}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 chro = as.integer(chrom) # already dealt with X and friends?
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 pvals = as.double(pvals)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 d=data.frame(CHR=chro,BP=offset,P=pvals)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 if ("CHR" %in% names(d) & "BP" %in% names(d) & "P" %in% names(d) ) {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 d=d[!is.na(d$P), ]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 d=d[!is.na(d$BP), ]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 d=d[!is.na(d$CHR), ]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 #limit to only chrs 1-22, x=23,y=24,Mt=25?
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 d=d[d$CHR %in% 1:25, ]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62 d=d[d$P>0 & d$P<=1, ]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 d$logp = as.double(-log10(d$P))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64 dlen = length(d$P)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65 d$pos=NA
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66 ticks=NULL
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 lastbase=0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 chrlist = unique(d$CHR)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 chrlist = as.integer(chrlist)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 chrlist = sort(chrlist) # returns lexical ordering
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 if (max.y=="max") { maxy = ceiling(max(d$logp)) }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 else { maxy = max.y }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 nchr = length(chrlist) # may be any number?
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74 maxy = max(maxy,1.1*genomewideline)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 if (nchr >= 2) {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 for (x in c(1:nchr)) {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 i = chrlist[x] # need the chrom number - may not == index
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 if (x == 1) { # first time
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 d[d$CHR==i, ]$pos = d[d$CHR==i, ]$BP # initialize to first BP of chr1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 dsub = subset(d,CHR==i)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81 dlen = length(dsub$P)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 lastbase = max(dsub$pos) # last one
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 tks = d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84 lastchr = i
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85 } else {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86 d[d$CHR==i, ]$pos = d[d$CHR==i, ]$BP+lastbase # one humongous contig
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87 if (sum(is.na(lastchr),is.na(lastbase),is.na(d[d$CHR==i, ]$pos))) {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88 cat(paste('manhattan: For',title,'chrlistx=',i,'lastchr=',lastchr,'lastbase=',lastbase,'pos=',d[d$CHR==i,]$pos))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90 tks=c(tks, d[d$CHR==i, ]$pos[floor(length(d[d$CHR==i, ]$pos)/2)+1])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
91 lastchr = i
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
92 dsub = subset(d,CHR==i)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93 lastbase = max(dsub$pos) # last one
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
94 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
95 ticklim=c(min(d$pos),max(d$pos))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
96 xlabs = chrlist
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
97 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
98 } else { # nchr is 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
99 nticks = 10
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
100 last = max(d$BP)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
101 first = min(d$BP)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
102 tks = c(first)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
103 t = (last-first)/nticks # units per tick
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
104 for (x in c(1:(nticks))) {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
105 tks = c(tks,round(x*t)+first) }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
106 ticklim = c(first,last)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
107 } # else
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
108 if (grey) {mycols=rep(c("gray10","gray60"),max(d$CHR))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
109 } else {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
110 mycols=rep(coloursTouse,max(d$CHR))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
111 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
112 dlen = length(d$P)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
113 d$pranks = rank(d$P)/dlen
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
114 d$centiles = 100*d$pranks # small are interesting
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
115 d$sizes = ifelse((d$centile < 1),2,1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
116 if (annotate) d.annotate=d[as.numeric(substr(d$SNP,3,100)) %in% SNPlist, ]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
117 if (nchr >= 2) {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
118 manplot=qplot(pos,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR),size=factor(sizes))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
119 manplot=manplot+scale_x_continuous(name="Chromosome", breaks=tks, labels=xlabs,limits=ticklim)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
120 manplot=manplot+scale_size_manual(values = c(0.5,1.5)) # requires discreet scale - eg factor
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
121 #manplot=manplot+scale_size(values=c(0.5,2)) # requires continuous
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
122 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
123 else {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
124 manplot=qplot(BP,logp,data=d, ylab=expression(-log[10](italic(p))) , colour=factor(CHR))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
125 manplot=manplot+scale_x_continuous(name=paste("Chromosome",chrlist[1]), breaks=tks, labels=tks,limits=ticklim)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
126 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
127 manplot=manplot+scale_y_continuous(limits=c(0,maxy), breaks=1:maxy, labels=1:maxy)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
128 manplot=manplot+scale_colour_manual(value=mycols)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
129 if (annotate) { manplot=manplot + geom_point(data=d.annotate, colour=I("green3")) }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
130 manplot=manplot + opts(legend.position = "none")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
131 manplot=manplot + opts(title=title)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
132 manplot=manplot+opts(
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
133 panel.background=theme_blank(),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
134 axis.text.x=theme_text(size=size.x.labels, colour="grey50"),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
135 axis.text.y=theme_text(size=size.y.labels, colour="grey50"),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
136 axis.ticks=theme_segment(colour=NA)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
137 )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
138 if (suggestiveline) manplot=manplot+geom_hline(yintercept=suggestiveline,colour="blue", alpha=I(1/3))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
139 if (genomewideline) manplot=manplot+geom_hline(yintercept=genomewideline,colour="red")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
140 manplot
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
141 } else {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
142 stop("Make sure your data frame contains columns CHR, BP, and P")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
143 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
144 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
145
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
146
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
147
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
148 qq = function(pvector, title=NULL, spartan=F) {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
149 # Thanks to Daniel Shriner at NHGRI for providing this code for creating expected and observed values
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
150 o = -log10(sort(pvector,decreasing=F))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
151 e = -log10( 1:length(o)/length(o) )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
152 # you could use base graphics
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
153 # plot(e,o,pch=19,cex=0.25, xlab=expression(Expected~~-log[10](italic(p))),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
154 # ylab=expression(Observed~~-log[10](italic(p))), xlim=c(0,max(e)), ylim=c(0,max(e)))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
155 # lines(e,e,col="red")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
156 #You'll need ggplot2 installed to do the rest
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
157 qq=qplot(e,o, xlim=c(0,max(e)), ylim=c(0,max(o))) + stat_abline(intercept=0,slope=1, col="red")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
158 qq=qq+opts(title=title)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
159 qq=qq+scale_x_continuous(name=expression(Expected~~-log[10](italic(p))))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
160 qq=qq+scale_y_continuous(name=expression(Observed~~-log[10](italic(p))))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
161 if (spartan) plot=plot+opts(panel.background=theme_rect(col="grey50"), panel.grid.minor=theme_blank())
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
162 qq
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
163 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
164
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
165 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
166
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
167 # we need another string to avoid confusion over string substitutions with %in%
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
168 # instantiate rcode2 string with infile,chromcol,offsetcol,pvalscols,title before saving and running
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
169
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
170 rcode2 = """rgqqMan = function(infile="%s",chromcolumn=%d, offsetcolumn=%d, pvalscolumns=c(%s),
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
171 title="%s",grey=%d) {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
172 rawd = read.table(infile,head=T,sep='\\t')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
173 dn = names(rawd)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
174 cc = dn[chromcolumn]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
175 oc = dn[offsetcolumn]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
176 rawd[,cc] = sub('chr','',rawd[,cc],ignore.case = T) # just in case
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
177 rawd[,cc] = sub(':','',rawd[,cc],ignore.case = T) # ugh
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
178 rawd[,cc] = sub('X',23,rawd[,cc],ignore.case = T)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
179 rawd[,cc] = sub('Y',24,rawd[,cc],ignore.case = T)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
180 rawd[,cc] = sub('Mt',25,rawd[,cc], ignore.case = T)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
181 nams = c(cc,oc) # for sorting
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
182 plen = length(rawd[,1])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
183 print(paste('###',plen,'values read from',infile,'read - now running plots',sep=' '))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
184 rawd = rawd[do.call(order,rawd[nams]),]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
185 # mmmf - suggested by http://onertipaday.blogspot.com/2007/08/sortingordering-dataframe-according.html
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
186 # in case not yet ordered
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
187 if (plen > 0) {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
188 for (pvalscolumn in pvalscolumns) {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
189 if (pvalscolumn > 0)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
190 {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
191 cname = names(rawd)[pvalscolumn]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
192 mytitle = paste('p=',cname,', ',title,sep='')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
193 myfname = chartr(' ','_',cname)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
194 myqqplot = qq(rawd[,pvalscolumn],title=mytitle)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
195 ggsave(filename=paste(myfname,"qqplot.png",sep='_'),myqqplot,width=8,height=6,dpi=96)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
196 ggsave(filename=paste(myfname,"qqplot.pdf",sep='_'),myqqplot,width=8,height=6,dpi=96)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
197 print(paste('## qqplot on',cname,'done'))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
198 if ((chromcolumn > 0) & (offsetcolumn > 0)) {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
199 print(paste('## manhattan on',cname,'starting',chromcolumn,offsetcolumn,pvalscolumn))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
200 mymanplot= DrawManhattan(chrom=rawd[,chromcolumn],offset=rawd[,offsetcolumn],pvals=rawd[,pvalscolumn],title=mytitle,grey=grey)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
201 ggsave(filename=paste(myfname,"manhattan.png",sep='_'),mymanplot,width=8,height=6,dpi=96)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
202 ggsave(filename=paste(myfname,"manhattan.pdf",sep='_'),mymanplot,width=8,height=6,dpi=96)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
203 print(paste('## manhattan plot on',cname,'done'))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
204 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
205 else {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
206 print(paste('chrom column =',chromcolumn,'offset column = ',offsetcolumn,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
207 'so no Manhattan plot - supply both chromosome and offset as numerics for Manhattan plots if required'))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
208 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
209 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
210 else {
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
211 print(paste('pvalue column =',pvalscolumn,'Cannot parse it so no plots possible'))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
212 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
213 } # for pvalscolumn
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
214 } else { print('## Problem - no values available to plot - was there really a chromosome and offset column?') }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
215 }
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
216
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
217 rgqqMan()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
218 # execute with defaults as substituted
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
219 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
220
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
221
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
222 def doManQQ(input_fname,chrom_col,offset_col,pval_cols,title,grey,ctitle,outdir,beTidy=False):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
223 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
224 we may have an interval file or a tabular file - if interval, will have chr1... so need to adjust
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
225 to chrom numbers
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
226 draw a qq for pvals and a manhattan plot if chrom/offset <> 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
227 contains some R scripts as text strings - we substitute defaults into the calls
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
228 to make them do our bidding - and save the resulting code for posterity
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
229 this can be called externally, I guess...for QC eg?
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
230 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
231 if debug:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
232 print 'doManQQ',input_fname,chrom_col,offset_col,pval_cols,title,grey,ctitle,outdir
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
233 rcmd = '%s%s' % (rcode,rcode2 % (input_fname,chrom_col,offset_col,pval_cols,title,grey))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
234 if debug:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
235 print 'running\n%s\n' % rcmd
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
236 rlog,flist = RRun(rcmd=rcmd,title=ctitle,outdir=outdir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
237 rlog.append('## R script=')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
238 rlog.append(rcmd)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
239 return rlog,flist
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
240
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
241 def compressPDF(inpdf=None):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
242 """need absolute path to pdf
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
243 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
244 assert os.path.isfile(inpdf), "## Input %s supplied to compressPDF not found" % inpdf
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
245 outpdf = '%s_compressed' % inpdf
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
246 cl = ["gs", "-sDEVICE=pdfwrite", "-dNOPAUSE", "-dBATCH", "-sOutputFile=%s" % outpdf,inpdf]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
247 retval = subprocess.call(cl)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
248 if retval == 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
249 os.unlink(inpdf)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
250 shutil.move(outpdf,inpdf)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
251 return retval
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
252
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
253 def main():
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
254 u = """<command interpreter="python">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
255 rgManQQ.py '$input_file' "$name" '$out_html' '$out_html.files_path' '$chrom_col' '$offset_col' '$pval_col'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
256 </command>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
257 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
258 npar = 8
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
259 if len(sys.argv) < npar:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
260 print >> sys.stdout, '## error - too few command line parameters - wanting %d' % npar
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
261 print >> sys.stdout, u
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
262 sys.exit(1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
263 input_fname = sys.argv[1]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
264 title = sys.argv[2]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
265 killme = string.punctuation + string.whitespace
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
266 trantab = string.maketrans(killme,'_'*len(killme))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
267 ctitle = title.translate(trantab)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
268 outhtml = sys.argv[3]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
269 outdir = sys.argv[4]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
270 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
271 chrom_col = int(sys.argv[5])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
272 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
273 chrom_col = -1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
274 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
275 offset_col = int(sys.argv[6])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
276 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
277 offset_col = -1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
278 p = sys.argv[7].strip().split(',')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
279 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
280 q = [int(x) for x in p]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
281 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
282 p = -1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
283 if chrom_col == -1 or offset_col == -1: # was passed as zero - do not do manhattan plots
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
284 chrom_col = -1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
285 offset_col = -1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
286 grey = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
287 if (sys.argv[8].lower() in ['1','true']):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
288 grey = 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
289 if p == -1:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
290 print >> sys.stderr,'## Cannot run rgManQQ - missing pval column'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
291 sys.exit(1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
292 p = ['%d' % (int(x) + 1) for x in p]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
293 rlog,flist = doManQQ(input_fname,chrom_col+1,offset_col+1,','.join(p),title,grey,ctitle,outdir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
294 flist.sort()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
295 html = [galhtmlprefix % progname,]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
296 html.append('<h1>%s</h1>' % title)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
297 if len(flist) > 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
298 html.append('<table>\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
299 for row in flist:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
300 fname,expl = row # RRun returns pairs of filenames fiddled for the log and R script
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
301 n,e = os.path.splitext(fname)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
302 if e in ['.png','.jpg']:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
303 pdf = '%s.pdf' % n
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
304 pdff = os.path.join(outdir,pdf)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
305 if os.path.exists(pdff):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
306 rval = compressPDF(inpdf=pdff)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
307 if rval <> 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
308 pdf = '%s(not_compressed)' % pdf
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
309 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
310 pdf = '%s(not_found)' % pdf
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
311 s= '<tr><td><a href="%s"><img src="%s" title="%s" hspace="10" width="800"></a></td></tr>' \
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
312 % (pdf,fname,expl)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
313 html.append(s)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
314 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
315 html.append('<tr><td><a href="%s">%s</a></td></tr>' % (fname,expl))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
316 html.append('</table>\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
317 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
318 html.append('<h2>### Error - R returned no files - please confirm that parameters are sane</h1>')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
319 html.append('<h3>R log follows below</h3><hr><pre>\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
320 html += rlog
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
321 html.append('</pre>\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
322 html.append(galhtmlattr % (progname,timenow()))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
323 html.append(galhtmlpostfix)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
324 htmlf = file(outhtml,'w')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
325 htmlf.write('\n'.join(html))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
326 htmlf.write('\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
327 htmlf.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
328
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
329
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
330
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
331 if __name__ == "__main__":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
332 main()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
333
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
334