annotate tools/picard/picard_wrapper.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 Originally written by Kelly Vincent
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 pretty output and additional picard wrappers by Ross Lazarus for rgenetics
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 Runs all available wrapped Picard tools.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 usage: picard_wrapper.py [options]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 code Ross wrote licensed under the LGPL
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 see http://www.gnu.org/copyleft/lesser.html
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 import optparse, os, sys, subprocess, tempfile, shutil, time, logging
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 galhtmlprefix = """<?xml version="1.0" encoding="utf-8" ?>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 <head>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 <meta name="generator" content="Galaxy %s tool output - see http://getgalaxy.org/" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 <title></title>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 <link rel="stylesheet" href="/static/style/base.css" type="text/css" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 </head>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 <body>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 <div class="document">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 galhtmlattr = """Galaxy tool %s run at %s</b><br/>"""
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 galhtmlpostfix = """</div></body></html>\n"""
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29 def stop_err( msg ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 sys.stderr.write( '%s\n' % msg )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 sys.exit()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 def timenow():
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 """return current time as a string
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37 return time.strftime('%d/%m/%Y %H:%M:%S', time.localtime(time.time()))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 class PicardBase():
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 simple base class with some utilities for Picard
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43 adapted and merged with Kelly Vincent's code april 2011 Ross
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 lots of changes...
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 def __init__(self, opts=None,arg0=None):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 """ common stuff needed at init for a picard tool
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 assert opts <> None, 'PicardBase needs opts at init'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 self.opts = opts
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 if self.opts.outdir == None:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 self.opts.outdir = os.getcwd() # fixmate has no html file eg so use temp dir
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 assert self.opts.outdir <> None,'## PicardBase needs a temp directory if no output directory passed in'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 self.picname = self.baseName(opts.jar)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 if self.picname.startswith('picard'):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 self.picname = opts.picard_cmd # special case for some tools like replaceheader?
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 self.progname = self.baseName(arg0)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 self.version = '0.002'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 self.delme = [] # list of files to destroy
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 self.title = opts.title
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62 self.inputfile = opts.input
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64 os.makedirs(opts.outdir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66 pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 os.makedirs(opts.tmpdir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 self.log_filename = os.path.join(self.opts.outdir,'%s.log' % self.picname)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 self.metricsOut = os.path.join(opts.outdir,'%s.metrics.txt' % self.picname)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 self.setLogging(logfname=self.log_filename)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 def baseName(self,name=None):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 return os.path.splitext(os.path.basename(name))[0]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 def setLogging(self,logfname="picard_wrapper.log"):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 """setup a logger
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81 logging.basicConfig(level=logging.INFO,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 filename=logfname,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 filemode='a')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86 def readLarge(self,fname=None):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87 """ read a potentially huge file.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90 # get stderr, allowing for case where it's very large
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
91 tmp = open( fname, 'rb' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
92 s = ''
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93 buffsize = 1048576
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
94 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
95 while True:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
96 more = tmp.read( buffsize )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
97 if len(more) > 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
98 s += more
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
99 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
100 break
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
101 except OverflowError:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
102 pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
103 tmp.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
104 except Exception, e:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
105 stop_err( 'Error : %s' % str( e ) )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
106 return s
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
107
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
108 def runCL(self,cl=None,output_dir=None):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
109 """ construct and run a command line
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
110 we have galaxy's temp path as opt.temp_dir so don't really need isolation
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
111 sometimes stdout is needed as the output - ugly hacks to deal with potentially vast artifacts
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
112 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
113 assert cl <> None, 'PicardBase runCL needs a command line as cl'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
114 if output_dir == None:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
115 output_dir = self.opts.outdir
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
116 if type(cl) == type([]):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
117 cl = ' '.join(cl)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
118 fd,templog = tempfile.mkstemp(dir=output_dir,suffix='rgtempRun.txt')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
119 tlf = open(templog,'wb')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
120 fd,temperr = tempfile.mkstemp(dir=output_dir,suffix='rgtempErr.txt')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
121 tef = open(temperr,'wb')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
122 process = subprocess.Popen(cl, shell=True, stderr=tef, stdout=tlf, cwd=output_dir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
123 rval = process.wait()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
124 tlf.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
125 tef.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
126 stderrs = self.readLarge(temperr)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
127 stdouts = self.readLarge(templog)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
128 if len(stderrs) > 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
129 s = '## executing %s returned status %d and stderr: \n%s\n' % (cl,rval,stderrs)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
130 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
131 s = '## executing %s returned status %d and nothing on stderr\n' % (cl,rval)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
132 logging.info(s)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
133 os.unlink(templog) # always
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
134 os.unlink(temperr) # always
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
135 return s, stdouts # sometimes this is an output
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
136
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
137 def runPic(self, jar, cl):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
138 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
139 cl should be everything after the jar file name in the command
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
140 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
141 runme = ['java -Xmx%s' % self.opts.maxjheap]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
142 runme.append('-jar %s' % jar)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
143 runme += cl
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
144 s,stdout = self.runCL(cl=runme, output_dir=self.opts.outdir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
145 return stdout
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
146
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
147 def samToBam(self,infile=None,outdir=None):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
148 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
149 use samtools view to convert sam to bam
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
150 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
151 fd,tempbam = tempfile.mkstemp(dir=outdir,suffix='rgutilsTemp.bam')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
152 cl = ['samtools view -h -b -S -o ',tempbam,infile]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
153 tlog,stdouts = self.runCL(cl,outdir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
154 return tlog,tempbam
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
155
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
156 #def bamToSam(self,infile=None,outdir=None):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
157 # """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
158 # use samtools view to convert bam to sam
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
159 # """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
160 # fd,tempsam = tempfile.mkstemp(dir=outdir,suffix='rgutilsTemp.sam')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
161 # cl = ['samtools view -h -o ',tempsam,infile]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
162 # tlog,stdouts = self.runCL(cl,outdir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
163 # return tlog,tempsam
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
164
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
165 def sortSam(self, infile=None,outfile=None,outdir=None):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
166 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
167 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
168 print '## sortSam got infile=%s,outfile=%s,outdir=%s' % (infile,outfile,outdir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
169 cl = ['samtools sort',infile,outfile]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
170 tlog,stdouts = self.runCL(cl,outdir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
171 return tlog
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
172
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
173 def cleanup(self):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
174 for fname in self.delme:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
175 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
176 os.unlink(fname)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
177 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
178 pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
179
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
180 def prettyPicout(self,transpose,maxrows):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
181 """organize picard outpouts into a report html page
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
182 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
183 res = []
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
184 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
185 r = open(self.metricsOut,'r').readlines()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
186 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
187 r = []
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
188 if len(r) > 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
189 res.append('<b>Picard on line resources</b><ul>\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
190 res.append('<li><a href="http://picard.sourceforge.net/index.shtml">Click here for Picard Documentation</a></li>\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
191 res.append('<li><a href="http://picard.sourceforge.net/picard-metric-definitions.shtml">Click here for Picard Metrics definitions</a></li></ul><hr/>\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
192 if transpose:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
193 res.append('<b>Picard output (transposed to make it easier to see)</b><hr/>\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
194 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
195 res.append('<b>Picard output</b><hr/>\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
196 res.append('<table cellpadding="3" >\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
197 dat = []
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
198 heads = []
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
199 lastr = len(r) - 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
200 # special case for estimate library complexity hist
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
201 thist = False
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
202 for i,row in enumerate(r):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
203 if row.strip() > '':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
204 srow = row.split('\t')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
205 if row.startswith('#'):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
206 heads.append(row.strip()) # want strings
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
207 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
208 dat.append(srow) # want lists
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
209 if row.startswith('## HISTOGRAM'):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
210 thist = True
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
211 if len(heads) > 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
212 hres = ['<tr class="d%d"><td colspan="2">%s</td></tr>' % (i % 2,x) for i,x in enumerate(heads)]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
213 res += hres
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
214 heads = []
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
215 if len(dat) > 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
216 if transpose and not thist:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
217 tdat = map(None,*dat) # transpose an arbitrary list of lists
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
218 tdat = ['<tr class="d%d"><td>%s</td><td>%s&nbsp;</td></tr>\n' % ((i+len(heads)) % 2,x[0],x[1]) for i,x in enumerate(tdat)]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
219 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
220 tdat = ['\t'.join(x).strip() for x in dat] # back to strings :(
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
221 tdat = ['<tr class="d%d"><td colspan="2">%s</td></tr>\n' % ((i+len(heads)) % 2,x) for i,x in enumerate(tdat)]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
222 res += tdat
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
223 dat = []
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
224 res.append('</table>\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
225 return res
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
226
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
227 def fixPicardOutputs(self,transpose,maxloglines):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
228 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
229 picard produces long hard to read tab header files
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
230 make them available but present them transposed for readability
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
231 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
232 logging.shutdown()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
233 self.cleanup() # remove temp files stored in delme
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
234 rstyle="""<style type="text/css">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
235 tr.d0 td {background-color: oldlace; color: black;}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
236 tr.d1 td {background-color: aliceblue; color: black;}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
237 </style>"""
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
238 res = [rstyle,]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
239 res.append(galhtmlprefix % self.progname)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
240 res.append(galhtmlattr % (self.picname,timenow()))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
241 flist = [x for x in os.listdir(self.opts.outdir) if not x.startswith('.')]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
242 pdflist = [x for x in flist if os.path.splitext(x)[-1].lower() == '.pdf']
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
243 if len(pdflist) > 0: # assumes all pdfs come with thumbnail .jpgs
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
244 for p in pdflist:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
245 imghref = '%s.jpg' % os.path.splitext(p)[0] # removes .pdf
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
246 res.append('<table cellpadding="10"><tr><td>\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
247 res.append('<a href="%s"><img src="%s" title="Click image preview for a print quality PDF version" hspace="10" align="middle"></a>\n' % (p,imghref))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
248 res.append('</tr></td></table>\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
249 if len(flist) > 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
250 res.append('<b>The following output files were created (click the filename to view/download a copy):</b><hr/>')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
251 res.append('<table>\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
252 for i,f in enumerate(flist):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
253 fn = os.path.split(f)[-1]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
254 res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn,fn))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
255 res.append('</table><p/>\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
256 pres = self.prettyPicout(transpose,maxloglines)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
257 if len(pres) > 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
258 res += pres
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
259 l = open(self.log_filename,'r').readlines()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
260 llen = len(l)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
261 if llen > 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
262 res.append('<b>Picard Tool Run Log</b><hr/>\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
263 rlog = ['<pre>',]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
264 if llen > maxloglines:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
265 n = min(50,int(maxloglines/2))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
266 rlog += l[:n]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
267 rlog.append('------------ ## %d rows deleted ## --------------\n' % (llen-maxloglines))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
268 rlog += l[-n:]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
269 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
270 rlog += l
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
271 rlog.append('</pre>')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
272 if llen > maxloglines:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
273 rlog.append('\n<b>## WARNING - %d log lines truncated - <a href="%s">%s</a> contains entire output</b>' % (llen - maxloglines,self.log_filename,self.log_filename))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
274 res += rlog
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
275 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
276 res.append("### Odd, Picard left no log file %s - must have really barfed badly?\n" % self.log_filename)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
277 res.append('<hr/>The freely available <a href="http://picard.sourceforge.net/command-line-overview.shtml">Picard software</a> \n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
278 res.append( 'generated all outputs reported here running as a <a href="http://getgalaxy.org">Galaxy</a> tool')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
279 res.append(galhtmlpostfix)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
280 outf = open(self.opts.htmlout,'w')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
281 outf.write(''.join(res))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
282 outf.write('\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
283 outf.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
284
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
285 def makePicInterval(self,inbed=None,outf=None):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
286 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
287 picard wants bait and target files to have the same header length as the incoming bam/sam
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
288 a meaningful (ie accurate) representation will fail because of this - so this hack
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
289 it would be far better to be able to supply the original bed untouched
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
290 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
291 assert inbed <> None
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
292 bed = open(inbed,'r').readlines()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
293 thead = os.path.join(self.opts.outdir,'tempSamHead.txt')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
294 if self.opts.datatype == 'sam':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
295 cl = ['samtools view -H -S',self.opts.input,'>',thead]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
296 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
297 cl = ['samtools view -H',self.opts.input,'>',thead]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
298 self.runCL(cl=cl,output_dir=self.opts.outdir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
299 head = open(thead,'r').readlines()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
300 s = '## got %d rows of header\n' % (len(head))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
301 logging.info(s)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
302 o = open(outf,'w')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
303 o.write(''.join(head))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
304 o.write(''.join(bed))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
305 o.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
306 return outf
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
307
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
308 def cleanSam(self, insam=None, newsam=None, picardErrors=[],outformat=None):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
309 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
310 interesting problem - if paired, must remove mate pair of errors too or we have a new set of errors after cleaning - missing mate pairs!
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
311 Do the work of removing all the error sequences
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
312 pysam is cool
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
313 infile = pysam.Samfile( "-", "r" )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
314 outfile = pysam.Samfile( "-", "w", template = infile )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
315 for s in infile: outfile.write(s)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
316
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
317 errors from ValidateSameFile.jar look like
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
318 WARNING: Record 32, Read name SRR006041.1202260, NM tag (nucleotide differences) is missing
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
319 ERROR: Record 33, Read name SRR006041.1042721, Empty sequence dictionary.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
320 ERROR: Record 33, Read name SRR006041.1042721, RG ID on SAMRecord not found in header: SRR006041
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
321
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
322 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
323 assert os.path.isfile(insam), 'rgPicardValidate cleansam needs an input sam file - cannot find %s' % insam
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
324 assert newsam <> None, 'rgPicardValidate cleansam needs an output new sam file path'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
325 removeNames = [x.split(',')[1].replace(' Read name ','') for x in picardErrors if len(x.split(',')) > 2]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
326 remDict = dict(zip(removeNames,range(len(removeNames))))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
327 infile = pysam.Samfile(insam,'rb')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
328 info = 'found %d error sequences in picardErrors, %d unique' % (len(removeNames),len(remDict))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
329 if len(removeNames) > 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
330 outfile = pysam.Samfile(newsam,'wb',template=infile) # template must be an open file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
331 i = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
332 j = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
333 for row in infile:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
334 dropme = remDict.get(row.qname,None) # keep if None
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
335 if not dropme:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
336 outfile.write(row)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
337 j += 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
338 else: # discard
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
339 i += 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
340 info = '%s\n%s' % (info, 'Discarded %d lines writing %d to %s from %s' % (i,j,newsam,insam))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
341 outfile.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
342 infile.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
343 else: # we really want a nullop or a simple pointer copy
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
344 infile.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
345 if newsam:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
346 shutil.copy(insam,newsam)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
347 logging.info(info)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
348
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
349
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
350
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
351 def __main__():
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
352 doFix = False # tools returning htmlfile don't need this
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
353 doTranspose = True # default
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
354 maxloglines = 100 # default
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
355 #Parse Command Line
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
356 op = optparse.OptionParser()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
357 # All tools
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
358 op.add_option('-i', '--input', dest='input', help='Input SAM or BAM file' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
359 op.add_option('-e', '--inputext', default=None)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
360 op.add_option('-o', '--output', default=None)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
361 op.add_option('-n', '--title', default="Pick a Picard Tool")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
362 op.add_option('-t', '--htmlout', default=None)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
363 op.add_option('-d', '--outdir', default=None)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
364 op.add_option('-x', '--maxjheap', default='4g')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
365 op.add_option('-b', '--bisulphite', default='false')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
366 op.add_option('-s', '--sortorder', default='query')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
367 op.add_option('','--tmpdir', default='/tmp')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
368 op.add_option('-j','--jar',default='')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
369 op.add_option('','--picard-cmd',default=None)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
370 # Many tools
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
371 op.add_option( '', '--output-format', dest='output_format', help='Output format' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
372 op.add_option( '', '--bai-file', dest='bai_file', help='The path to the index file for the input bam file' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
373 op.add_option( '', '--ref', dest='ref', help='Built-in reference with fasta and dict file', default=None )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
374 # CreateSequenceDictionary
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
375 op.add_option( '', '--ref-file', dest='ref_file', help='Fasta to use as reference', default=None )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
376 op.add_option( '', '--species-name', dest='species_name', help='Species name to use in creating dict file from fasta file' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
377 op.add_option( '', '--build-name', dest='build_name', help='Name of genome assembly to use in creating dict file from fasta file' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
378 op.add_option( '', '--trunc-names', dest='trunc_names', help='Truncate sequence names at first whitespace from fasta file' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
379 # MarkDuplicates
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
380 op.add_option( '', '--remdups', default='true', help='Remove duplicates from output file' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
381 op.add_option( '', '--optdupdist', default="100", help='Maximum pixels between two identical sequences in order to consider them optical duplicates.' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
382 # CollectInsertSizeMetrics
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
383 op.add_option('', '--taillimit', default="0")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
384 op.add_option('', '--histwidth', default="0")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
385 op.add_option('', '--minpct', default="0.01")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
386 # CollectAlignmentSummaryMetrics
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
387 op.add_option('', '--maxinsert', default="20")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
388 op.add_option('', '--adaptors', action='append', type="string")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
389 # FixMateInformation and validate
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
390 # CollectGcBiasMetrics
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
391 op.add_option('', '--windowsize', default='100')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
392 op.add_option('', '--mingenomefrac', default='0.00001')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
393 # AddOrReplaceReadGroups
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
394 op.add_option( '', '--rg-opts', dest='rg_opts', help='Specify extra (optional) arguments with full, otherwise preSet' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
395 op.add_option( '', '--rg-lb', dest='rg_library', help='Read Group Library' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
396 op.add_option( '', '--rg-pl', dest='rg_platform', help='Read Group platform (e.g. illumina, solid)' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
397 op.add_option( '', '--rg-pu', dest='rg_plat_unit', help='Read Group platform unit (eg. run barcode) ' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
398 op.add_option( '', '--rg-sm', dest='rg_sample', help='Read Group sample name' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
399 op.add_option( '', '--rg-id', dest='rg_id', help='Read Group ID' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
400 op.add_option( '', '--rg-cn', dest='rg_seq_center', help='Read Group sequencing center name' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
401 op.add_option( '', '--rg-ds', dest='rg_desc', help='Read Group description' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
402 # ReorderSam
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
403 op.add_option( '', '--allow-inc-dict-concord', dest='allow_inc_dict_concord', help='Allow incomplete dict concordance' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
404 op.add_option( '', '--allow-contig-len-discord', dest='allow_contig_len_discord', help='Allow contig length discordance' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
405 # ReplaceSamHeader
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
406 op.add_option( '', '--header-file', dest='header_file', help='sam or bam file from which header will be read' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
407
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
408 op.add_option('','--assumesorted', default='true')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
409 op.add_option('','--readregex', default="[a-zA-Z0-9]+:[0-9]:([0-9]+):([0-9]+):([0-9]+).*")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
410 #estimatelibrarycomplexity
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
411 op.add_option('','--minid', default="5")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
412 op.add_option('','--maxdiff', default="0.03")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
413 op.add_option('','--minmeanq', default="20")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
414 #hsmetrics
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
415 op.add_option('','--baitbed', default=None)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
416 op.add_option('','--targetbed', default=None)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
417 #validate
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
418 op.add_option('','--ignoreflags', action='append', type="string")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
419 op.add_option('','--maxerrors', default=None)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
420 op.add_option('','--datatype', default=None)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
421 op.add_option('','--bamout', default=None)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
422 op.add_option('','--samout', default=None)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
423
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
424 opts, args = op.parse_args()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
425 opts.sortme = opts.assumesorted == 'false'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
426 assert opts.input <> None
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
427 # need to add
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
428 # instance that does all the work
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
429 pic = PicardBase(opts,sys.argv[0])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
430
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
431 tmp_dir = opts.outdir
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
432 haveTempout = False # we use this where sam output is an option
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
433
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
434 # set ref and dict files to use (create if necessary)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
435 ref_file_name = opts.ref
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
436 if opts.ref_file <> None:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
437 csd = 'CreateSequenceDictionary'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
438 realjarpath = os.path.split(opts.jar)[0]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
439 jarpath = os.path.join(realjarpath,'%s.jar' % csd) # for refseq
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
440 tmp_ref_fd, tmp_ref_name = tempfile.mkstemp( dir=opts.tmpdir , prefix = pic.picname)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
441 ref_file_name = '%s.fasta' % tmp_ref_name
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
442 # build dict
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
443 dict_file_name = '%s.dict' % tmp_ref_name
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
444 os.symlink( opts.ref_file, ref_file_name )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
445 cl = ['REFERENCE=%s' % ref_file_name]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
446 cl.append('OUTPUT=%s' % dict_file_name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
447 cl.append('URI=%s' % os.path.basename( opts.ref_file ))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
448 cl.append('TRUNCATE_NAMES_AT_WHITESPACE=%s' % opts.trunc_names)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
449 if opts.species_name:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
450 cl.append('SPECIES=%s' % opts.species_name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
451 if opts.build_name:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
452 cl.append('GENOME_ASSEMBLY=%s' % opts.build_name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
453 pic.delme.append(dict_file_name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
454 pic.delme.append(ref_file_name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
455 pic.delme.append(tmp_ref_name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
456 s = pic.runPic(jarpath, cl)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
457 # run relevant command(s)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
458
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
459 # define temporary output
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
460 # if output is sam, it must have that extension, otherwise bam will be produced
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
461 # specify sam or bam file with extension
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
462 if opts.output_format == 'sam':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
463 suff = '.sam'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
464 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
465 suff = ''
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
466 tmp_fd, tempout = tempfile.mkstemp( dir=opts.tmpdir, suffix=suff )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
467
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
468 cl = ['VALIDATION_STRINGENCY=LENIENT',]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
469
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
470 if pic.picname == 'AddOrReplaceReadGroups':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
471 # sort order to match Galaxy's default
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
472 cl.append('SORT_ORDER=coordinate')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
473 # input
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
474 cl.append('INPUT=%s' % opts.input)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
475 # outputs
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
476 cl.append('OUTPUT=%s' % tempout)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
477 # required read groups
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
478 cl.append('RGLB="%s"' % opts.rg_library)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
479 cl.append('RGPL="%s"' % opts.rg_platform)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
480 cl.append('RGPU="%s"' % opts.rg_plat_unit)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
481 cl.append('RGSM="%s"' % opts.rg_sample)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
482 if opts.rg_id:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
483 cl.append('RGID="%s"' % opts.rg_id)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
484 # optional read groups
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
485 if opts.rg_seq_center:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
486 cl.append('RGCN="%s"' % opts.rg_seq_center)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
487 if opts.rg_desc:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
488 cl.append('RGDS="%s"' % opts.rg_desc)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
489 pic.runPic(opts.jar, cl)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
490 haveTempout = True
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
491
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
492 elif pic.picname == 'BamIndexStats':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
493 tmp_fd, tmp_name = tempfile.mkstemp( dir=tmp_dir )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
494 tmp_bam_name = '%s.bam' % tmp_name
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
495 tmp_bai_name = '%s.bai' % tmp_bam_name
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
496 os.symlink( opts.input, tmp_bam_name )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
497 os.symlink( opts.bai_file, tmp_bai_name )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
498 cl.append('INPUT=%s' % ( tmp_bam_name ))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
499 pic.delme.append(tmp_bam_name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
500 pic.delme.append(tmp_bai_name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
501 pic.delme.append(tmp_name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
502 s = pic.runPic( opts.jar, cl )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
503 f = open(pic.metricsOut,'a')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
504 f.write(s) # got this on stdout from runCl
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
505 f.write('\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
506 f.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
507 doTranspose = False # but not transposed
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
508
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
509 elif pic.picname == 'EstimateLibraryComplexity':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
510 cl.append('I=%s' % opts.input)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
511 cl.append('O=%s' % pic.metricsOut)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
512 if float(opts.minid) > 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
513 cl.append('MIN_IDENTICAL_BASES=%s' % opts.minid)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
514 if float(opts.maxdiff) > 0.0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
515 cl.append('MAX_DIFF_RATE=%s' % opts.maxdiff)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
516 if float(opts.minmeanq) > 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
517 cl.append('MIN_MEAN_QUALITY=%s' % opts.minmeanq)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
518 if opts.readregex > '':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
519 cl.append('READ_NAME_REGEX="%s"' % opts.readregex)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
520 if float(opts.optdupdist) > 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
521 cl.append('OPTICAL_DUPLICATE_PIXEL_DISTANCE=%s' % opts.optdupdist)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
522 pic.runPic(opts.jar,cl)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
523
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
524 elif pic.picname == 'CollectAlignmentSummaryMetrics':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
525 # Why do we do this fakefasta thing? Because we need NO fai to be available or picard barfs unless it has the same length as the input data.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
526 # why? Dunno Seems to work without complaining if the .bai file is AWOL....
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
527 fakefasta = os.path.join(opts.outdir,'%s_fake.fasta' % os.path.basename(ref_file_name))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
528 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
529 os.symlink(ref_file_name,fakefasta)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
530 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
531 s = '## unable to symlink %s to %s - different devices? May need to replace with shutil.copy'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
532 info = s
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
533 shutil.copy(ref_file_name,fakefasta)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
534 pic.delme.append(fakefasta)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
535 cl.append('ASSUME_SORTED=%s' % opts.assumesorted)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
536 adaptorseqs = ''.join([' ADAPTER_SEQUENCE=%s' % x for x in opts.adaptors])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
537 cl.append(adaptorseqs)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
538 cl.append('IS_BISULFITE_SEQUENCED=%s' % opts.bisulphite)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
539 cl.append('MAX_INSERT_SIZE=%s' % opts.maxinsert)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
540 cl.append('OUTPUT=%s' % pic.metricsOut)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
541 cl.append('R=%s' % fakefasta)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
542 cl.append('TMP_DIR=%s' % opts.tmpdir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
543 if not opts.assumesorted.lower() == 'true': # we need to sort input
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
544 fakeinput = '%s.sorted' % opts.input
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
545 s = pic.sortSam(opts.input, fakeinput, opts.outdir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
546 pic.delme.append(fakeinput)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
547 cl.append('INPUT=%s' % fakeinput)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
548 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
549 cl.append('INPUT=%s' % os.path.abspath(opts.input))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
550 pic.runPic(opts.jar,cl)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
551
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
552
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
553 elif pic.picname == 'CollectGcBiasMetrics':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
554 assert os.path.isfile(ref_file_name),'PicardGC needs a reference sequence - cannot read %s' % ref_file_name
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
555 # sigh. Why do we do this fakefasta thing? Because we need NO fai to be available or picard barfs unless it has the same length as the input data.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
556 # why? Dunno
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
557 fakefasta = os.path.join(opts.outdir,'%s_fake.fasta' % os.path.basename(ref_file_name))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
558 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
559 os.symlink(ref_file_name,fakefasta)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
560 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
561 s = '## unable to symlink %s to %s - different devices? May need to replace with shutil.copy'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
562 info = s
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
563 shutil.copy(ref_file_name,fakefasta)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
564 pic.delme.append(fakefasta)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
565 x = 'rgPicardGCBiasMetrics'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
566 pdfname = '%s.pdf' % x
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
567 jpgname = '%s.jpg' % x
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
568 tempout = os.path.join(opts.outdir,'rgPicardGCBiasMetrics.out')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
569 temppdf = os.path.join(opts.outdir,pdfname)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
570 cl.append('R=%s' % fakefasta)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
571 cl.append('WINDOW_SIZE=%s' % opts.windowsize)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
572 cl.append('MINIMUM_GENOME_FRACTION=%s' % opts.mingenomefrac)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
573 cl.append('INPUT=%s' % opts.input)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
574 cl.append('OUTPUT=%s' % tempout)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
575 cl.append('TMP_DIR=%s' % opts.tmpdir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
576 cl.append('CHART_OUTPUT=%s' % temppdf)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
577 cl.append('SUMMARY_OUTPUT=%s' % pic.metricsOut)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
578 pic.runPic(opts.jar,cl)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
579 if os.path.isfile(temppdf):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
580 cl2 = ['convert','-resize x400',temppdf,os.path.join(opts.outdir,jpgname)] # make the jpg for fixPicardOutputs to find
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
581 s,stdouts = pic.runCL(cl=cl2,output_dir=opts.outdir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
582 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
583 s='### runGC: Unable to find pdf %s - please check the log for the causal problem\n' % temppdf
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
584 lf = open(pic.log_filename,'a')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
585 lf.write(s)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
586 lf.write('\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
587 lf.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
588
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
589 elif pic.picname == 'CollectInsertSizeMetrics':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
590 isPDF = 'InsertSizeHist.pdf'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
591 pdfpath = os.path.join(opts.outdir,isPDF)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
592 histpdf = 'InsertSizeHist.pdf'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
593 cl.append('I=%s' % opts.input)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
594 cl.append('O=%s' % pic.metricsOut)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
595 cl.append('HISTOGRAM_FILE=%s' % histpdf)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
596 if opts.taillimit <> '0':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
597 cl.append('TAIL_LIMIT=%s' % opts.taillimit)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
598 if opts.histwidth <> '0':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
599 cl.append('HISTOGRAM_WIDTH=%s' % opts.histwidth)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
600 if float( opts.minpct) > 0.0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
601 cl.append('MINIMUM_PCT=%s' % opts.minpct)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
602 pic.runPic(opts.jar,cl)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
603 if os.path.exists(pdfpath): # automake thumbnail - will be added to html
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
604 cl2 = ['mogrify', '-format jpg -resize x400 %s' % pdfpath]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
605 s,stdouts = pic.runCL(cl=cl2,output_dir=opts.outdir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
606 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
607 s = 'Unable to find expected pdf file %s<br/>\n' % pdfpath
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
608 s += 'This <b>always happens if single ended data was provided</b> to this tool,\n'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
609 s += 'so please double check that your input data really is paired-end NGS data.<br/>\n'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
610 s += 'If your input was paired data this may be a bug worth reporting to the galaxy-bugs list\n<br/>'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
611 stdouts = ''
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
612 logging.info(s)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
613 if len(stdouts) > 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
614 logging.info(stdouts)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
615
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
616 elif pic.picname == 'MarkDuplicates':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
617 # assume sorted even if header says otherwise
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
618 cl.append('ASSUME_SORTED=%s' % (opts.assumesorted))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
619 # input
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
620 cl.append('INPUT=%s' % opts.input)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
621 # outputs
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
622 cl.append('OUTPUT=%s' % opts.output)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
623 cl.append('METRICS_FILE=%s' % pic.metricsOut )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
624 # remove or mark duplicates
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
625 cl.append('REMOVE_DUPLICATES=%s' % opts.remdups)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
626 # the regular expression to be used to parse reads in incoming SAM file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
627 cl.append('READ_NAME_REGEX="%s"' % opts.readregex)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
628 # maximum offset between two duplicate clusters
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
629 cl.append('OPTICAL_DUPLICATE_PIXEL_DISTANCE=%s' % opts.optdupdist)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
630 pic.runPic(opts.jar, cl)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
631
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
632 elif pic.picname == 'FixMateInformation':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
633 cl.append('I=%s' % opts.input)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
634 cl.append('O=%s' % tempout)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
635 cl.append('SORT_ORDER=%s' % opts.sortorder)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
636 pic.runPic(opts.jar,cl)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
637 haveTempout = True
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
638
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
639 elif pic.picname == 'ReorderSam':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
640 # input
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
641 cl.append('INPUT=%s' % opts.input)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
642 # output
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
643 cl.append('OUTPUT=%s' % tempout)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
644 # reference
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
645 cl.append('REFERENCE=%s' % ref_file_name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
646 # incomplete dict concordance
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
647 if opts.allow_inc_dict_concord == 'true':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
648 cl.append('ALLOW_INCOMPLETE_DICT_CONCORDANCE=true')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
649 # contig length discordance
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
650 if opts.allow_contig_len_discord == 'true':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
651 cl.append('ALLOW_CONTIG_LENGTH_DISCORDANCE=true')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
652 pic.runPic(opts.jar, cl)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
653 haveTempout = True
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
654
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
655 elif pic.picname == 'ReplaceSamHeader':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
656 cl.append('INPUT=%s' % opts.input)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
657 cl.append('OUTPUT=%s' % tempout)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
658 cl.append('HEADER=%s' % opts.header_file)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
659 pic.runPic(opts.jar, cl)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
660 haveTempout = True
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
661
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
662 elif pic.picname == 'CalculateHsMetrics':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
663 maxloglines = 100
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
664 baitfname = os.path.join(opts.outdir,'rgPicardHsMetrics.bait')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
665 targetfname = os.path.join(opts.outdir,'rgPicardHsMetrics.target')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
666 baitf = pic.makePicInterval(opts.baitbed,baitfname)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
667 if opts.targetbed == opts.baitbed: # same file sometimes
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
668 targetf = baitf
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
669 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
670 targetf = pic.makePicInterval(opts.targetbed,targetfname)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
671 cl.append('BAIT_INTERVALS=%s' % baitf)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
672 cl.append('TARGET_INTERVALS=%s' % targetf)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
673 cl.append('INPUT=%s' % os.path.abspath(opts.input))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
674 cl.append('OUTPUT=%s' % pic.metricsOut)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
675 cl.append('TMP_DIR=%s' % opts.tmpdir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
676 pic.runPic(opts.jar,cl)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
677
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
678 elif pic.picname == 'ValidateSamFile':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
679 import pysam
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
680 doTranspose = False
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
681 sortedfile = os.path.join(opts.outdir,'rgValidate.sorted')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
682 stf = open(pic.log_filename,'w')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
683 tlog = None
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
684 if opts.datatype == 'sam': # need to work with a bam
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
685 tlog,tempbam = pic.samToBam(opts.input,opts.outdir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
686 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
687 tlog = pic.sortSam(tempbam,sortedfile,opts.outdir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
688 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
689 print '## exception on sorting sam file %s' % opts.input
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
690 else: # is already bam
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
691 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
692 tlog = pic.sortSam(opts.input,sortedfile,opts.outdir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
693 except: # bug - [bam_sort_core] not being ignored - TODO fixme
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
694 print '## exception on sorting bam file %s' % opts.input
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
695 if tlog:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
696 print '##tlog=',tlog
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
697 stf.write(tlog)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
698 stf.write('\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
699 sortedfile = '%s.bam' % sortedfile # samtools does that
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
700 cl.append('O=%s' % pic.metricsOut)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
701 cl.append('TMP_DIR=%s' % opts.tmpdir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
702 cl.append('I=%s' % sortedfile)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
703 opts.maxerrors = '99999999'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
704 cl.append('MAX_OUTPUT=%s' % opts.maxerrors)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
705 if opts.ignoreflags[0] <> 'None': # picard error values to ignore
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
706 igs = ['IGNORE=%s' % x for x in opts.ignoreflags if x <> 'None']
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
707 cl.append(' '.join(igs))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
708 if opts.bisulphite.lower() <> 'false':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
709 cl.append('IS_BISULFITE_SEQUENCED=true')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
710 if opts.ref <> None or opts.ref_file <> None:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
711 cl.append('R=%s' % ref_file_name)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
712 pic.runPic(opts.jar,cl)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
713 if opts.datatype == 'sam':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
714 pic.delme.append(tempbam)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
715 newsam = opts.output
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
716 outformat = 'bam'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
717 pe = open(pic.metricsOut,'r').readlines()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
718 pic.cleanSam(insam=sortedfile, newsam=newsam, picardErrors=pe,outformat=outformat)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
719 pic.delme.append(sortedfile) # not wanted
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
720 stf.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
721 pic.cleanup()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
722 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
723 print >> sys.stderr,'picard.py got an unknown tool name - %s' % pic.picname
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
724 sys.exit(1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
725 if haveTempout:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
726 # Some Picard tools produced a potentially intermediate bam file.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
727 # Either just move to final location or create sam
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
728 shutil.move(tempout, os.path.abspath(opts.output))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
729
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
730 if opts.htmlout <> None or doFix: # return a pretty html page
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
731 pic.fixPicardOutputs(transpose=doTranspose,maxloglines=maxloglines)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
732
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
733 if __name__=="__main__": __main__()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
734