annotate tools/rgenetics/rgFastQC.py @ 2:c2a356708570

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:42 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 wrapper for fastqc
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 called as
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 <command interpreter="python">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 rgFastQC.py -i $input_file -d $html_file.files_path -o $html_file -n "$out_prefix"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 </command>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 Current release seems overly intolerant of sam/bam header strangeness
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 Author notified...
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 import os,sys,subprocess,optparse,shutil,tempfile
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 from rgutils import getFileString
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
class FastQC(object):
    """Run FastQC on one input file and build a single-directory HTML report.

    The options object passed to the constructor must provide the attributes
    read by the methods below: ``executable``, ``outputdir``, ``informat``,
    ``contaminants`` and ``input``.
    """

    def __init__(self, opts=None):
        """Store the options object.

        ``opts`` is required even though it has a default; an
        ``AssertionError`` is raised when it is missing (raised explicitly so
        the check is not stripped by ``python -O``).
        """
        if opts is None:
            raise AssertionError('FastQC requires an options object')
        self.opts = opts

    def _run_and_capture(self, cl):
        """Run the argument list ``cl`` and return its output as a list of lines.

        stdout and stderr of the child are both redirected to a temporary log
        file which is read back and deleted.  The child runs with
        ``opts.outputdir`` as its working directory.  If the program cannot be
        started at all (``OSError``), a line describing the failure is
        appended to the returned log instead of raising, so the caller can
        still show the user what went wrong.
        """
        fd, tlog = tempfile.mkstemp(prefix='rgFastQClog')
        launch_error = None
        try:
            # fdopen takes ownership of the descriptor returned by mkstemp so
            # it is closed together with the file object.
            with os.fdopen(fd, 'w') as sout:
                try:
                    # An argument list (no shell) keeps paths containing
                    # spaces or shell metacharacters intact.
                    proc = subprocess.Popen(cl, shell=False, stdout=sout,
                                            stderr=sout,
                                            cwd=self.opts.outputdir)
                    proc.wait()
                except OSError as err:
                    launch_error = ('## rgFastQC.py: unable to run %s: %s\n'
                                    % (cl[0], err))
            with open(tlog, 'r') as logfile:
                runlog = logfile.readlines()
        finally:
            os.unlink(tlog)
        if launch_error is not None:
            runlog.append(launch_error)
        return runlog

    def run_fastqc(self):
        """Run FastQC and return the report as a list of HTML lines.

        In batch mode fastqc writes its results to a new folder named
        [infilebasename]_fastqc, e.g.

            duplication_levels.png  fastqc_icon.png  per_base_n_content.png
            fastqc_report.html  fastqc_data.txt  summary.txt  ...

        This method locates that folder inside ``opts.outputdir``, reads
        ``fastqc_report.html`` from it, flattens the folder into
        ``opts.outputdir`` and returns the report with links to the produced
        files added.  If no report can be read, the returned lines are an
        error page containing the captured FastQC output.
        """
        cl = [self.opts.executable, '-o', self.opts.outputdir]
        if self.opts.informat in ('sam', 'bam'):
            cl += ['-f', self.opts.informat]
        if self.opts.contaminants is not None:
            cl += ['-c', self.opts.contaminants]
        cl.append(self.opts.input)
        runlog = self._run_and_capture(cl)

        # fastqc chooses its own output directory name, so search for it.
        # As before, the last matching directory in listing order wins.
        odpath = None
        for f in os.listdir(self.opts.outputdir):
            d = os.path.join(self.opts.outputdir, f)
            if os.path.isdir(d) and d.endswith('_fastqc'):
                odpath = d

        hpath = None
        rep = None  # stays None unless the report was actually read
        if odpath is not None:
            hpath = os.path.join(odpath, 'fastqc_report.html')
            try:
                with open(hpath, 'r') as handle:
                    rep = handle.readlines()
            except (IOError, OSError):
                rep = None

        if rep is None:
            res = ['## odpath=%s: No output found in %s. Output for the run was:<pre>\n' % (odpath, hpath)]
            res += runlog
            res += ['</pre>\n',
                    'Please read the above for clues<br/>\n',
                    'If you selected a sam/bam format file, it might not have headers or they may not start with @HD?<br/>\n',
                    'It is also possible that the log shows that fastqc is not installed?<br/>\n',
                    'If that is the case, please tell the relevant Galaxy administrator that it can be snarfed from<br/>\n',
                    'http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/<br/>\n']
            return res

        self.fix_fastqcimages(odpath)
        # List the output directory again now that everything has been moved.
        excludefiles = ['tick.png', 'warning.png', 'fastqc_icon.png', 'error.png']
        flist = [x for x in os.listdir(self.opts.outputdir)
                 if x not in excludefiles]
        # The report links into Icons/ and Images/ subdirectories that no
        # longer exist after flattening, so strip those prefixes.
        rep = [line.replace('Icons/', '').replace('Images/', '') for line in rep]
        return self.fix_fastqc(rep, flist, runlog)

    def fix_fastqc(self, rep=None, flist=None, runlog=None):
        """Insert a table of links to the output files into the report.

        ``rep`` is the report as a list of lines, ``flist`` the file names
        (relative to ``opts.outputdir``) to link to; ``runlog`` is accepted
        for interface compatibility and not used.  The additions are placed
        immediately before the ``</body></html>`` line, or before the last
        line of ``rep`` when that exact line is not present.  Neither input
        list is modified; a new list is returned.
        """
        rep = [] if rep is None else rep
        flist = [] if flist is None else flist
        bs = '</body></html>\n'  # exact closing line expected from fastqc
        try:
            bodyindex = rep.index(bs)
        except ValueError:
            bodyindex = len(rep) - 1
        res = ['<table>\n']
        for f in sorted(flist):
            # Names come from a listing of the output directory, so they
            # must be tested relative to it rather than the current directory.
            if not os.path.isdir(os.path.join(self.opts.outputdir, f)):
                fn = os.path.split(f)[-1]
                res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn, getFileString(fn, self.opts.outputdir)))
        res.append('</table><p/>\n')
        res.append('<a href="http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/">FastQC documentation and full attribution is here</a><br/><hr/>\n')
        res.append('FastQC was run by Galaxy using the rgenetics rgFastQC wrapper - see http://rgenetics.org for details and licensing\n')
        return rep[:bodyindex] + res + rep[bodyindex:]  # with our additions

    def fix_fastqcimages(self, odpath):
        """Move every file FastQC produced directly into ``opts.outputdir``.

        Galaxy wants everything in the same files directory, so the plain
        files in ``odpath/Icons``, ``odpath/Images`` and ``odpath`` itself are
        moved up and the emptied directories removed.
        """
        icpath = os.path.join(odpath, 'Icons')
        impath = os.path.join(odpath, 'Images')
        # Subdirectories first so that odpath can be empty when its turn comes.
        for adir in (icpath, impath, odpath):
            if not os.path.exists(adir):
                continue
            for f in os.listdir(adir):
                sauce = os.path.join(adir, f)
                if not os.path.isdir(sauce):
                    shutil.move(sauce, os.path.join(self.opts.outputdir, f))
            try:
                os.rmdir(adir)
            except OSError:
                # Best effort: a directory that still holds an unexpected
                # subdirectory is left in place rather than aborting the run.
                pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
127
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
128
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
129
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: parse options, run FastQC through the wrapper
    # class and write the resulting HTML report to the -o file.
    op = optparse.OptionParser()
    op.add_option('-i', '--input', default=None)
    op.add_option('-o', '--htmloutput', default=None)
    op.add_option('-d', '--outputdir', default="/tmp/shortread")
    op.add_option('-f', '--informat', default='fastq')
    op.add_option('-n', '--namejob', default='rgFastQC')
    op.add_option('-c', '--contaminants', default=None)
    op.add_option('-e', '--executable', default='fastqc')
    opts, args = op.parse_args()
    # Validate explicitly instead of with assert, which is stripped by -O.
    if opts.input is None:
        op.error('##rgFastQC.py error - an input file is required (-i)')
    if opts.htmloutput is None:
        op.error('##rgFastQC.py error - an html output file is required (-o)')
    # Accept either a path to an existing file or, for a bare command name
    # such as the default 'fastqc', a file of that name in a PATH directory.
    exe_found = os.path.isfile(opts.executable)
    if not exe_found and not os.path.dirname(opts.executable):
        exe_found = any(
            os.path.isfile(os.path.join(pdir, opts.executable))
            for pdir in os.environ.get('PATH', '').split(os.pathsep) if pdir)
    if not exe_found:
        op.error('##rgFastQC.py error - cannot find executable %s' % opts.executable)
    if not os.path.exists(opts.outputdir):
        os.makedirs(opts.outputdir)
    runner = FastQC(opts)
    html = runner.run_fastqc()
    with open(opts.htmloutput, 'w') as outf:
        outf.write(''.join(html))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
149