Mercurial > repos > xuebing > sharplabtool
comparison tools/rgenetics/rgFastQC.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 """ | |
2 wrapper for fastqc | |
3 | |
4 called as | |
5 <command interpreter="python"> | |
6 rgFastqc.py -i $input_file -d $html_file.files_path -o $html_file -n "$out_prefix" | |
7 </command> | |
8 | |
9 | |
10 | |
11 Current release seems overly intolerant of sam/bam header strangeness | |
12 Author notified... | |
13 | |
14 | |
15 """ | |
16 | |
17 import os,sys,subprocess,optparse,shutil,tempfile | |
18 from rgutils import getFileString | |
19 | |
class FastQC(object):
    """Wrapper that runs the fastqc executable and rewrites its HTML report.

    The options object passed to the constructor must provide the
    attributes read by the methods below: ``executable``, ``outputdir``,
    ``informat``, ``contaminants`` and ``input``.
    """

    def __init__(self, opts=None):
        """Store the parsed command-line options.

        Raises AssertionError if ``opts`` is None.
        """
        assert opts is not None
        self.opts = opts

    def _run_logged(self, cl):
        """Run the command list ``cl`` and return its combined output lines.

        stdout and stderr are both captured in a temporary log file which
        is removed before returning. If the command cannot be started at
        all, the reason is recorded in the returned log rather than raised,
        so the caller can still show it to the user.
        """
        fd, tlog = tempfile.mkstemp(prefix='rgFastQClog')
        try:
            # wrap the descriptor mkstemp already opened so it is not leaked
            sout = os.fdopen(fd, 'w')
            try:
                try:
                    # argument list + shell=False: paths containing spaces
                    # or shell metacharacters are passed through verbatim
                    p = subprocess.Popen(cl, shell=False, stderr=sout,
                                         stdout=sout, cwd=self.opts.outputdir)
                    p.wait()
                except OSError as e:
                    sout.write('## rgFastQC could not run %s: %s\n'
                               % (' '.join(cl), e))
            finally:
                sout.close()
            with open(tlog, 'r') as logf:
                return logf.readlines()
        finally:
            os.unlink(tlog)

    def run_fastqc(self):
        """Run fastqc and return the lines of the html page to write.

        In batch mode fastqc does not behave very nicely - it writes to a
        new folder called [infilebasename]_fastqc, e.g.
        rlazarus@omics:/data/galaxy/test$ ls FC041_1_sequence_fastqc
        duplication_levels.png  fastqc_icon.png  fastqc_report.html
        fastqc_data.txt  summary.txt  tick.png  warning.png  error.png ...

        Returns a list of strings. If no readable fastqc_report.html was
        produced, the list is a diagnostic page containing the run log;
        otherwise it is the fastqc report with links fixed and a table of
        the output files inserted by fix_fastqc.
        """
        opts = self.opts
        cl = [opts.executable, '-o', opts.outputdir]
        if opts.informat in ['sam', 'bam']:
            cl += ['-f', opts.informat]
        if opts.contaminants is not None:
            cl += ['-c', opts.contaminants]
        cl.append(opts.input)
        runlog = self._run_logged(cl)

        # fastqc plays games with its output directory name, so look for
        # any subdirectory ending in _fastqc (the last one listed wins)
        odpath = None
        for f in os.listdir(opts.outputdir):
            d = os.path.join(opts.outputdir, f)
            if os.path.isdir(d) and d.endswith('_fastqc'):
                odpath = d

        hpath = None
        rep = None  # stays None unless the report was actually read
        if odpath is not None:
            hpath = os.path.join(odpath, 'fastqc_report.html')
            try:
                with open(hpath, 'r') as repf:
                    rep = repf.readlines()
            except (IOError, OSError):
                rep = None

        if rep is None:
            res = ['## odpath=%s: No output found in %s. Output for the run was:<pre>\n' % (odpath, hpath), ]
            res += runlog
            res += ['</pre>\n',
                    'Please read the above for clues<br/>\n',
                    'If you selected a sam/bam format file, it might not have headers or they may not start with @HD?<br/>\n',
                    'It is also possible that the log shows that fastqc is not installed?<br/>\n',
                    'If that is the case, please tell the relevant Galaxy administrator that it can be snarfed from<br/>\n',
                    'http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/<br/>\n', ]
            return res

        self.fix_fastqcimages(odpath)
        # list again now that everything has been moved into outputdir
        excludefiles = ['tick.png', 'warning.png', 'fastqc_icon.png', 'error.png']
        flist = [x for x in os.listdir(opts.outputdir) if x not in excludefiles]
        # the files were flattened into one directory, so drop the Icons/
        # and Images/ prefixes used by the latest fastqc report
        rep = [line.replace('Icons/', '').replace('Images/', '') for line in rep]
        return self.fix_fastqc(rep, flist, runlog)

    def fix_fastqc(self, rep=None, flist=None, runlog=None):
        """Insert a table of links to the output files into the report.

        rep    -- lines of the fastqc html report (default: empty)
        flist  -- file names to link to; entries that are directories under
                  the output directory are skipped (default: empty)
        runlog -- accepted for compatibility with callers; not used

        Returns a new list of lines; neither rep nor flist is modified.
        The additions go just before the closing body/html line or, if
        that exact line is not present, before the last line of rep.
        """
        rep = [] if rep is None else rep
        flist = [] if flist is None else flist
        bs = '</body></html>\n'  # hope they don't change this
        try:
            bodyindex = rep.index(bs)
        except ValueError:
            bodyindex = len(rep) - 1
        res = ['<table>\n']
        for f in sorted(flist):
            # names from os.listdir are relative to the output directory,
            # not to the current working directory
            if os.path.isdir(os.path.join(self.opts.outputdir, f)):
                continue
            fn = os.path.basename(f)
            # getFileString is imported from rgutils; presumably it returns
            # a display label for the file - confirm against rgutils
            res.append('<tr><td><a href="%s">%s</a></td></tr>\n' % (fn, getFileString(fn, self.opts.outputdir)))
        res.append('</table><p/>\n')
        res.append('<a href="http://www.bioinformatics.bbsrc.ac.uk/projects/fastqc/">FastQC documentation and full attribution is here</a><br/><hr/>\n')
        res.append('FastQC was run by Galaxy using the rgenetics rgFastQC wrapper - see http://rgenetics.org for details and licensing\n')
        return rep[:bodyindex] + res + rep[bodyindex:]  # with our additions

    def fix_fastqcimages(self, odpath):
        """Move every file fastqc wrote into the single output directory.

        Galaxy wants everything in the same files_dir, so the plain files
        in odpath/Icons, odpath/Images and odpath itself are moved into
        self.opts.outputdir and each of those directories is then removed.
        os.rmdir raises OSError if a directory still holds subdirectories.
        """
        icpath = os.path.join(odpath, 'Icons')
        impath = os.path.join(odpath, 'Images')
        # subdirectories first so that odpath is empty when it is removed
        for adir in [icpath, impath, odpath]:
            if not os.path.exists(adir):
                continue
            for f in os.listdir(adir):
                sauce = os.path.join(adir, f)
                if not os.path.isdir(sauce):
                    shutil.move(sauce, os.path.join(self.opts.outputdir, f))
            os.rmdir(adir)
127 | |
128 | |
129 | |
if __name__ == '__main__':
    # Command-line entry point: parse the options, run fastqc through the
    # FastQC wrapper and write the resulting html page to --htmloutput.
    op = optparse.OptionParser()
    op.add_option('-i', '--input', default=None)
    op.add_option('-o', '--htmloutput', default=None)
    op.add_option('-d', '--outputdir', default="/tmp/shortread")
    op.add_option('-f', '--informat', default='fastq')
    op.add_option('-n', '--namejob', default='rgFastQC')
    op.add_option('-c', '--contaminants', default=None)
    op.add_option('-e', '--executable', default='fastqc')
    opts, args = op.parse_args()
    # op.error prints the message to stderr and exits; unlike assert it is
    # not stripped when python runs with -O
    if opts.input is None:
        op.error('##rgFastQC.py error - an input file (-i) is required')
    if opts.htmloutput is None:
        # fail before running fastqc rather than when opening the output
        op.error('##rgFastQC.py error - an html output path (-o) is required')
    if not os.path.isfile(opts.executable):
        op.error('##rgFastQC.py error - cannot find executable %s' % opts.executable)
    if not os.path.exists(opts.outputdir):
        os.makedirs(opts.outputdir)
    html = FastQC(opts).run_fastqc()
    with open(opts.htmloutput, 'w') as outf:
        outf.write(''.join(html))
149 |