comparison rgFastQC.py @ 10:a00a6402d09a draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/fastqc commit 2bfbb5ae6b801e43355fdc3f964a5111fe3fe3a1
author iuc
date Wed, 08 Feb 2017 12:43:43 -0500
parents 3a458e268066
children db2dc6bc8f05
comparison
equal deleted inserted replaced
9:3a458e268066 10:a00a6402d09a
13 13
14 EXAMPLE (generated by Galaxy) 14 EXAMPLE (generated by Galaxy)
15 15
16 rgFastQC.py -i path/dataset_1.dat -j 1000gsample.fastq -o path/dataset_3.dat -d path/job_working_directory/subfolder 16 rgFastQC.py -i path/dataset_1.dat -j 1000gsample.fastq -o path/dataset_3.dat -d path/job_working_directory/subfolder
17 -f fastq -n FastQC -c path/dataset_2.dat -e fastqc 17 -f fastq -n FastQC -c path/dataset_2.dat -e fastqc
18
19 """ 18 """
20 19 import bz2
20 import glob
21 import gzip
22 import mimetypes
23 import optparse
24 import os
21 import re 25 import re
22 import os
23 import shutil 26 import shutil
24 import subprocess 27 import subprocess
25 import optparse
26 import tempfile 28 import tempfile
27 import glob
28 import gzip
29 import bz2
30 import zipfile 29 import zipfile
31 import mimetypes 30
32 31
33 class FastQCRunner(object): 32 class FastQCRunner(object):
34 33 def __init__(self, opts=None):
35 def __init__(self,opts=None):
36 ''' 34 '''
37 Initializes an object to run FastQC in Galaxy. To start the process, use the function run_fastqc() 35 Initializes an object to run FastQC in Galaxy. To start the process, use the function run_fastqc()
38 ''' 36 '''
39 37
40 # Check whether the options are specified and saves them into the object 38 # Check whether the options are specified and saves them into the object
41 assert opts != None 39 assert opts is not None
42 self.opts = opts 40 self.opts = opts
43 41
44 def prepare_command_line(self): 42 def prepare_command_line(self):
45 ''' 43 '''
46 Develops the Commandline to run FastQC in Galaxy 44 Develops the Commandline to run FastQC in Galaxy
60 f.readline() 58 f.readline()
61 except: 59 except:
62 trimext = True 60 trimext = True
63 f.close() 61 f.close()
64 elif linf.endswith('bz2'): 62 elif linf.endswith('bz2'):
65 f = bz2.open(self.opts.input,'rb') 63 f = bz2.BZ2File(self.opts.input, 'r')
66 try: 64 try:
67 f.readline() 65 f.readline()
68 except: 66 except:
69 trimext = True 67 trimext = True
70 f.close() 68 f.close()
71 elif linf.endswith('.zip'): 69 elif linf.endswith('.zip'):
72 if not zipfile.is_zipfile(self.opts.input): 70 if not zipfile.is_zipfile(self.opts.input):
73 trimext = True 71 trimext = True
74 if trimext: 72 if trimext:
75 f = open(self.opts.input) 73 f = open(self.opts.input)
76 try: 74 try:
77 f.readline() 75 f.readline()
78 except: 76 except:
79 raise Exception("Input file corruption, could not identify the filetype") 77 raise Exception("Input file corruption, could not identify the filetype")
80 infname = os.path.splitext(infname)[0] 78 infname = os.path.splitext(infname)[0]
81 79
82 # Replace unwanted or problematic characters in the input file name 80 # Replace unwanted or problematic characters in the input file name
83 self.fastqinfilename = re.sub(ur'[^a-zA-Z0-9_\-\.]', '_', os.path.basename(infname)) 81 self.fastqinfilename = re.sub(r'[^a-zA-Z0-9_\-\.]', '_', os.path.basename(infname))
84 # check that the symbolic link gets a proper ending, fastqc seems to ignore the given format otherwise 82 # check that the symbolic link gets a proper ending, fastqc seems to ignore the given format otherwise
85 if 'fastq' in opts.informat: 83 if 'fastq' in self.opts.informat:
86 # with fastq the .ext is ignored, but when a format is actually passed it must comply with fastqc's 84 # with fastq the .ext is ignored, but when a format is actually passed it must comply with fastqc's
87 # accepted formats.. 85 # accepted formats..
88 opts.informat = 'fastq' 86 self.opts.informat = 'fastq'
89 elif not self.fastqinfilename.endswith(opts.informat): 87 elif not self.fastqinfilename.endswith(self.opts.informat):
90 self.fastqinfilename += '.%s' % opts.informat 88 self.fastqinfilename += '.%s' % self.opts.informat
91 89
92 # Build the Commandline from the given parameters 90 # Build the Commandline from the given parameters
93 command_line = [opts.executable, '--outdir %s' % opts.outputdir] 91 command_line = [opts.executable, '--outdir %s' % self.opts.outputdir]
94 if opts.contaminants != None: 92 if self.opts.contaminants is not None:
95 command_line.append('--contaminants %s' % opts.contaminants) 93 command_line.append('--contaminants %s' % self.opts.contaminants)
96 if opts.limits != None: 94 if self.opts.limits is not None:
97 command_line.append('--limits %s' % opts.limits) 95 command_line.append('--limits %s' % self.opts.limits)
98 command_line.append('--quiet') 96 command_line.append('--quiet')
99 command_line.append('--extract') # to access the output text file 97 command_line.append('--extract') # to access the output text file
100 if type[-1] != "gzip": 98 if type[-1] != "gzip":
101 command_line.append('-f %s' % opts.informat) 99 command_line.append('-f %s' % self.opts.informat)
102 else: 100 else:
103 self.fastqinfilename += ".gz" 101 self.fastqinfilename += ".gz"
104 command_line.append(self.fastqinfilename) 102 command_line.append(self.fastqinfilename)
105 self.command_line = ' '.join(command_line) 103 self.command_line = ' '.join(command_line)
106 104
107 def copy_output_file_to_dataset(self): 105 def copy_output_file_to_dataset(self):
108 ''' 106 '''
109 Retrieves the output html and text files from the output directory and copies them to the Galaxy output files 107 Retrieves the output html and text files from the output directory and copies them to the Galaxy output files
110 ''' 108 '''
111 109
112 # retrieve html file 110 # retrieve html file
113 result_file = glob.glob(opts.outputdir + '/*html') 111 result_file = glob.glob(self.opts.outputdir + '/*html')
114 with open(result_file[0], 'rb') as fsrc: 112 with open(result_file[0], 'rb') as fsrc:
115 with open(self.opts.htmloutput, 'wb') as fdest: 113 with open(self.opts.htmloutput, 'wb') as fdest:
116 shutil.copyfileobj(fsrc, fdest) 114 shutil.copyfileobj(fsrc, fdest)
117 115
118 # retrieve text file 116 # retrieve text file
119 text_file = glob.glob(opts.outputdir + '/*/fastqc_data.txt') 117 text_file = glob.glob(self.opts.outputdir + '/*/fastqc_data.txt')
120 with open(text_file[0], 'rb') as fsrc: 118 with open(text_file[0], 'rb') as fsrc:
121 with open(self.opts.textoutput, 'wb') as fdest: 119 with open(self.opts.textoutput, 'wb') as fdest:
122 shutil.copyfileobj(fsrc, fdest) 120 shutil.copyfileobj(fsrc, fdest)
123 121
124 def run_fastqc(self): 122 def run_fastqc(self):
125 ''' 123 '''
126 Executes FastQC. Make sure the mandatory import parameters input, inputfilename, outputdir and htmloutput have been specified in the options (opts) 124 Executes FastQC. Make sure the mandatory import parameters input, inputfilename, outputdir and htmloutput have been specified in the options
127 ''' 125 '''
128 126
129 # Create a log file 127 # Create a log file
130 dummy,tlog = tempfile.mkstemp(prefix='rgFastQC',suffix=".log",dir=self.opts.outputdir) 128 dummy, tlog = tempfile.mkstemp(prefix='rgFastQC', suffix=".log", dir=self.opts.outputdir)
131 sout = open(tlog, 'w') 129 sout = open(tlog, 'w')
132 130
133 self.prepare_command_line() 131 self.prepare_command_line()
134 sout.write(self.command_line) 132 sout.write(self.command_line)
135 sout.write('\n') 133 sout.write('\n')
136 sout.write("Creating symlink\n") # between the input (.dat) file and the given input file name 134 sout.write("Creating symlink\n") # between the input (.dat) file and the given input file name
137 os.symlink(self.opts.input, self.fastqinfilename) 135 os.symlink(self.opts.input, self.fastqinfilename)
138 sout.write("check_call\n") 136 sout.write("check_call\n")
139 subprocess.check_call(self.command_line, shell=True) 137 subprocess.check_call(self.command_line, shell=True)
140 sout.write("Copying working %s file to %s \n" % (self.fastqinfilename, self.opts.htmloutput)) 138 sout.write("Copying working %s file to %s \n" % (self.fastqinfilename, self.opts.htmloutput))
141 self.copy_output_file_to_dataset() 139 self.copy_output_file_to_dataset()
142 sout.write("Finished") 140 sout.write("Finished")
143 sout.close() 141 sout.close()
142
144 143
145 if __name__ == '__main__': 144 if __name__ == '__main__':
146 op = optparse.OptionParser() 145 op = optparse.OptionParser()
147 op.add_option('-i', '--input', default=None) 146 op.add_option('-i', '--input', default=None)
148 op.add_option('-j', '--inputfilename', default=None) 147 op.add_option('-j', '--inputfilename', default=None)
154 op.add_option('-c', '--contaminants', default=None) 153 op.add_option('-c', '--contaminants', default=None)
155 op.add_option('-l', '--limits', default=None) 154 op.add_option('-l', '--limits', default=None)
156 op.add_option('-e', '--executable', default='fastqc') 155 op.add_option('-e', '--executable', default='fastqc')
157 opts, args = op.parse_args() 156 opts, args = op.parse_args()
158 157
159 assert opts.input != None 158 assert opts.input is not None
160 assert opts.inputfilename != None 159 assert opts.inputfilename is not None
161 assert opts.htmloutput != None 160 assert opts.htmloutput is not None
162 if not os.path.exists(opts.outputdir): 161 if not os.path.exists(opts.outputdir):
163 os.makedirs(opts.outputdir) 162 os.makedirs(opts.outputdir)
164 163
165 fastqc_runner = FastQCRunner(opts) 164 fastqc_runner = FastQCRunner(opts)
166 fastqc_runner.run_fastqc() 165 fastqc_runner.run_fastqc()