comparison rgFastQC.py @ 8:06819360a9e2 draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fastqc commit df4c0b0c6372e2984966e220fa42ecd8a3d370e8
author devteam
date Mon, 31 Oct 2016 10:40:12 -0400
parents 3fdc1a74d866
children 3a458e268066
comparison
equal deleted inserted replaced
7:3fdc1a74d866 8:06819360a9e2
28 import gzip 28 import gzip
29 import bz2 29 import bz2
30 import zipfile 30 import zipfile
31 31
32 class FastQCRunner(object): 32 class FastQCRunner(object):
33 33
def __init__(self, opts=None):
    '''
    Initializes an object to run FastQC in Galaxy. To start the process, use the function run_fastqc()

    opts -- the parsed command-line options object; it is stored unchanged as self.opts.

    Raises AssertionError when opts is None.
    '''

    # Check whether the options are specified and saves them into the object.
    # An explicit raise is used instead of an `assert` statement so the check is
    # not stripped under `python -O`; AssertionError is kept so callers observe
    # the same exception type as before. `is None` avoids invoking a custom
    # __ne__ on the options object.
    if opts is None:
        raise AssertionError("FastQCRunner requires an options object, got None")
    self.opts = opts
42 42
43 def prepare_command_line(self): 43 def prepare_command_line(self):
44 ''' 44 '''
45 Develops the Commandline to run FastQC in Galaxy 45 Develops the Commandline to run FastQC in Galaxy
46 ''' 46 '''
47 47
48 # Check whether a given file compression format is valid 48 # Check whether a given file compression format is valid
49 # This prevents uncompression of already uncompressed files 49 # This prevents uncompression of already uncompressed files
50 infname = self.opts.inputfilename 50 infname = self.opts.inputfilename
51 linf = infname.lower() 51 linf = infname.lower()
52 trimext = False 52 trimext = False
53 # decompression at upload currently does NOT remove this now bogus ending - fastqc will barf 53 # decompression at upload currently does NOT remove this now bogus ending - fastqc will barf
54 # patched may 29 2013 until this is fixed properly 54 # patched may 29 2013 until this is fixed properly
55 if ( linf.endswith('.gz') or linf.endswith('.gzip') ): 55 if ( linf.endswith('.gz') or linf.endswith('.gzip') ):
56 f = gzip.open(self.opts.input) 56 f = gzip.open(self.opts.input)
57 try: 57 try:
58 f.readline() 58 f.readline()
59 except: 59 except:
60 trimext = True 60 trimext = True
74 try: 74 try:
75 f.readline() 75 f.readline()
76 except: 76 except:
77 raise Exception("Input file corruption, could not identify the filetype") 77 raise Exception("Input file corruption, could not identify the filetype")
78 infname = os.path.splitext(infname)[0] 78 infname = os.path.splitext(infname)[0]
79 79
80 # Replace unwanted or problematic characters in the input file name 80 # Replace unwanted or problematic characters in the input file name
81 self.fastqinfilename = re.sub(ur'[^a-zA-Z0-9_\-\.]', '_', os.path.basename(infname)) 81 self.fastqinfilename = re.sub(ur'[^a-zA-Z0-9_\-\.]', '_', os.path.basename(infname))
82 # check that the symbolic link gets a proper ending, fastqc seems to ignore the given format otherwise 82 # check that the symbolic link gets a proper ending, fastqc seems to ignore the given format otherwise
83 if 'fastq' in opts.informat: 83 if 'fastq' in opts.informat:
84 # with fastq the .ext is ignored, but when a format is actually passed it must comply with fastqc's 84 # with fastq the .ext is ignored, but when a format is actually passed it must comply with fastqc's
85 # accepted formats.. 85 # accepted formats..
86 opts.informat = 'fastq' 86 opts.informat = 'fastq'
87 elif not self.fastqinfilename.endswith(opts.informat): 87 elif not self.fastqinfilename.endswith(opts.informat):
88 self.fastqinfilename += '.%s' % opts.informat 88 self.fastqinfilename += '.%s' % opts.informat
89 89
101 101
def copy_output_file_to_dataset(self):
    '''
    Retrieves the output html and text files from the output directory and copies them to the Galaxy output files

    Reads self.opts.outputdir, self.opts.htmloutput and self.opts.textoutput.
    The html report is copied first, then fastqc_data.txt; when several files
    match a pattern, the first match returned by glob is used.

    Raises IndexError when no file in the output directory matches a pattern.
    '''

    # The options are read from self.opts rather than from a module-level
    # `opts` name, so the method also works when the class is used outside
    # the script's own option-parsing section.
    outputdir = self.opts.outputdir

    # (glob pattern below the output directory, name of the option holding the destination path)
    wanted = (
        ('/*html', 'htmloutput'),               # retrieve html file
        ('/*/fastqc_data.txt', 'textoutput'),   # retrieve text file
    )
    for pattern, destination_option in wanted:
        matches = glob.glob(outputdir + pattern)
        if not matches:
            # Same exception type as indexing the empty result list, but
            # with a message that names what is missing.
            raise IndexError("No FastQC output matching '%s' was found" % (outputdir + pattern))
        # The destination option is looked up only now, after the previous copy has finished.
        destination = getattr(self.opts, destination_option)
        with open(matches[0], 'rb') as fsrc:
            with open(destination, 'wb') as fdest:
                shutil.copyfileobj(fsrc, fdest)
118 118
119 def run_fastqc(self): 119 def run_fastqc(self):
120 ''' 120 '''
121 Executes FastQC. Make sure the mandatory import parameters input, inputfilename, outputdir and htmloutput have been specified in the options (opts) 121 Executes FastQC. Make sure the mandatory import parameters input, inputfilename, outputdir and htmloutput have been specified in the options (opts)
122 ''' 122 '''
123 123
124 # Create a log file 124 # Create a log file
125 dummy,tlog = tempfile.mkstemp(prefix='rgFastQC',suffix=".log",dir=self.opts.outputdir) 125 dummy,tlog = tempfile.mkstemp(prefix='rgFastQC',suffix=".log",dir=self.opts.outputdir)
126 sout = open(tlog, 'w') 126 sout = open(tlog, 'w')
127 127
128 self.prepare_command_line() 128 self.prepare_command_line()
129 sout.write(self.command_line) 129 sout.write(self.command_line)
130 sout.write('\n') 130 sout.write('\n')
131 sout.write("Creating symlink\n") # between the input (.dat) file and the given input file name 131 sout.write("Creating symlink\n") # between the input (.dat) file and the given input file name
132 os.symlink(self.opts.input, self.fastqinfilename) 132 os.symlink(self.opts.input, self.fastqinfilename)
148 op.add_option('-n', '--namejob', default='rgFastQC') 148 op.add_option('-n', '--namejob', default='rgFastQC')
149 op.add_option('-c', '--contaminants', default=None) 149 op.add_option('-c', '--contaminants', default=None)
150 op.add_option('-l', '--limits', default=None) 150 op.add_option('-l', '--limits', default=None)
151 op.add_option('-e', '--executable', default='fastqc') 151 op.add_option('-e', '--executable', default='fastqc')
152 opts, args = op.parse_args() 152 opts, args = op.parse_args()
153 153
154 assert opts.input != None 154 assert opts.input != None
155 assert opts.inputfilename != None 155 assert opts.inputfilename != None
156 assert opts.htmloutput != None 156 assert opts.htmloutput != None
157 if not os.path.exists(opts.outputdir): 157 if not os.path.exists(opts.outputdir):
158 os.makedirs(opts.outputdir) 158 os.makedirs(opts.outputdir)
159 159
160 fastqc_runner = FastQCRunner(opts) 160 fastqc_runner = FastQCRunner(opts)
161 fastqc_runner.run_fastqc() 161 fastqc_runner.run_fastqc()