Mercurial > repos > devteam > fastqc
comparison rgFastQC.py @ 8:06819360a9e2 draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/fastqc commit df4c0b0c6372e2984966e220fa42ecd8a3d370e8
author | devteam |
---|---|
date | Mon, 31 Oct 2016 10:40:12 -0400 |
parents | 3fdc1a74d866 |
children | 3a458e268066 |
comparison
equal
deleted
inserted
replaced
7:3fdc1a74d866 | 8:06819360a9e2 |
---|---|
28 import gzip | 28 import gzip |
29 import bz2 | 29 import bz2 |
30 import zipfile | 30 import zipfile |
31 | 31 |
32 class FastQCRunner(object): | 32 class FastQCRunner(object): |
33 | 33 |
34 def __init__(self,opts=None): | 34 def __init__(self,opts=None): |
35 ''' | 35 ''' |
36 Initializes an object to run FastQC in Galaxy. To start the process, use the function run_fastqc() | 36 Initializes an object to run FastQC in Galaxy. To start the process, use the function run_fastqc() |
37 ''' | 37 ''' |
38 | 38 |
39 # Check that the options are specified and save them on the object | 39 # Check that the options are specified and save them on the object |
40 assert opts != None | 40 assert opts != None |
41 self.opts = opts | 41 self.opts = opts |
42 | 42 |
43 def prepare_command_line(self): | 43 def prepare_command_line(self): |
44 ''' | 44 ''' |
45 Builds the command line used to run FastQC in Galaxy | 45 Builds the command line used to run FastQC in Galaxy |
46 ''' | 46 ''' |
47 | 47 |
48 # Check whether a given file compression format is valid | 48 # Check whether a given file compression format is valid |
49 # This prevents uncompression of already uncompressed files | 49 # This prevents uncompression of already uncompressed files |
50 infname = self.opts.inputfilename | 50 infname = self.opts.inputfilename |
51 linf = infname.lower() | 51 linf = infname.lower() |
52 trimext = False | 52 trimext = False |
53 # decompression at upload currently does NOT remove this now bogus ending - fastqc will barf | 53 # decompression at upload currently does NOT remove this now bogus ending - fastqc will barf |
54 # patched may 29 2013 until this is fixed properly | 54 # patched may 29 2013 until this is fixed properly |
55 if ( linf.endswith('.gz') or linf.endswith('.gzip') ): | 55 if ( linf.endswith('.gz') or linf.endswith('.gzip') ): |
56 f = gzip.open(self.opts.input) | 56 f = gzip.open(self.opts.input) |
57 try: | 57 try: |
58 f.readline() | 58 f.readline() |
59 except: | 59 except: |
60 trimext = True | 60 trimext = True |
74 try: | 74 try: |
75 f.readline() | 75 f.readline() |
76 except: | 76 except: |
77 raise Exception("Input file corruption, could not identify the filetype") | 77 raise Exception("Input file corruption, could not identify the filetype") |
78 infname = os.path.splitext(infname)[0] | 78 infname = os.path.splitext(infname)[0] |
79 | 79 |
80 # Replace unwanted or problematic characters in the input file name | 80 # Replace unwanted or problematic characters in the input file name |
81 self.fastqinfilename = re.sub(ur'[^a-zA-Z0-9_\-\.]', '_', os.path.basename(infname)) | 81 self.fastqinfilename = re.sub(ur'[^a-zA-Z0-9_\-\.]', '_', os.path.basename(infname)) |
82 # check that the symbolic link gets a proper ending, fastqc seems to ignore the given format otherwise | 82 # check that the symbolic link gets a proper ending, fastqc seems to ignore the given format otherwise |
83 if 'fastq' in opts.informat: | 83 if 'fastq' in opts.informat: |
84 # with fastq the .ext is ignored, but when a format is actually passed it must comply with fastqc's | 84 # with fastq the .ext is ignored, but when a format is actually passed it must comply with fastqc's |
85 # accepted formats.. | 85 # accepted formats.. |
86 opts.informat = 'fastq' | 86 opts.informat = 'fastq' |
87 elif not self.fastqinfilename.endswith(opts.informat): | 87 elif not self.fastqinfilename.endswith(opts.informat): |
88 self.fastqinfilename += '.%s' % opts.informat | 88 self.fastqinfilename += '.%s' % opts.informat |
89 | 89 |
101 | 101 |
102 def copy_output_file_to_dataset(self): | 102 def copy_output_file_to_dataset(self): |
103 ''' | 103 ''' |
104 Retrieves the output html and text files from the output directory and copies them to the Galaxy output files | 104 Retrieves the output html and text files from the output directory and copies them to the Galaxy output files |
105 ''' | 105 ''' |
106 | 106 |
107 # retrieve html file | 107 # retrieve html file |
108 result_file = glob.glob(opts.outputdir + '/*html') | 108 result_file = glob.glob(opts.outputdir + '/*html') |
109 with open(result_file[0], 'rb') as fsrc: | 109 with open(result_file[0], 'rb') as fsrc: |
110 with open(self.opts.htmloutput, 'wb') as fdest: | 110 with open(self.opts.htmloutput, 'wb') as fdest: |
111 shutil.copyfileobj(fsrc, fdest) | 111 shutil.copyfileobj(fsrc, fdest) |
112 | 112 |
113 # retrieve text file | 113 # retrieve text file |
114 text_file = glob.glob(opts.outputdir + '/*/fastqc_data.txt') | 114 text_file = glob.glob(opts.outputdir + '/*/fastqc_data.txt') |
115 with open(text_file[0], 'rb') as fsrc: | 115 with open(text_file[0], 'rb') as fsrc: |
116 with open(self.opts.textoutput, 'wb') as fdest: | 116 with open(self.opts.textoutput, 'wb') as fdest: |
117 shutil.copyfileobj(fsrc, fdest) | 117 shutil.copyfileobj(fsrc, fdest) |
118 | 118 |
119 def run_fastqc(self): | 119 def run_fastqc(self): |
120 ''' | 120 ''' |
121 Executes FastQC. Make sure the mandatory import parameters input, inputfilename, outputdir and htmloutput have been specified in the options (opts) | 121 Executes FastQC. Make sure the mandatory import parameters input, inputfilename, outputdir and htmloutput have been specified in the options (opts) |
122 ''' | 122 ''' |
123 | 123 |
124 # Create a log file | 124 # Create a log file |
125 dummy,tlog = tempfile.mkstemp(prefix='rgFastQC',suffix=".log",dir=self.opts.outputdir) | 125 dummy,tlog = tempfile.mkstemp(prefix='rgFastQC',suffix=".log",dir=self.opts.outputdir) |
126 sout = open(tlog, 'w') | 126 sout = open(tlog, 'w') |
127 | 127 |
128 self.prepare_command_line() | 128 self.prepare_command_line() |
129 sout.write(self.command_line) | 129 sout.write(self.command_line) |
130 sout.write('\n') | 130 sout.write('\n') |
131 sout.write("Creating symlink\n") # between the input (.dat) file and the given input file name | 131 sout.write("Creating symlink\n") # between the input (.dat) file and the given input file name |
132 os.symlink(self.opts.input, self.fastqinfilename) | 132 os.symlink(self.opts.input, self.fastqinfilename) |
148 op.add_option('-n', '--namejob', default='rgFastQC') | 148 op.add_option('-n', '--namejob', default='rgFastQC') |
149 op.add_option('-c', '--contaminants', default=None) | 149 op.add_option('-c', '--contaminants', default=None) |
150 op.add_option('-l', '--limits', default=None) | 150 op.add_option('-l', '--limits', default=None) |
151 op.add_option('-e', '--executable', default='fastqc') | 151 op.add_option('-e', '--executable', default='fastqc') |
152 opts, args = op.parse_args() | 152 opts, args = op.parse_args() |
153 | 153 |
154 assert opts.input != None | 154 assert opts.input != None |
155 assert opts.inputfilename != None | 155 assert opts.inputfilename != None |
156 assert opts.htmloutput != None | 156 assert opts.htmloutput != None |
157 if not os.path.exists(opts.outputdir): | 157 if not os.path.exists(opts.outputdir): |
158 os.makedirs(opts.outputdir) | 158 os.makedirs(opts.outputdir) |
159 | 159 |
160 fastqc_runner = FastQCRunner(opts) | 160 fastqc_runner = FastQCRunner(opts) |
161 fastqc_runner.run_fastqc() | 161 fastqc_runner.run_fastqc() |