Mercurial > repos > cstrittmatter > test_eurl_vtec_wgs_pt
comparison scripts/ReMatCh/modules/utils.py @ 3:0cbed1c0a762 draft default tip
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author | cstrittmatter |
---|---|
date | Tue, 28 Jan 2020 10:42:31 -0500 |
parents | 965517909457 |
children |
comparison
equal
deleted
inserted
replaced
2:6837f733b4aa | 3:0cbed1c0a762 |
---|---|
1 import pickle | 1 import pickle |
2 import traceback | 2 from traceback import format_exception as traceback_format_exception |
3 import shlex | 3 import shlex |
4 import subprocess | 4 import subprocess |
5 from threading import Timer | 5 from threading import Timer |
6 import shutil | 6 import shutil |
7 import time | 7 import time |
8 import functools | 8 from functools import wraps as functools_wraps |
9 import os.path | 9 import os.path |
10 import sys | 10 import sys |
11 | 11 |
12 | 12 |
13 def start_logger(workdir): | 13 def start_logger(workdir): |
36 | 36 |
37 | 37 |
38 def get_cpu_information(outdir, time_str): | 38 def get_cpu_information(outdir, time_str): |
39 with open(os.path.join(outdir, 'cpu_information.' + time_str + '.cpu.txt'), 'wt') as writer: | 39 with open(os.path.join(outdir, 'cpu_information.' + time_str + '.cpu.txt'), 'wt') as writer: |
40 command = ['cat', '/proc/cpuinfo'] | 40 command = ['cat', '/proc/cpuinfo'] |
41 run_successfully, stdout, stderr = runCommandPopenCommunicate(command, False, None, False) | 41 run_successfully, stdout, stderr = run_command_popen_communicate(command, False, None, False) |
42 if run_successfully: | 42 if run_successfully: |
43 writer.write(stdout) | 43 writer.write(stdout) |
44 | 44 |
45 with open(os.path.join(outdir, 'cpu_information.' + time_str + '.slurm.txt'), 'wt') as writer: | 45 with open(os.path.join(outdir, 'cpu_information.' + time_str + '.slurm.txt'), 'wt') as writer: |
46 for environment in sorted(os.environ): | 46 for environment in sorted(os.environ): |
58 bcftools = os.path.join(script_folder, 'src', 'bcftools-1.3.1', 'bin') | 58 bcftools = os.path.join(script_folder, 'src', 'bcftools-1.3.1', 'bin') |
59 | 59 |
60 os.environ['PATH'] = str(':'.join([bowtie2, samtools, bcftools, path_variable])) | 60 os.environ['PATH'] = str(':'.join([bowtie2, samtools, bcftools, path_variable])) |
61 | 61 |
62 # Print PATH variable | 62 # Print PATH variable |
63 print '\n' + 'PATH variable:' | 63 print('\n' + 'PATH variable:') |
64 print os.environ['PATH'] | 64 print(os.environ['PATH']) |
65 | 65 |
66 | 66 |
67 def checkPrograms(programs_version_dictionary): | 67 def checkPrograms(programs_version_dictionary): |
68 print '\n' + 'Checking dependencies...' | 68 print('\n' + 'Checking dependencies...') |
69 programs = programs_version_dictionary | 69 programs = programs_version_dictionary |
70 which_program = ['which', ''] | 70 which_program = ['which', ''] |
71 listMissings = [] | 71 listMissings = [] |
72 for program in programs: | 72 for program in programs: |
73 which_program[1] = program | 73 which_program[1] = program |
74 run_successfully, stdout, stderr = runCommandPopenCommunicate(which_program, False, None, False) | 74 run_successfully, stdout, stderr = run_command_popen_communicate(which_program, False, None, False) |
75 if not run_successfully: | 75 if not run_successfully: |
76 listMissings.append(program + ' not found in PATH.') | 76 listMissings.append(program + ' not found in PATH.') |
77 else: | 77 else: |
78 print stdout.splitlines()[0] | 78 print(stdout.splitlines()[0]) |
79 if programs[program][0] is None: | 79 if programs[program][0] is None: |
80 print program + ' (impossible to determine programme version) found at: ' + stdout.splitlines()[0] | 80 print(program + ' (impossible to determine programme version) found at: ' + stdout.splitlines()[0]) |
81 else: | 81 else: |
82 if program.endswith('.jar'): | 82 if program.endswith('.jar'): |
83 check_version = ['java', '-jar', stdout.splitlines()[0], programs[program][0]] | 83 check_version = ['java', '-jar', stdout.splitlines()[0], programs[program][0]] |
84 programs[program].append(stdout.splitlines()[0]) | 84 programs[program].append(stdout.splitlines()[0]) |
85 else: | 85 else: |
86 check_version = [stdout.splitlines()[0], programs[program][0]] | 86 check_version = [stdout.splitlines()[0], programs[program][0]] |
87 run_successfully, stdout, stderr = runCommandPopenCommunicate(check_version, False, None, False) | 87 run_successfully, stdout, stderr = run_command_popen_communicate(check_version, False, None, False) |
88 if stdout == '': | 88 if stdout == '': |
89 stdout = stderr | 89 stdout = stderr |
90 if program == 'wget': | 90 if program in ['wget', 'awk']: |
91 version_line = stdout.splitlines()[0].split(' ', 3)[2] | 91 version_line = stdout.splitlines()[0].split(' ', 3)[2] |
92 elif program in ['prefetch', 'fastq-dump']: | |
93 version_line = stdout.splitlines()[1].split(' ')[-1] | |
92 else: | 94 else: |
93 version_line = stdout.splitlines()[0].split(' ')[-1] | 95 version_line = stdout.splitlines()[0].split(' ')[-1] |
94 replace_characters = ['"', 'v', 'V', '+'] | 96 replace_characters = ['"', 'v', 'V', '+', ','] |
95 for i in replace_characters: | 97 for i in replace_characters: |
96 version_line = version_line.replace(i, '') | 98 version_line = version_line.replace(i, '') |
97 print program + ' (' + version_line + ') found' | 99 print(program + ' (' + version_line + ') found') |
98 if programs[program][1] == '>=': | 100 if programs[program][1] == '>=': |
99 program_found_version = version_line.split('.') | 101 program_found_version = version_line.split('.') |
100 program_version_required = programs[program][2].split('.') | 102 program_version_required = programs[program][2].split('.') |
101 if len(program_version_required) == 3: | 103 if len(program_version_required) == 3: |
102 if len(program_found_version) == 2: | 104 if len(program_found_version) == 2: |
103 program_found_version.append(0) | 105 program_found_version.append(0) |
104 else: | 106 else: |
105 program_found_version[2] = program_found_version[2].split('_')[0] | 107 program_found_version[2] = program_found_version[2].split('_')[0] |
106 for i in range(0, len(program_version_required)): | 108 for i in range(0, len(program_version_required)): |
107 if int(program_found_version[i]) < int(program_version_required[i]): | 109 if int(program_found_version[i]) > int(program_version_required[i]): |
108 listMissings.append('It is required ' + program + ' with version ' + programs[program][1] + ' ' + programs[program][2]) | 110 break |
111 elif int(program_found_version[i]) == int(program_version_required[i]): | |
112 continue | |
113 else: | |
114 listMissings.append('It is required ' + program + ' with version ' + | |
115 programs[program][1] + ' ' + programs[program][2]) | |
109 else: | 116 else: |
110 if version_line != programs[program][2]: | 117 if version_line != programs[program][2]: |
111 listMissings.append('It is required ' + program + ' with version ' + programs[program][1] + ' ' + programs[program][2]) | 118 listMissings.append('It is required ' + program + ' with version ' + programs[program][1] + |
119 ' ' + programs[program][2]) | |
112 return listMissings | 120 return listMissings |
113 | 121 |
114 | 122 |
115 def requiredPrograms(asperaKey, downloadCramBam): | 123 def requiredPrograms(asperaKey, downloadCramBam, SRA, SRAopt): |
116 programs_version_dictionary = {} | 124 programs_version_dictionary = {} |
117 programs_version_dictionary['wget'] = ['--version', '>=', '1.12'] | 125 programs_version_dictionary['wget'] = ['--version', '>=', '1.12'] |
126 programs_version_dictionary['gzip'] = ['--version', '>=', '1.6'] | |
118 programs_version_dictionary['bowtie2'] = ['--version', '>=', '2.2.9'] | 127 programs_version_dictionary['bowtie2'] = ['--version', '>=', '2.2.9'] |
119 programs_version_dictionary['samtools'] = ['--version', '==', '1.3.1'] | 128 programs_version_dictionary['samtools'] = ['--version', '==', '1.3.1'] |
120 programs_version_dictionary['bcftools'] = ['--version', '==', '1.3.1'] | 129 programs_version_dictionary['bcftools'] = ['--version', '==', '1.3.1'] |
121 if asperaKey is not None: | 130 if asperaKey is not None: |
122 programs_version_dictionary['ascp'] = ['--version', '>=', '3.6.1'] | 131 programs_version_dictionary['ascp'] = ['--version', '>=', '3.6.1'] |
123 if downloadCramBam: | 132 if SRA or SRAopt: |
124 programs_version_dictionary['gzip'] = ['--version', '>=', '1.6'] | 133 programs_version_dictionary['prefetch'] = ['--version', '>=', '2.8.2'] |
134 programs_version_dictionary['fastq-dump'] = ['--version', '>=', '2.8.2'] | |
135 programs_version_dictionary['awk'] = ['--version', '>=', '3.0.4'] | |
125 missingPrograms = checkPrograms(programs_version_dictionary) | 136 missingPrograms = checkPrograms(programs_version_dictionary) |
126 if len(missingPrograms) > 0: | 137 if len(missingPrograms) > 0: |
127 sys.exit('\n' + 'Errors:' + '\n' + '\n'.join(missingPrograms)) | 138 sys.exit('\n' + 'Errors:' + '\n' + '\n'.join(missingPrograms)) |
128 | 139 |
129 | 140 |
130 def general_information(logfile, version, outdir, time_str, doNotUseProvidedSoftware, asperaKey, downloadCramBam): | 141 def general_information(logfile, version, outdir, time_str, doNotUseProvidedSoftware, asperaKey, downloadCramBam, SRA, SRAopt): |
131 # Check if output directory exists | 142 # Check if output directory exists |
132 | 143 |
133 print '\n' + '==========> ReMatCh <==========' | 144 print('\n' + '==========> ReMatCh <==========') |
134 print '\n' + 'Program start: ' + time.ctime() | 145 print('\n' + 'Program start: ' + time.ctime()) |
135 | 146 |
136 # Tells where the logfile will be stored | 147 # Tells where the logfile will be stored |
137 print '\n' + 'LOGFILE:' | 148 print('\n' + 'LOGFILE:') |
138 print logfile | 149 print(logfile) |
139 | 150 |
140 # Print command | 151 # Print command |
141 print '\n' + 'COMMAND:' | 152 print('\n' + 'COMMAND:') |
142 script_path = os.path.abspath(sys.argv[0]) | 153 script_path = os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__))), 'rematch.py') |
143 print sys.executable + ' ' + script_path + ' ' + ' '.join(sys.argv[1:]) | 154 print(sys.executable + ' ' + ' '.join(sys.argv)) |
144 | 155 |
145 # Print directory where programme was lunch | 156 # Print directory where programme was lunch |
146 print '\n' + 'PRESENT DIRECTORY:' | 157 print('\n' + 'PRESENT DIRECTORY:') |
147 present_directory = os.path.abspath(os.getcwd()) | 158 present_directory = os.path.abspath(os.getcwd()) |
148 print present_directory | 159 print(present_directory) |
149 | 160 |
150 # Print program version | 161 # Print program version |
151 print '\n' + 'VERSION:' | 162 print('\n' + 'VERSION:') |
152 scriptVersionGit(version, present_directory, script_path) | 163 script_version_git(version, present_directory, script_path) |
153 | 164 |
154 # Get CPU information | 165 # Get CPU information |
155 get_cpu_information(outdir, time_str) | 166 get_cpu_information(outdir, time_str) |
156 | 167 |
157 # Set and print PATH variable | 168 # Set and print PATH variable |
158 setPATHvariable(doNotUseProvidedSoftware, script_path) | 169 setPATHvariable(doNotUseProvidedSoftware, script_path) |
159 | 170 |
160 # Check programms | 171 # Check programms |
161 requiredPrograms(asperaKey, downloadCramBam) | 172 requiredPrograms(asperaKey, downloadCramBam, SRA, SRAopt) |
162 | 173 |
163 return script_path | 174 return script_path |
164 | 175 |
165 | 176 |
166 def scriptVersionGit(version, directory, script_path): | 177 def script_version_git(version, current_directory, script_path, no_git_info=False): |
167 print 'Version ' + version | 178 """ |
168 | 179 Print script version and get GitHub commit information |
169 try: | 180 |
170 os.chdir(os.path.dirname(script_path)) | 181 Parameters |
171 command = ['git', 'log', '-1', '--date=local', '--pretty=format:"%h (%H) - Commit by %cn, %cd) : %s"'] | 182 ---------- |
172 run_successfully, stdout, stderr = runCommandPopenCommunicate(command, False, 15, False) | 183 version : str |
173 print stdout | 184 Version of the script, e.g. "4.0" |
174 command = ['git', 'remote', 'show', 'origin'] | 185 current_directory : str |
175 run_successfully, stdout, stderr = runCommandPopenCommunicate(command, False, 15, False) | 186 Path to the directory where the script was start to run |
176 print stdout | 187 script_path : str |
177 os.chdir(directory) | 188 Path to the script running |
178 except: | 189 no_git_info : bool, default False |
179 print 'HARMLESS WARNING: git command possibly not found. The GitHub repository information will not be obtained.' | 190 True if it is not necessary to retreive the GitHub commit information |
180 | 191 |
181 | 192 Returns |
182 def runTime(start_time): | 193 ------- |
194 | |
195 """ | |
196 print('Version {}'.format(version)) | |
197 | |
198 if not no_git_info: | |
199 try: | |
200 os.chdir(os.path.dirname(os.path.dirname(script_path))) | |
201 command = ['git', 'log', '-1', '--date=local', '--pretty=format:"%h (%H) - Commit by %cn, %cd) : %s"'] | |
202 run_successfully, stdout, stderr = run_command_popen_communicate(command, False, 15, False) | |
203 print(stdout) | |
204 command = ['git', 'remote', 'show', 'origin'] | |
205 run_successfully, stdout, stderr = run_command_popen_communicate(command, False, 15, False) | |
206 print(stdout) | |
207 except: | |
208 print('HARMLESS WARNING: git command possibly not found. The GitHub repository information will not be' | |
209 ' obtained.') | |
210 finally: | |
211 os.chdir(current_directory) | |
212 | |
213 | |
214 def run_time(start_time): | |
183 end_time = time.time() | 215 end_time = time.time() |
184 time_taken = end_time - start_time | 216 time_taken = end_time - start_time |
185 hours, rest = divmod(time_taken, 3600) | 217 hours, rest = divmod(time_taken, 3600) |
186 minutes, seconds = divmod(rest, 60) | 218 minutes, seconds = divmod(rest, 60) |
187 print 'Runtime :' + str(hours) + 'h:' + str(minutes) + 'm:' + str(round(seconds, 2)) + 's' | 219 print('Runtime :' + str(hours) + 'h:' + str(minutes) + 'm:' + str(round(seconds, 2)) + 's') |
188 return round(time_taken, 2) | 220 return round(time_taken, 2) |
189 | 221 |
190 | 222 |
191 def timer(function, name): | 223 def timer(function, name): |
192 @functools.wraps(function) | 224 @functools_wraps(function) |
193 def wrapper(*args, **kwargs): | 225 def wrapper(*args, **kwargs): |
194 print('\n' + 'RUNNING {0}\n'.format(name)) | 226 print('\n' + 'RUNNING {0}\n'.format(name)) |
195 start_time = time.time() | 227 start_time = time.time() |
196 | 228 |
197 results = list(function(*args, **kwargs)) # guarantees return is a list to allow .insert() | 229 results = list(function(*args, **kwargs)) # guarantees return is a list to allow .insert() |
198 | 230 |
199 time_taken = runTime(start_time) | 231 time_taken = run_time(start_time) |
200 print('END {0}'.format(name)) | 232 print('END {0}'.format(name)) |
201 | 233 |
202 results.insert(0, time_taken) | 234 results.insert(0, time_taken) |
203 return results | 235 return results |
204 return wrapper | 236 return wrapper |
205 | 237 |
206 | 238 |
207 def removeDirectory(directory): | 239 def remove_directory(directory): |
208 if os.path.isdir(directory): | 240 if os.path.isdir(directory): |
209 shutil.rmtree(directory) | 241 shutil.rmtree(directory) |
210 | 242 |
211 | 243 |
212 def saveVariableToPickle(variableToStore, outdir, prefix): | 244 def save_variable_to_pickle(variableToStore, outdir, prefix): |
213 pickleFile = os.path.join(outdir, str(prefix + '.pkl')) | 245 pickleFile = os.path.join(outdir, str(prefix + '.pkl')) |
214 with open(pickleFile, 'wb') as writer: | 246 with open(pickleFile, 'wb') as writer: |
215 pickle.dump(variableToStore, writer) | 247 pickle.dump(variableToStore, writer) |
216 | 248 |
217 | 249 |
218 def extractVariableFromPickle(pickleFile): | 250 def extract_variable_from_pickle(pickleFile): |
219 with open(pickleFile, 'rb') as reader: | 251 with open(pickleFile, 'rb') as reader: |
220 variable = pickle.load(reader) | 252 variable = pickle.load(reader) |
221 return variable | 253 return variable |
222 | 254 |
223 | 255 |
224 def trace_unhandled_exceptions(func): | 256 def trace_unhandled_exceptions(func): |
225 @functools.wraps(func) | 257 @functools_wraps(func) |
226 def wrapped_func(*args, **kwargs): | 258 def wrapped_func(*args, **kwargs): |
227 try: | 259 try: |
228 func(*args, **kwargs) | 260 func(*args, **kwargs) |
229 except: | 261 except Exception as e: |
230 print 'Exception in ' + func.__name__ | 262 print('Exception in ' + func.__name__) |
231 traceback.print_exc() | 263 print(e) |
264 | |
265 exc_type, exc_value, exc_tb = sys.exc_info() | |
266 print(''.join(traceback_format_exception(exc_type, exc_value, exc_tb))) | |
267 | |
268 raise exc_type(exc_value) | |
269 | |
232 return wrapped_func | 270 return wrapped_func |
233 | 271 |
234 | 272 |
235 def kill_subprocess_Popen(subprocess_Popen, command): | 273 def kill_subprocess_Popen(subprocess_Popen, command): |
236 print 'Command run out of time: ' + str(command) | 274 print('Command run out of time: ' + str(command)) |
237 subprocess_Popen.kill() | 275 subprocess_Popen.kill() |
238 | 276 |
239 | 277 |
240 def runCommandPopenCommunicate(command, shell_True, timeout_sec_None, print_comand_True): | 278 def run_command_popen_communicate(command, shell_True, timeout_sec_None, print_comand_True): |
241 run_successfully = False | 279 run_successfully = False |
242 if not isinstance(command, basestring): | 280 if not isinstance(command, str): |
243 command = ' '.join(command) | 281 command = ' '.join(command) |
244 command = shlex.split(command) | 282 command = shlex.split(command) |
245 | 283 |
246 if print_comand_True: | 284 if print_comand_True: |
247 print 'Running: ' + ' '.join(command) | 285 print('Running: ' + ' '.join(command)) |
248 | 286 |
249 if shell_True: | 287 if shell_True: |
250 command = ' '.join(command) | 288 command = ' '.join(command) |
251 proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) | 289 proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) |
252 else: | 290 else: |
254 | 292 |
255 not_killed_by_timer = True | 293 not_killed_by_timer = True |
256 if timeout_sec_None is None: | 294 if timeout_sec_None is None: |
257 stdout, stderr = proc.communicate() | 295 stdout, stderr = proc.communicate() |
258 else: | 296 else: |
259 timer = Timer(timeout_sec_None, kill_subprocess_Popen, args=(proc, command,)) | 297 time_counter = Timer(timeout_sec_None, kill_subprocess_Popen, args=(proc, command,)) |
260 timer.start() | 298 time_counter.start() |
261 stdout, stderr = proc.communicate() | 299 stdout, stderr = proc.communicate() |
262 timer.cancel() | 300 time_counter.cancel() |
263 not_killed_by_timer = timer.isAlive() | 301 not_killed_by_timer = time_counter.isAlive() |
264 | 302 |
265 if proc.returncode == 0: | 303 if proc.returncode == 0: |
266 run_successfully = True | 304 run_successfully = True |
267 else: | 305 else: |
268 if not print_comand_True and not_killed_by_timer: | 306 if not print_comand_True and not_killed_by_timer: |
269 print 'Running: ' + str(command) | 307 print('Running: ' + str(command)) |
270 if len(stdout) > 0: | 308 if len(stdout) > 0: |
271 print 'STDOUT' | 309 print('STDOUT') |
272 print stdout.decode("utf-8") | 310 print(stdout.decode("utf-8")) |
273 if len(stderr) > 0: | 311 if len(stderr) > 0: |
274 print 'STDERR' | 312 print('STDERR') |
275 print stderr.decode("utf-8") | 313 print(stderr.decode("utf-8")) |
276 return run_successfully, stdout, stderr | 314 return run_successfully, stdout.decode("utf-8"), stderr.decode("utf-8") |
277 | 315 |
278 | 316 |
279 def rchop(string, ending): | 317 def rchop(string, ending): |
280 if string.endswith(ending): | 318 if string.endswith(ending): |
281 string = string[:-len(ending)] | 319 string = string[:-len(ending)] |
283 | 321 |
284 | 322 |
285 def reverse_complement(seq): | 323 def reverse_complement(seq): |
286 complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'N': 'N'} | 324 complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'N': 'N'} |
287 | 325 |
288 reverse_complement = '' | 326 reverse_complement_string = '' |
289 | 327 |
290 seq = reversed(list(seq.upper())) | 328 seq = reversed(list(seq.upper())) |
291 | 329 |
292 for base in seq: | 330 for base in seq: |
293 reverse_complement += complement[base] | 331 reverse_complement_string += complement[base] |
294 | 332 |
295 return reverse_complement | 333 return reverse_complement_string |