comparison scripts/ReMatCh/modules/utils.py @ 3:0cbed1c0a762 draft default tip

planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author cstrittmatter
date Tue, 28 Jan 2020 10:42:31 -0500
parents 965517909457
children
comparison
equal deleted inserted replaced
2:6837f733b4aa 3:0cbed1c0a762
1 import pickle 1 import pickle
2 import traceback 2 from traceback import format_exception as traceback_format_exception
3 import shlex 3 import shlex
4 import subprocess 4 import subprocess
5 from threading import Timer 5 from threading import Timer
6 import shutil 6 import shutil
7 import time 7 import time
8 import functools 8 from functools import wraps as functools_wraps
9 import os.path 9 import os.path
10 import sys 10 import sys
11 11
12 12
13 def start_logger(workdir): 13 def start_logger(workdir):
36 36
37 37
38 def get_cpu_information(outdir, time_str): 38 def get_cpu_information(outdir, time_str):
39 with open(os.path.join(outdir, 'cpu_information.' + time_str + '.cpu.txt'), 'wt') as writer: 39 with open(os.path.join(outdir, 'cpu_information.' + time_str + '.cpu.txt'), 'wt') as writer:
40 command = ['cat', '/proc/cpuinfo'] 40 command = ['cat', '/proc/cpuinfo']
41 run_successfully, stdout, stderr = runCommandPopenCommunicate(command, False, None, False) 41 run_successfully, stdout, stderr = run_command_popen_communicate(command, False, None, False)
42 if run_successfully: 42 if run_successfully:
43 writer.write(stdout) 43 writer.write(stdout)
44 44
45 with open(os.path.join(outdir, 'cpu_information.' + time_str + '.slurm.txt'), 'wt') as writer: 45 with open(os.path.join(outdir, 'cpu_information.' + time_str + '.slurm.txt'), 'wt') as writer:
46 for environment in sorted(os.environ): 46 for environment in sorted(os.environ):
58 bcftools = os.path.join(script_folder, 'src', 'bcftools-1.3.1', 'bin') 58 bcftools = os.path.join(script_folder, 'src', 'bcftools-1.3.1', 'bin')
59 59
60 os.environ['PATH'] = str(':'.join([bowtie2, samtools, bcftools, path_variable])) 60 os.environ['PATH'] = str(':'.join([bowtie2, samtools, bcftools, path_variable]))
61 61
62 # Print PATH variable 62 # Print PATH variable
63 print '\n' + 'PATH variable:' 63 print('\n' + 'PATH variable:')
64 print os.environ['PATH'] 64 print(os.environ['PATH'])
65 65
66 66
67 def checkPrograms(programs_version_dictionary): 67 def checkPrograms(programs_version_dictionary):
68 print '\n' + 'Checking dependencies...' 68 print('\n' + 'Checking dependencies...')
69 programs = programs_version_dictionary 69 programs = programs_version_dictionary
70 which_program = ['which', ''] 70 which_program = ['which', '']
71 listMissings = [] 71 listMissings = []
72 for program in programs: 72 for program in programs:
73 which_program[1] = program 73 which_program[1] = program
74 run_successfully, stdout, stderr = runCommandPopenCommunicate(which_program, False, None, False) 74 run_successfully, stdout, stderr = run_command_popen_communicate(which_program, False, None, False)
75 if not run_successfully: 75 if not run_successfully:
76 listMissings.append(program + ' not found in PATH.') 76 listMissings.append(program + ' not found in PATH.')
77 else: 77 else:
78 print stdout.splitlines()[0] 78 print(stdout.splitlines()[0])
79 if programs[program][0] is None: 79 if programs[program][0] is None:
80 print program + ' (impossible to determine programme version) found at: ' + stdout.splitlines()[0] 80 print(program + ' (impossible to determine programme version) found at: ' + stdout.splitlines()[0])
81 else: 81 else:
82 if program.endswith('.jar'): 82 if program.endswith('.jar'):
83 check_version = ['java', '-jar', stdout.splitlines()[0], programs[program][0]] 83 check_version = ['java', '-jar', stdout.splitlines()[0], programs[program][0]]
84 programs[program].append(stdout.splitlines()[0]) 84 programs[program].append(stdout.splitlines()[0])
85 else: 85 else:
86 check_version = [stdout.splitlines()[0], programs[program][0]] 86 check_version = [stdout.splitlines()[0], programs[program][0]]
87 run_successfully, stdout, stderr = runCommandPopenCommunicate(check_version, False, None, False) 87 run_successfully, stdout, stderr = run_command_popen_communicate(check_version, False, None, False)
88 if stdout == '': 88 if stdout == '':
89 stdout = stderr 89 stdout = stderr
90 if program == 'wget': 90 if program in ['wget', 'awk']:
91 version_line = stdout.splitlines()[0].split(' ', 3)[2] 91 version_line = stdout.splitlines()[0].split(' ', 3)[2]
92 elif program in ['prefetch', 'fastq-dump']:
93 version_line = stdout.splitlines()[1].split(' ')[-1]
92 else: 94 else:
93 version_line = stdout.splitlines()[0].split(' ')[-1] 95 version_line = stdout.splitlines()[0].split(' ')[-1]
94 replace_characters = ['"', 'v', 'V', '+'] 96 replace_characters = ['"', 'v', 'V', '+', ',']
95 for i in replace_characters: 97 for i in replace_characters:
96 version_line = version_line.replace(i, '') 98 version_line = version_line.replace(i, '')
97 print program + ' (' + version_line + ') found' 99 print(program + ' (' + version_line + ') found')
98 if programs[program][1] == '>=': 100 if programs[program][1] == '>=':
99 program_found_version = version_line.split('.') 101 program_found_version = version_line.split('.')
100 program_version_required = programs[program][2].split('.') 102 program_version_required = programs[program][2].split('.')
101 if len(program_version_required) == 3: 103 if len(program_version_required) == 3:
102 if len(program_found_version) == 2: 104 if len(program_found_version) == 2:
103 program_found_version.append(0) 105 program_found_version.append(0)
104 else: 106 else:
105 program_found_version[2] = program_found_version[2].split('_')[0] 107 program_found_version[2] = program_found_version[2].split('_')[0]
106 for i in range(0, len(program_version_required)): 108 for i in range(0, len(program_version_required)):
107 if int(program_found_version[i]) < int(program_version_required[i]): 109 if int(program_found_version[i]) > int(program_version_required[i]):
108 listMissings.append('It is required ' + program + ' with version ' + programs[program][1] + ' ' + programs[program][2]) 110 break
111 elif int(program_found_version[i]) == int(program_version_required[i]):
112 continue
113 else:
114 listMissings.append('It is required ' + program + ' with version ' +
115 programs[program][1] + ' ' + programs[program][2])
109 else: 116 else:
110 if version_line != programs[program][2]: 117 if version_line != programs[program][2]:
111 listMissings.append('It is required ' + program + ' with version ' + programs[program][1] + ' ' + programs[program][2]) 118 listMissings.append('It is required ' + program + ' with version ' + programs[program][1] +
119 ' ' + programs[program][2])
112 return listMissings 120 return listMissings
113 121
114 122
115 def requiredPrograms(asperaKey, downloadCramBam): 123 def requiredPrograms(asperaKey, downloadCramBam, SRA, SRAopt):
116 programs_version_dictionary = {} 124 programs_version_dictionary = {}
117 programs_version_dictionary['wget'] = ['--version', '>=', '1.12'] 125 programs_version_dictionary['wget'] = ['--version', '>=', '1.12']
126 programs_version_dictionary['gzip'] = ['--version', '>=', '1.6']
118 programs_version_dictionary['bowtie2'] = ['--version', '>=', '2.2.9'] 127 programs_version_dictionary['bowtie2'] = ['--version', '>=', '2.2.9']
119 programs_version_dictionary['samtools'] = ['--version', '==', '1.3.1'] 128 programs_version_dictionary['samtools'] = ['--version', '==', '1.3.1']
120 programs_version_dictionary['bcftools'] = ['--version', '==', '1.3.1'] 129 programs_version_dictionary['bcftools'] = ['--version', '==', '1.3.1']
121 if asperaKey is not None: 130 if asperaKey is not None:
122 programs_version_dictionary['ascp'] = ['--version', '>=', '3.6.1'] 131 programs_version_dictionary['ascp'] = ['--version', '>=', '3.6.1']
123 if downloadCramBam: 132 if SRA or SRAopt:
124 programs_version_dictionary['gzip'] = ['--version', '>=', '1.6'] 133 programs_version_dictionary['prefetch'] = ['--version', '>=', '2.8.2']
134 programs_version_dictionary['fastq-dump'] = ['--version', '>=', '2.8.2']
135 programs_version_dictionary['awk'] = ['--version', '>=', '3.0.4']
125 missingPrograms = checkPrograms(programs_version_dictionary) 136 missingPrograms = checkPrograms(programs_version_dictionary)
126 if len(missingPrograms) > 0: 137 if len(missingPrograms) > 0:
127 sys.exit('\n' + 'Errors:' + '\n' + '\n'.join(missingPrograms)) 138 sys.exit('\n' + 'Errors:' + '\n' + '\n'.join(missingPrograms))
128 139
129 140
130 def general_information(logfile, version, outdir, time_str, doNotUseProvidedSoftware, asperaKey, downloadCramBam): 141 def general_information(logfile, version, outdir, time_str, doNotUseProvidedSoftware, asperaKey, downloadCramBam, SRA, SRAopt):
131 # Check if output directory exists 142 # Check if output directory exists
132 143
133 print '\n' + '==========> ReMatCh <==========' 144 print('\n' + '==========> ReMatCh <==========')
134 print '\n' + 'Program start: ' + time.ctime() 145 print('\n' + 'Program start: ' + time.ctime())
135 146
136 # Tells where the logfile will be stored 147 # Tells where the logfile will be stored
137 print '\n' + 'LOGFILE:' 148 print('\n' + 'LOGFILE:')
138 print logfile 149 print(logfile)
139 150
140 # Print command 151 # Print command
141 print '\n' + 'COMMAND:' 152 print('\n' + 'COMMAND:')
142 script_path = os.path.abspath(sys.argv[0]) 153 script_path = os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__))), 'rematch.py')
143 print sys.executable + ' ' + script_path + ' ' + ' '.join(sys.argv[1:]) 154 print(sys.executable + ' ' + ' '.join(sys.argv))
144 155
145 # Print directory where programme was lunch 156 # Print directory where programme was lunch
146 print '\n' + 'PRESENT DIRECTORY:' 157 print('\n' + 'PRESENT DIRECTORY:')
147 present_directory = os.path.abspath(os.getcwd()) 158 present_directory = os.path.abspath(os.getcwd())
148 print present_directory 159 print(present_directory)
149 160
150 # Print program version 161 # Print program version
151 print '\n' + 'VERSION:' 162 print('\n' + 'VERSION:')
152 scriptVersionGit(version, present_directory, script_path) 163 script_version_git(version, present_directory, script_path)
153 164
154 # Get CPU information 165 # Get CPU information
155 get_cpu_information(outdir, time_str) 166 get_cpu_information(outdir, time_str)
156 167
157 # Set and print PATH variable 168 # Set and print PATH variable
158 setPATHvariable(doNotUseProvidedSoftware, script_path) 169 setPATHvariable(doNotUseProvidedSoftware, script_path)
159 170
160 # Check programms 171 # Check programms
161 requiredPrograms(asperaKey, downloadCramBam) 172 requiredPrograms(asperaKey, downloadCramBam, SRA, SRAopt)
162 173
163 return script_path 174 return script_path
164 175
165 176
166 def scriptVersionGit(version, directory, script_path): 177 def script_version_git(version, current_directory, script_path, no_git_info=False):
167 print 'Version ' + version 178 """
168 179 Print script version and get GitHub commit information
169 try: 180
170 os.chdir(os.path.dirname(script_path)) 181 Parameters
171 command = ['git', 'log', '-1', '--date=local', '--pretty=format:"%h (%H) - Commit by %cn, %cd) : %s"'] 182 ----------
172 run_successfully, stdout, stderr = runCommandPopenCommunicate(command, False, 15, False) 183 version : str
173 print stdout 184 Version of the script, e.g. "4.0"
174 command = ['git', 'remote', 'show', 'origin'] 185 current_directory : str
175 run_successfully, stdout, stderr = runCommandPopenCommunicate(command, False, 15, False) 186 Path to the directory where the script was start to run
176 print stdout 187 script_path : str
177 os.chdir(directory) 188 Path to the script running
178 except: 189 no_git_info : bool, default False
179 print 'HARMLESS WARNING: git command possibly not found. The GitHub repository information will not be obtained.' 190 True if it is not necessary to retreive the GitHub commit information
180 191
181 192 Returns
182 def runTime(start_time): 193 -------
194
195 """
196 print('Version {}'.format(version))
197
198 if not no_git_info:
199 try:
200 os.chdir(os.path.dirname(os.path.dirname(script_path)))
201 command = ['git', 'log', '-1', '--date=local', '--pretty=format:"%h (%H) - Commit by %cn, %cd) : %s"']
202 run_successfully, stdout, stderr = run_command_popen_communicate(command, False, 15, False)
203 print(stdout)
204 command = ['git', 'remote', 'show', 'origin']
205 run_successfully, stdout, stderr = run_command_popen_communicate(command, False, 15, False)
206 print(stdout)
207 except:
208 print('HARMLESS WARNING: git command possibly not found. The GitHub repository information will not be'
209 ' obtained.')
210 finally:
211 os.chdir(current_directory)
212
213
214 def run_time(start_time):
183 end_time = time.time() 215 end_time = time.time()
184 time_taken = end_time - start_time 216 time_taken = end_time - start_time
185 hours, rest = divmod(time_taken, 3600) 217 hours, rest = divmod(time_taken, 3600)
186 minutes, seconds = divmod(rest, 60) 218 minutes, seconds = divmod(rest, 60)
187 print 'Runtime :' + str(hours) + 'h:' + str(minutes) + 'm:' + str(round(seconds, 2)) + 's' 219 print('Runtime :' + str(hours) + 'h:' + str(minutes) + 'm:' + str(round(seconds, 2)) + 's')
188 return round(time_taken, 2) 220 return round(time_taken, 2)
189 221
190 222
191 def timer(function, name): 223 def timer(function, name):
192 @functools.wraps(function) 224 @functools_wraps(function)
193 def wrapper(*args, **kwargs): 225 def wrapper(*args, **kwargs):
194 print('\n' + 'RUNNING {0}\n'.format(name)) 226 print('\n' + 'RUNNING {0}\n'.format(name))
195 start_time = time.time() 227 start_time = time.time()
196 228
197 results = list(function(*args, **kwargs)) # guarantees return is a list to allow .insert() 229 results = list(function(*args, **kwargs)) # guarantees return is a list to allow .insert()
198 230
199 time_taken = runTime(start_time) 231 time_taken = run_time(start_time)
200 print('END {0}'.format(name)) 232 print('END {0}'.format(name))
201 233
202 results.insert(0, time_taken) 234 results.insert(0, time_taken)
203 return results 235 return results
204 return wrapper 236 return wrapper
205 237
206 238
207 def removeDirectory(directory): 239 def remove_directory(directory):
208 if os.path.isdir(directory): 240 if os.path.isdir(directory):
209 shutil.rmtree(directory) 241 shutil.rmtree(directory)
210 242
211 243
212 def saveVariableToPickle(variableToStore, outdir, prefix): 244 def save_variable_to_pickle(variableToStore, outdir, prefix):
213 pickleFile = os.path.join(outdir, str(prefix + '.pkl')) 245 pickleFile = os.path.join(outdir, str(prefix + '.pkl'))
214 with open(pickleFile, 'wb') as writer: 246 with open(pickleFile, 'wb') as writer:
215 pickle.dump(variableToStore, writer) 247 pickle.dump(variableToStore, writer)
216 248
217 249
218 def extractVariableFromPickle(pickleFile): 250 def extract_variable_from_pickle(pickleFile):
219 with open(pickleFile, 'rb') as reader: 251 with open(pickleFile, 'rb') as reader:
220 variable = pickle.load(reader) 252 variable = pickle.load(reader)
221 return variable 253 return variable
222 254
223 255
224 def trace_unhandled_exceptions(func): 256 def trace_unhandled_exceptions(func):
225 @functools.wraps(func) 257 @functools_wraps(func)
226 def wrapped_func(*args, **kwargs): 258 def wrapped_func(*args, **kwargs):
227 try: 259 try:
228 func(*args, **kwargs) 260 func(*args, **kwargs)
229 except: 261 except Exception as e:
230 print 'Exception in ' + func.__name__ 262 print('Exception in ' + func.__name__)
231 traceback.print_exc() 263 print(e)
264
265 exc_type, exc_value, exc_tb = sys.exc_info()
266 print(''.join(traceback_format_exception(exc_type, exc_value, exc_tb)))
267
268 raise exc_type(exc_value)
269
232 return wrapped_func 270 return wrapped_func
233 271
234 272
235 def kill_subprocess_Popen(subprocess_Popen, command): 273 def kill_subprocess_Popen(subprocess_Popen, command):
236 print 'Command run out of time: ' + str(command) 274 print('Command run out of time: ' + str(command))
237 subprocess_Popen.kill() 275 subprocess_Popen.kill()
238 276
239 277
240 def runCommandPopenCommunicate(command, shell_True, timeout_sec_None, print_comand_True): 278 def run_command_popen_communicate(command, shell_True, timeout_sec_None, print_comand_True):
241 run_successfully = False 279 run_successfully = False
242 if not isinstance(command, basestring): 280 if not isinstance(command, str):
243 command = ' '.join(command) 281 command = ' '.join(command)
244 command = shlex.split(command) 282 command = shlex.split(command)
245 283
246 if print_comand_True: 284 if print_comand_True:
247 print 'Running: ' + ' '.join(command) 285 print('Running: ' + ' '.join(command))
248 286
249 if shell_True: 287 if shell_True:
250 command = ' '.join(command) 288 command = ' '.join(command)
251 proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) 289 proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
252 else: 290 else:
254 292
255 not_killed_by_timer = True 293 not_killed_by_timer = True
256 if timeout_sec_None is None: 294 if timeout_sec_None is None:
257 stdout, stderr = proc.communicate() 295 stdout, stderr = proc.communicate()
258 else: 296 else:
259 timer = Timer(timeout_sec_None, kill_subprocess_Popen, args=(proc, command,)) 297 time_counter = Timer(timeout_sec_None, kill_subprocess_Popen, args=(proc, command,))
260 timer.start() 298 time_counter.start()
261 stdout, stderr = proc.communicate() 299 stdout, stderr = proc.communicate()
262 timer.cancel() 300 time_counter.cancel()
263 not_killed_by_timer = timer.isAlive() 301 not_killed_by_timer = time_counter.isAlive()
264 302
265 if proc.returncode == 0: 303 if proc.returncode == 0:
266 run_successfully = True 304 run_successfully = True
267 else: 305 else:
268 if not print_comand_True and not_killed_by_timer: 306 if not print_comand_True and not_killed_by_timer:
269 print 'Running: ' + str(command) 307 print('Running: ' + str(command))
270 if len(stdout) > 0: 308 if len(stdout) > 0:
271 print 'STDOUT' 309 print('STDOUT')
272 print stdout.decode("utf-8") 310 print(stdout.decode("utf-8"))
273 if len(stderr) > 0: 311 if len(stderr) > 0:
274 print 'STDERR' 312 print('STDERR')
275 print stderr.decode("utf-8") 313 print(stderr.decode("utf-8"))
276 return run_successfully, stdout, stderr 314 return run_successfully, stdout.decode("utf-8"), stderr.decode("utf-8")
277 315
278 316
279 def rchop(string, ending): 317 def rchop(string, ending):
280 if string.endswith(ending): 318 if string.endswith(ending):
281 string = string[:-len(ending)] 319 string = string[:-len(ending)]
283 321
284 322
285 def reverse_complement(seq): 323 def reverse_complement(seq):
286 complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'N': 'N'} 324 complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A', 'N': 'N'}
287 325
288 reverse_complement = '' 326 reverse_complement_string = ''
289 327
290 seq = reversed(list(seq.upper())) 328 seq = reversed(list(seq.upper()))
291 329
292 for base in seq: 330 for base in seq:
293 reverse_complement += complement[base] 331 reverse_complement_string += complement[base]
294 332
295 return reverse_complement 333 return reverse_complement_string