comparison scripts/modules/utils.py @ 3:0cbed1c0a762 draft default tip

planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author cstrittmatter
date Tue, 28 Jan 2020 10:42:31 -0500
parents 965517909457
children
comparison
equal deleted inserted replaced
2:6837f733b4aa 3:0cbed1c0a762
def getLogFile(self):
    """Return the value stored in ``self.logfile``."""
    return self.logfile
37 37
38 38
def _leading_int(component):
    """Return the integer formed by the leading decimal digits of *component* (0 if there are none)."""
    digits = ''
    for character in str(component):
        if character not in '0123456789':
            break
        digits += character
    return int(digits) if digits else 0


def _extract_version(program, version_output):
    """Return the cleaned version token found in *version_output*, or None if it cannot be located."""
    lines = version_output.splitlines()
    try:
        if program in ['wget', 'awk']:
            # Third space-separated field of the first line
            version = lines[0].split(' ', 3)[2]
        elif program in ['prefetch', 'fastq-dump']:
            # Last space-separated field of the second line
            version = lines[1].split(' ')[-1]
        else:
            # Last space-separated field of the first line
            version = lines[0].split(' ')[-1]
    except IndexError:
        return None
    for character in ['"', 'v', 'V', '+', ',']:
        version = version.replace(character, '')
    return version


def _version_is_at_least(found_version, required_version):
    """Return True when dotted *found_version* is greater than or equal to dotted *required_version*."""
    found_parts = found_version.split('.')
    required_parts = required_version.split('.')
    for position, required_part in enumerate(required_parts):
        # Components missing from the found version count as 0 (e.g. "1.2" against "1.2.0")
        found_number = _leading_int(found_parts[position]) if position < len(found_parts) else 0
        required_number = _leading_int(required_part)
        if found_number != required_number:
            # The first differing component decides; later components are irrelevant
            return found_number > required_number
    return True


def checkPrograms(programs_version_dictionary):
    """
    Check that the required programs are in PATH and satisfy their version requirement

    Parameters
    ----------
    programs_version_dictionary : dict
        Maps a program name to a list ``[version_flag, comparison, required_version]``. When ``version_flag`` is
        None the version is not checked. ``comparison`` equal to '>=' requests a minimum version; any other value
        requests an exact match of the version string. For programs whose name ends with '.jar' the path found by
        ``which`` is appended to that list.

    Returns
    -------
    listMissings : list
        One message per program that was not found or does not satisfy the requirement (empty if all is fine)
    """
    print('\n' + 'Checking dependencies...')
    programs = programs_version_dictionary
    listMissings = []
    for program in programs:
        run_successfully, stdout, stderr = runCommandPopenCommunicate(['which', program], False, None, False)
        if not run_successfully or len(stdout.splitlines()) == 0:
            listMissings.append(program + ' not found in PATH.')
            continue

        program_path = stdout.splitlines()[0]
        print(program_path)

        version_flag = programs[program][0]
        if version_flag is None:
            print(program + ' (impossible to determine programme version) found at: ' + program_path)
            continue

        if program.endswith('.jar'):
            check_version = ['java', '-jar', program_path, version_flag]
            programs[program].append(program_path)
        else:
            check_version = [program_path, version_flag]
        run_successfully, stdout, stderr = runCommandPopenCommunicate(check_version, False, None, False)
        if stdout == '':
            # Fall back to stderr when nothing was written to stdout
            stdout = stderr

        requirement_message = 'It is required ' + program + ' with version ' + programs[program][1] + ' ' + \
                              programs[program][2]

        version_line = _extract_version(program, stdout)
        if version_line is None:
            # The version command produced no usable output: report it instead of raising IndexError
            print(program + ' (impossible to determine programme version) found at: ' + program_path)
            listMissings.append(requirement_message)
            continue

        print(program + ' (' + version_line + ') found')
        if programs[program][1] == '>=':
            requirement_met = _version_is_at_least(version_line, programs[program][2])
        else:
            requirement_met = version_line == programs[program][2]
        if not requirement_met:
            listMissings.append(requirement_message)
    return listMissings
86 93
87 94
def requiredPrograms():
    """
    Check the external programs needed and exit when a requirement is not met

    Calls checkPrograms() requiring 'rematch.py' (queried with '--version') to be '>=' '4.0'. When checkPrograms()
    returns any message, the program is terminated through sys.exit() with those messages listed under 'Errors:'.
    """
    programs_version_dictionary = {'rematch.py': ['--version', '>=', '4.0']}
    missingPrograms = checkPrograms(programs_version_dictionary)
    if missingPrograms:
        sys.exit('\n' + 'Errors:' + '\n' + '\n'.join(missingPrograms))
94 101
95 102
def general_information(logfile, version, outdir, time_str):
    """
    Print the run header (start time, logfile, command, directory, version) and check the dependencies

    Parameters
    ----------
    logfile : str
        Path to the logfile; only printed here
    version : str
        Version of the script, passed on to script_version_git()
    outdir : str
        Not used inside this function
    time_str : str
        Not used inside this function

    Returns
    -------
    script_path : str
        Path built as 'patho_typing.py' inside the parent of the directory that contains this module
    """
    print('\n' + '==========> patho_typing <==========')
    print('\n' + 'Program start: ' + time.ctime())

    # Tell where the logfile will be stored
    print('\n' + 'LOGFILE:')
    print(logfile)

    # Print the command as it was invoked
    print('\n' + 'COMMAND:')
    script_path = os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__))), 'patho_typing.py')
    print(sys.executable + ' ' + ' '.join(sys.argv))

    # Print the directory from which the program was launched
    print('\n' + 'PRESENT DIRECTORY:')
    present_directory = os.path.abspath(os.getcwd())
    print(present_directory)

    # Print the program version
    print('\n' + 'VERSION:')
    script_version_git(version, present_directory, script_path)

    # Check the required programs (exits the program when one is missing)
    requiredPrograms()

    return script_path
133 bcftools = os.path.join(script_folder, 'src', 'bcftools-1.3.1', 'bin') 140 bcftools = os.path.join(script_folder, 'src', 'bcftools-1.3.1', 'bin')
134 141
135 os.environ['PATH'] = str(':'.join([bowtie2, samtools, bcftools, path_variable])) 142 os.environ['PATH'] = str(':'.join([bowtie2, samtools, bcftools, path_variable]))
136 143
137 # Print PATH variable 144 # Print PATH variable
138 print '\n' + 'PATH variable:' 145 print('\n' + 'PATH variable:')
139 print os.environ['PATH'] 146 print(os.environ['PATH'])
140 147
141 148
def script_version_git(version, current_directory, script_path, no_git_info=False):
    """
    Print the script version and, unless disabled, the git commit and remote information

    Parameters
    ----------
    version : str
        Version of the script, e.g. "4.0"
    current_directory : str
        Path to the directory where the script started to run; the working directory is set back to it after
        the git commands were attempted
    script_path : str
        Path to the script running; the git commands are run from the parent of the directory containing it
    no_git_info : bool, default False
        True if it is not necessary to retrieve the git commit information

    Returns
    -------
    None
    """
    print('Version {}'.format(version))

    if no_git_info:
        return

    try:
        os.chdir(os.path.dirname(os.path.dirname(script_path)))
        command = ['git', 'log', '-1', '--date=local', '--pretty=format:"%h (%H) - Commit by %cn, %cd) : %s"']
        run_successfully, stdout, stderr = runCommandPopenCommunicate(command, False, 15, False)
        print(stdout)
        command = ['git', 'remote', 'show', 'origin']
        run_successfully, stdout, stderr = runCommandPopenCommunicate(command, False, 15, False)
        print(stdout)
    except Exception:
        # Best effort only: a failure here must not stop the run, but Ctrl-C and sys.exit() must still propagate
        print('HARMLESS WARNING: git command possibly not found. The GitHub repository information will not be'
              ' obtained.')
    finally:
        os.chdir(current_directory)
156 184
157 185
def runTime(start_time):
    """
    Print and return the time elapsed since start_time

    Parameters
    ----------
    start_time : float
        Starting point, in the same units as time.time()

    Returns
    -------
    time_taken : float
        Elapsed seconds rounded to two decimal places
    """
    time_taken = time.time() - start_time
    hours, rest = divmod(time_taken, 3600)
    minutes, seconds = divmod(rest, 60)
    # hours and minutes are floats coming from divmod(), so they are printed as e.g. "1.0"
    print('Runtime :' + str(hours) + 'h:' + str(minutes) + 'm:' + str(round(seconds, 2)) + 's')
    return round(time_taken, 2)
165 193
166 194
167 def timer(function, name): 195 def timer(function, name):
168 @functools.wraps(function) 196 @functools.wraps(function)
200 @functools.wraps(func) 228 @functools.wraps(func)
201 def wrapped_func(*args, **kwargs): 229 def wrapped_func(*args, **kwargs):
202 try: 230 try:
203 func(*args, **kwargs) 231 func(*args, **kwargs)
204 except: 232 except:
205 print 'Exception in ' + func.__name__ 233 print('Exception in ' + func.__name__)
206 traceback.print_exc() 234 traceback.print_exc()
207 return wrapped_func 235 return wrapped_func
208 236
209 237
def kill_subprocess_Popen(subprocess_Popen, command):
    """
    Report that a command ran out of time and kill its process

    Parameters
    ----------
    subprocess_Popen : object
        Object exposing a kill() method (e.g. a subprocess.Popen instance)
    command : list or str
        Command being run; only used in the printed message
    """
    print('Command run out of time: ' + str(command))
    subprocess_Popen.kill()
213 241
214 242
def runCommandPopenCommunicate(command, shell_True, timeout_sec_None, print_comand_True):
    """
    Run a command, wait for it (optionally with a timeout) and return its decoded output

    Parameters
    ----------
    command : list or str
        Command to run. A list is first joined with spaces; the resulting string is then split with shlex.split()
    shell_True : bool
        True to run the command through the shell
    timeout_sec_None : int or float or None
        Seconds to wait before killing the process; None to wait indefinitely
    print_comand_True : bool
        True to print the command before running it

    Returns
    -------
    run_successfully : bool
        True if the process return code was 0
    stdout : str
        Standard output decoded as UTF-8 (undecodable bytes are replaced)
    stderr : str
        Standard error decoded as UTF-8 (undecodable bytes are replaced)
    """
    run_successfully = False
    if not isinstance(command, str):
        command = ' '.join(command)
    command = shlex.split(command)

    if print_comand_True:
        print('Running: ' + ' '.join(command))

    if shell_True:
        command = ' '.join(command)
        proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    else:
        proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

    not_killed_by_timer = True
    try:
        # timeout=None waits indefinitely, so a single call covers both situations
        stdout, stderr = proc.communicate(timeout=timeout_sec_None)
    except subprocess.TimeoutExpired:
        kill_subprocess_Popen(proc, command)
        # Collect whatever was produced before the process was killed
        stdout, stderr = proc.communicate()
        not_killed_by_timer = False

    # Replace undecodable bytes instead of failing on tools that do not emit valid UTF-8
    stdout = stdout.decode("utf-8", errors="replace")
    stderr = stderr.decode("utf-8", errors="replace")

    if proc.returncode == 0:
        run_successfully = True
    else:
        if not print_comand_True and not_killed_by_timer:
            print('Running: ' + str(command))
        if len(stdout) > 0:
            print('STDOUT')
            print(stdout)
        if len(stderr) > 0:
            print('STDERR')
            print(stderr)
    return run_successfully, stdout, stderr
252 283
253 284
def required_length(tuple_length_options, argument_name):
    """
    Build an argparse action that only accepts certain numbers of values

    Parameters
    ----------
    tuple_length_options : tuple
        Allowed numbers of values, e.g. (1, 2)
    argument_name : str
        Name of the option, used in the error message

    Returns
    -------
    RequiredLength : argparse.Action subclass
        Action that stores the values in the namespace, or raises argparse.ArgumentTypeError when the number of
        values is not one of tuple_length_options
    """
    class RequiredLength(argparse.Action):
        def __call__(self, parser, args, values, option_string=None):
            if len(values) not in tuple_length_options:
                # argument_name comes from the enclosing function; the action itself has no such attribute
                msg = 'Option {argument_name} requires one of the following number of' \
                      ' arguments: {tuple_length_options}'.format(argument_name=argument_name,
                                                                  tuple_length_options=tuple_length_options)
                raise argparse.ArgumentTypeError(msg)
            setattr(args, self.dest, values)
    return RequiredLength
263 295
264 296
274 line = line.splitlines()[0] 306 line = line.splitlines()[0]
275 if len(line) > 0: 307 if len(line) > 0:
276 if not blank_line_found: 308 if not blank_line_found:
277 if line.startswith('>'): 309 if line.startswith('>'):
278 if len(temp_sequence_dict) > 0: 310 if len(temp_sequence_dict) > 0:
279 if temp_sequence_dict.values()[0]['length'] - 2 * length_extra_seq > 0: 311 if list(temp_sequence_dict.values())[0]['length'] - 2 * length_extra_seq > 0:
280 sequence_dict[temp_sequence_dict.keys()[0]] = temp_sequence_dict.values()[0] 312 sequence_dict[list(temp_sequence_dict.keys())[0]] = list(temp_sequence_dict.values())[0]
281 headers[temp_sequence_dict.values()[0]['header'].lower()] = sequence_counter 313 headers[list(temp_sequence_dict.values())[0]['header'].lower()] = sequence_counter
282 else: 314 else:
283 print temp_sequence_dict.values()[0]['header'] + ' sequence ignored due to length <= 0' 315 print(list(temp_sequence_dict.values())[0]['header'] + ' sequence ignored due to '
316 'length <= 0')
284 temp_sequence_dict = {} 317 temp_sequence_dict = {}
285 318
286 if line[1:].lower() in headers: 319 if line[1:].lower() in headers:
287 sys.exit('Found duplicated sequence headers') 320 sys.exit('Found duplicated sequence headers')
288 321
295 sys.exit('It was found a blank line between the fasta file above line ' + line) 328 sys.exit('It was found a blank line between the fasta file above line ' + line)
296 else: 329 else:
297 blank_line_found = True 330 blank_line_found = True
298 331
299 if len(temp_sequence_dict) > 0: 332 if len(temp_sequence_dict) > 0:
300 if temp_sequence_dict.values()[0]['length'] - 2 * length_extra_seq > 0: 333 if list(temp_sequence_dict.values())[0]['length'] - 2 * length_extra_seq > 0:
301 sequence_dict[temp_sequence_dict.keys()[0]] = temp_sequence_dict.values()[0] 334 sequence_dict[list(temp_sequence_dict.keys())[0]] = list(temp_sequence_dict.values())[0]
302 headers[temp_sequence_dict.values()[0]['header'].lower()] = sequence_counter 335 headers[list(temp_sequence_dict.values())[0]['header'].lower()] = sequence_counter
303 else: 336 else:
304 print temp_sequence_dict.values()[0]['header'] + ' sequence ignored due to length <= 0' 337 print(list(temp_sequence_dict.values())[0]['header'] + ' sequence ignored due to length <= 0')
305 338
306 return sequence_dict, headers 339 return sequence_dict, headers
307 340
308 341
def simplify_sequence_dict(sequence_dict):
    """
    Re-key the sequences by their header

    Parameters
    ----------
    sequence_dict : dict
        Maps a key to a dictionary of sequence information that contains a 'header' entry

    Returns
    -------
    simple_sequence_dict : dict
        Maps each header to a copy of its sequence information without the 'header' entry. The dictionaries in
        sequence_dict are left untouched.
    """
    simple_sequence_dict = {}
    for info in sequence_dict.values():
        # Build a new dictionary so the caller's data keeps its 'header' entry
        simple_sequence_dict[info['header']] = {key: value for key, value in info.items() if key != 'header'}
    return simple_sequence_dict
315 348
316 349
330 sequence_dict[i]['header'] = sequence_dict[i]['header'].replace(x, '_') 363 sequence_dict[i]['header'] = sequence_dict[i]['header'].replace(x, '_')
331 headers_changed = True 364 headers_changed = True
332 new_headers[sequence_dict[i]['header'].lower()] = i 365 new_headers[sequence_dict[i]['header'].lower()] = i
333 366
334 if headers_changed: 367 if headers_changed:
335 print 'At least one of the those characters was found. Replacing those with _' + '\n' 368 print('At least one of the those characters was found. Replacing those with _' + '\n')
336 369
337 return sequence_dict, new_headers 370 return sequence_dict, new_headers