Mercurial > repos > cstrittmatter > test_eurl_vtec_wgs_pt
diff scripts/modules/utils.py @ 3:0cbed1c0a762 draft default tip
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author | cstrittmatter |
---|---|
date | Tue, 28 Jan 2020 10:42:31 -0500 |
parents | 965517909457 |
children |
line wrap: on
line diff
--- a/scripts/modules/utils.py Wed Jan 22 09:10:12 2020 -0500 +++ b/scripts/modules/utils.py Tue Jan 28 10:42:31 2020 -0500 @@ -37,7 +37,7 @@ def checkPrograms(programs_version_dictionary): - print '\n' + 'Checking dependencies...' + print('\n' + 'Checking dependencies...') programs = programs_version_dictionary which_program = ['which', ''] listMissings = [] @@ -47,9 +47,9 @@ if not run_successfully: listMissings.append(program + ' not found in PATH.') else: - print stdout.splitlines()[0] + print(stdout.splitlines()[0]) if programs[program][0] is None: - print program + ' (impossible to determine programme version) found at: ' + stdout.splitlines()[0] + print(program + ' (impossible to determine programme version) found at: ' + stdout.splitlines()[0]) else: if program.endswith('.jar'): check_version = ['java', '-jar', stdout.splitlines()[0], programs[program][0]] @@ -59,14 +59,16 @@ run_successfully, stdout, stderr = runCommandPopenCommunicate(check_version, False, None, False) if stdout == '': stdout = stderr - if program == 'wget': + if program in ['wget', 'awk']: version_line = stdout.splitlines()[0].split(' ', 3)[2] + elif program in ['prefetch', 'fastq-dump']: + version_line = stdout.splitlines()[1].split(' ')[-1] else: version_line = stdout.splitlines()[0].split(' ')[-1] - replace_characters = ['"', 'v', 'V', '+'] + replace_characters = ['"', 'v', 'V', '+', ','] for i in replace_characters: version_line = version_line.replace(i, '') - print program + ' (' + version_line + ') found' + print(program + ' (' + version_line + ') found') if programs[program][1] == '>=': program_found_version = version_line.split('.') program_version_required = programs[program][2].split('.') @@ -74,20 +76,25 @@ if len(program_found_version) == 2: program_found_version.append(0) else: - program_found_version[2] = program_found_version[2].split('_')[0] + program_found_version[2] = program_found_version[2].split('_')[0] for i in range(0, len(program_version_required)): - if isinstance(program_found_version[i], (int, long)): - if int(program_found_version[i]) < int(program_version_required[i]): - listMissings.append('It is required ' + program + ' with version ' + programs[program][1] + ' ' + programs[program][2]) + if int(program_found_version[i]) > int(program_version_required[i]): + break + elif int(program_found_version[i]) == int(program_version_required[i]): + continue + else: + listMissings.append('It is required ' + program + ' with version ' + + programs[program][1] + ' ' + programs[program][2]) else: if version_line != programs[program][2]: - listMissings.append('It is required ' + program + ' with version ' + programs[program][1] + ' ' + programs[program][2]) + listMissings.append('It is required ' + program + ' with version ' + programs[program][1] + + ' ' + programs[program][2]) return listMissings def requiredPrograms(): programs_version_dictionary = {} - programs_version_dictionary['rematch.py'] = ['--version', '>=', '3.2'] + programs_version_dictionary['rematch.py'] = ['--version', '>=', '4.0'] missingPrograms = checkPrograms(programs_version_dictionary) if len(missingPrograms) > 0: sys.exit('\n' + 'Errors:' + '\n' + '\n'.join(missingPrograms)) @@ -96,26 +103,26 @@ def general_information(logfile, version, outdir, time_str): # Check if output directory exists - print '\n' + '==========> patho_typing <==========' - print '\n' + 'Program start: ' + time.ctime() + print('\n' + '==========> patho_typing <==========') + print('\n' + 'Program start: ' + time.ctime()) # Tells where the logfile will be stored - print '\n' + 'LOGFILE:' - print logfile + print('\n' + 'LOGFILE:') + print(logfile) # Print command - print '\n' + 'COMMAND:' - script_path = os.path.abspath(sys.argv[0]) - print sys.executable + ' ' + script_path + ' ' + ' '.join(sys.argv[1:]) + print('\n' + 'COMMAND:') + script_path = os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__))), 'patho_typing.py') + print(sys.executable + ' ' + ' '.join(sys.argv)) # Print directory where programme was lunch - print '\n' + 'PRESENT DIRECTORY:' + print('\n' + 'PRESENT DIRECTORY:') present_directory = os.path.abspath(os.getcwd()) - print present_directory + print(present_directory) # Print program version - print '\n' + 'VERSION:' - scriptVersionGit(version, present_directory, script_path) + print('\n' + 'VERSION:') + script_version_git(version, present_directory, script_path) # Check programms requiredPrograms() @@ -135,24 +142,45 @@ os.environ['PATH'] = str(':'.join([bowtie2, samtools, bcftools, path_variable])) # Print PATH variable - print '\n' + 'PATH variable:' - print os.environ['PATH'] + print('\n' + 'PATH variable:') + print(os.environ['PATH']) -def scriptVersionGit(version, directory, script_path): - print 'Version ' + version +def script_version_git(version, current_directory, script_path, no_git_info=False): + """ + Print script version and get GitHub commit information + + Parameters + ---------- + version : str + Version of the script, e.g. "4.0" + current_directory : str + Path to the directory where the script was start to run + script_path : str + Path to the script running + no_git_info : bool, default False + True if it is not necessary to retreive the GitHub commit information + + Returns + ------- - try: - os.chdir(os.path.dirname(script_path)) - command = ['git', 'log', '-1', '--date=local', '--pretty=format:"%h (%H) - Commit by %cn, %cd) : %s"'] - run_successfully, stdout, stderr = runCommandPopenCommunicate(command, False, 15, False) - print stdout - command = ['git', 'remote', 'show', 'origin'] - run_successfully, stdout, stderr = runCommandPopenCommunicate(command, False, 15, False) - print stdout - os.chdir(directory) - except: - print 'HARMLESS WARNING: git command possibly not found. The GitHub repository information will not be obtained.' + """ + print('Version {}'.format(version)) + + if not no_git_info: + try: + os.chdir(os.path.dirname(os.path.dirname(script_path))) + command = ['git', 'log', '-1', '--date=local', '--pretty=format:"%h (%H) - Commit by %cn, %cd) : %s"'] + run_successfully, stdout, stderr = runCommandPopenCommunicate(command, False, 15, False) + print(stdout) + command = ['git', 'remote', 'show', 'origin'] + run_successfully, stdout, stderr = runCommandPopenCommunicate(command, False, 15, False) + print(stdout) + except: + print('HARMLESS WARNING: git command possibly not found. The GitHub repository information will not be' + ' obtained.') + finally: + os.chdir(current_directory) def runTime(start_time): @@ -160,7 +188,7 @@ time_taken = end_time - start_time hours, rest = divmod(time_taken, 3600) minutes, seconds = divmod(rest, 60) - print 'Runtime :' + str(hours) + 'h:' + str(minutes) + 'm:' + str(round(seconds, 2)) + 's' + print('Runtime :' + str(hours) + 'h:' + str(minutes) + 'm:' + str(round(seconds, 2)) + 's') return round(time_taken, 2) @@ -202,24 +230,24 @@ try: func(*args, **kwargs) except: - print 'Exception in ' + func.__name__ + print('Exception in ' + func.__name__) traceback.print_exc() return wrapped_func def kill_subprocess_Popen(subprocess_Popen, command): - print 'Command run out of time: ' + str(command) + print('Command run out of time: ' + str(command)) subprocess_Popen.kill() def runCommandPopenCommunicate(command, shell_True, timeout_sec_None, print_comand_True): run_successfully = False - if not isinstance(command, basestring): + if not isinstance(command, str): command = ' '.join(command) command = shlex.split(command) if print_comand_True: - print 'Running: ' + ' '.join(command) + print('Running: ' + ' '.join(command)) if shell_True: command = ' '.join(command) @@ -231,23 +259,26 @@ if timeout_sec_None is None: stdout, stderr = proc.communicate() else: - timer = Timer(timeout_sec_None, kill_subprocess_Popen, args=(proc, command,)) - timer.start() + time_counter = Timer(timeout_sec_None, kill_subprocess_Popen, args=(proc, command,)) + time_counter.start() stdout, stderr = proc.communicate() - timer.cancel() - not_killed_by_timer = timer.isAlive() + time_counter.cancel() + not_killed_by_timer = time_counter.isAlive() + + stdout = stdout.decode("utf-8") + stderr = stderr.decode("utf-8") if proc.returncode == 0: run_successfully = True else: if not print_comand_True and not_killed_by_timer: - print 'Running: ' + str(command) + print('Running: ' + str(command)) if len(stdout) > 0: - print 'STDOUT' - print stdout.decode("utf-8") + print('STDOUT') + print(stdout) if len(stderr) > 0: - print 'STDERR' - print stderr.decode("utf-8") + print('STDERR') + print(stderr) return run_successfully, stdout, stderr @@ -255,8 +286,9 @@ class RequiredLength(argparse.Action): def __call__(self, parser, args, values, option_string=None): if len(values) not in tuple_length_options: - msg = 'Option {argument_name} requires one of the following number of arguments: {tuple_length_options}'.format( - argument_name=self.argument_name, tuple_length_options=tuple_length_options) + msg = 'Option {argument_name} requires one of the following number of' \ + ' arguments: {tuple_length_options}'.format(argument_name=self.argument_name, + tuple_length_options=tuple_length_options) raise argparse.ArgumentTypeError(msg) setattr(args, self.dest, values) return RequiredLength @@ -276,11 +308,12 @@ if not blank_line_found: if line.startswith('>'): if len(temp_sequence_dict) > 0: - if temp_sequence_dict.values()[0]['length'] - 2 * length_extra_seq > 0: - sequence_dict[temp_sequence_dict.keys()[0]] = temp_sequence_dict.values()[0] - headers[temp_sequence_dict.values()[0]['header'].lower()] = sequence_counter + if list(temp_sequence_dict.values())[0]['length'] - 2 * length_extra_seq > 0: + sequence_dict[list(temp_sequence_dict.keys())[0]] = list(temp_sequence_dict.values())[0] + headers[list(temp_sequence_dict.values())[0]['header'].lower()] = sequence_counter else: - print temp_sequence_dict.values()[0]['header'] + ' sequence ignored due to length <= 0' + print(list(temp_sequence_dict.values())[0]['header'] + ' sequence ignored due to ' + 'length <= 0') temp_sequence_dict = {} if line[1:].lower() in headers: @@ -297,18 +330,18 @@ blank_line_found = True if len(temp_sequence_dict) > 0: - if temp_sequence_dict.values()[0]['length'] - 2 * length_extra_seq > 0: - sequence_dict[temp_sequence_dict.keys()[0]] = temp_sequence_dict.values()[0] - headers[temp_sequence_dict.values()[0]['header'].lower()] = sequence_counter + if list(temp_sequence_dict.values())[0]['length'] - 2 * length_extra_seq > 0: + sequence_dict[list(temp_sequence_dict.keys())[0]] = list(temp_sequence_dict.values())[0] + headers[list(temp_sequence_dict.values())[0]['header'].lower()] = sequence_counter else: - print temp_sequence_dict.values()[0]['header'] + ' sequence ignored due to length <= 0' + print(list(temp_sequence_dict.values())[0]['header'] + ' sequence ignored due to length <= 0') return sequence_dict, headers def simplify_sequence_dict(sequence_dict): simple_sequence_dict = {} - for counter, info in sequence_dict.items(): + for counter, info in list(sequence_dict.items()): simple_sequence_dict[info['header']] = info del simple_sequence_dict[info['header']]['header'] return simple_sequence_dict @@ -332,6 +365,6 @@ new_headers[sequence_dict[i]['header'].lower()] = i if headers_changed: - print 'At least one of the those characters was found. Replacing those with _' + '\n' + print('At least one of the those characters was found. Replacing those with _' + '\n') return sequence_dict, new_headers