Mercurial > repos > cstrittmatter > test_eurl_vtec_wgs_pt
comparison scripts/modules/utils.py @ 3:0cbed1c0a762 draft default tip
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author | cstrittmatter |
---|---|
date | Tue, 28 Jan 2020 10:42:31 -0500 |
parents | 965517909457 |
children |
comparison
equal
deleted
inserted
replaced
2:6837f733b4aa | 3:0cbed1c0a762 |
---|---|
35 def getLogFile(self): | 35 def getLogFile(self): |
36 return self.logfile | 36 return self.logfile |
37 | 37 |
38 | 38 |
39 def checkPrograms(programs_version_dictionary): | 39 def checkPrograms(programs_version_dictionary): |
40 print '\n' + 'Checking dependencies...' | 40 print('\n' + 'Checking dependencies...') |
41 programs = programs_version_dictionary | 41 programs = programs_version_dictionary |
42 which_program = ['which', ''] | 42 which_program = ['which', ''] |
43 listMissings = [] | 43 listMissings = [] |
44 for program in programs: | 44 for program in programs: |
45 which_program[1] = program | 45 which_program[1] = program |
46 run_successfully, stdout, stderr = runCommandPopenCommunicate(which_program, False, None, False) | 46 run_successfully, stdout, stderr = runCommandPopenCommunicate(which_program, False, None, False) |
47 if not run_successfully: | 47 if not run_successfully: |
48 listMissings.append(program + ' not found in PATH.') | 48 listMissings.append(program + ' not found in PATH.') |
49 else: | 49 else: |
50 print stdout.splitlines()[0] | 50 print(stdout.splitlines()[0]) |
51 if programs[program][0] is None: | 51 if programs[program][0] is None: |
52 print program + ' (impossible to determine programme version) found at: ' + stdout.splitlines()[0] | 52 print(program + ' (impossible to determine programme version) found at: ' + stdout.splitlines()[0]) |
53 else: | 53 else: |
54 if program.endswith('.jar'): | 54 if program.endswith('.jar'): |
55 check_version = ['java', '-jar', stdout.splitlines()[0], programs[program][0]] | 55 check_version = ['java', '-jar', stdout.splitlines()[0], programs[program][0]] |
56 programs[program].append(stdout.splitlines()[0]) | 56 programs[program].append(stdout.splitlines()[0]) |
57 else: | 57 else: |
58 check_version = [stdout.splitlines()[0], programs[program][0]] | 58 check_version = [stdout.splitlines()[0], programs[program][0]] |
59 run_successfully, stdout, stderr = runCommandPopenCommunicate(check_version, False, None, False) | 59 run_successfully, stdout, stderr = runCommandPopenCommunicate(check_version, False, None, False) |
60 if stdout == '': | 60 if stdout == '': |
61 stdout = stderr | 61 stdout = stderr |
62 if program == 'wget': | 62 if program in ['wget', 'awk']: |
63 version_line = stdout.splitlines()[0].split(' ', 3)[2] | 63 version_line = stdout.splitlines()[0].split(' ', 3)[2] |
64 elif program in ['prefetch', 'fastq-dump']: | |
65 version_line = stdout.splitlines()[1].split(' ')[-1] | |
64 else: | 66 else: |
65 version_line = stdout.splitlines()[0].split(' ')[-1] | 67 version_line = stdout.splitlines()[0].split(' ')[-1] |
66 replace_characters = ['"', 'v', 'V', '+'] | 68 replace_characters = ['"', 'v', 'V', '+', ','] |
67 for i in replace_characters: | 69 for i in replace_characters: |
68 version_line = version_line.replace(i, '') | 70 version_line = version_line.replace(i, '') |
69 print program + ' (' + version_line + ') found' | 71 print(program + ' (' + version_line + ') found') |
70 if programs[program][1] == '>=': | 72 if programs[program][1] == '>=': |
71 program_found_version = version_line.split('.') | 73 program_found_version = version_line.split('.') |
72 program_version_required = programs[program][2].split('.') | 74 program_version_required = programs[program][2].split('.') |
73 if len(program_version_required) == 3: | 75 if len(program_version_required) == 3: |
74 if len(program_found_version) == 2: | 76 if len(program_found_version) == 2: |
75 program_found_version.append(0) | 77 program_found_version.append(0) |
76 else: | 78 else: |
77 program_found_version[2] = program_found_version[2].split('_')[0] | 79 program_found_version[2] = program_found_version[2].split('_')[0] |
78 for i in range(0, len(program_version_required)): | 80 for i in range(0, len(program_version_required)): |
79 if isinstance(program_found_version[i], (int, long)): | 81 if int(program_found_version[i]) > int(program_version_required[i]): |
80 if int(program_found_version[i]) < int(program_version_required[i]): | 82 break |
81 listMissings.append('It is required ' + program + ' with version ' + programs[program][1] + ' ' + programs[program][2]) | 83 elif int(program_found_version[i]) == int(program_version_required[i]): |
84 continue | |
85 else: | |
86 listMissings.append('It is required ' + program + ' with version ' + | |
87 programs[program][1] + ' ' + programs[program][2]) | |
82 else: | 88 else: |
83 if version_line != programs[program][2]: | 89 if version_line != programs[program][2]: |
84 listMissings.append('It is required ' + program + ' with version ' + programs[program][1] + ' ' + programs[program][2]) | 90 listMissings.append('It is required ' + program + ' with version ' + programs[program][1] + |
91 ' ' + programs[program][2]) | |
85 return listMissings | 92 return listMissings |
86 | 93 |
87 | 94 |
88 def requiredPrograms(): | 95 def requiredPrograms(): |
89 programs_version_dictionary = {} | 96 programs_version_dictionary = {} |
90 programs_version_dictionary['rematch.py'] = ['--version', '>=', '3.2'] | 97 programs_version_dictionary['rematch.py'] = ['--version', '>=', '4.0'] |
91 missingPrograms = checkPrograms(programs_version_dictionary) | 98 missingPrograms = checkPrograms(programs_version_dictionary) |
92 if len(missingPrograms) > 0: | 99 if len(missingPrograms) > 0: |
93 sys.exit('\n' + 'Errors:' + '\n' + '\n'.join(missingPrograms)) | 100 sys.exit('\n' + 'Errors:' + '\n' + '\n'.join(missingPrograms)) |
94 | 101 |
95 | 102 |
96 def general_information(logfile, version, outdir, time_str): | 103 def general_information(logfile, version, outdir, time_str): |
97 # Check if output directory exists | 104 # Check if output directory exists |
98 | 105 |
99 print '\n' + '==========> patho_typing <==========' | 106 print('\n' + '==========> patho_typing <==========') |
100 print '\n' + 'Program start: ' + time.ctime() | 107 print('\n' + 'Program start: ' + time.ctime()) |
101 | 108 |
102 # Tells where the logfile will be stored | 109 # Tells where the logfile will be stored |
103 print '\n' + 'LOGFILE:' | 110 print('\n' + 'LOGFILE:') |
104 print logfile | 111 print(logfile) |
105 | 112 |
106 # Print command | 113 # Print command |
107 print '\n' + 'COMMAND:' | 114 print('\n' + 'COMMAND:') |
108 script_path = os.path.abspath(sys.argv[0]) | 115 script_path = os.path.join(os.path.dirname(os.path.dirname(os.path.realpath(__file__))), 'patho_typing.py') |
109 print sys.executable + ' ' + script_path + ' ' + ' '.join(sys.argv[1:]) | 116 print(sys.executable + ' ' + ' '.join(sys.argv)) |
110 | 117 |
111 # Print directory where programme was launched | 118 # Print directory where programme was launched |
112 print '\n' + 'PRESENT DIRECTORY:' | 119 print('\n' + 'PRESENT DIRECTORY:') |
113 present_directory = os.path.abspath(os.getcwd()) | 120 present_directory = os.path.abspath(os.getcwd()) |
114 print present_directory | 121 print(present_directory) |
115 | 122 |
116 # Print program version | 123 # Print program version |
117 print '\n' + 'VERSION:' | 124 print('\n' + 'VERSION:') |
118 scriptVersionGit(version, present_directory, script_path) | 125 script_version_git(version, present_directory, script_path) |
119 | 126 |
120 # Check programs | 127 # Check programs |
121 requiredPrograms() | 128 requiredPrograms() |
122 | 129 |
123 return script_path | 130 return script_path |
133 bcftools = os.path.join(script_folder, 'src', 'bcftools-1.3.1', 'bin') | 140 bcftools = os.path.join(script_folder, 'src', 'bcftools-1.3.1', 'bin') |
134 | 141 |
135 os.environ['PATH'] = str(':'.join([bowtie2, samtools, bcftools, path_variable])) | 142 os.environ['PATH'] = str(':'.join([bowtie2, samtools, bcftools, path_variable])) |
136 | 143 |
137 # Print PATH variable | 144 # Print PATH variable |
138 print '\n' + 'PATH variable:' | 145 print('\n' + 'PATH variable:') |
139 print os.environ['PATH'] | 146 print(os.environ['PATH']) |
140 | 147 |
141 | 148 |
142 def scriptVersionGit(version, directory, script_path): | 149 def script_version_git(version, current_directory, script_path, no_git_info=False): |
143 print 'Version ' + version | 150 """ |
144 | 151 Print script version and get GitHub commit information |
145 try: | 152 |
146 os.chdir(os.path.dirname(script_path)) | 153 Parameters |
147 command = ['git', 'log', '-1', '--date=local', '--pretty=format:"%h (%H) - Commit by %cn, %cd) : %s"'] | 154 ---------- |
148 run_successfully, stdout, stderr = runCommandPopenCommunicate(command, False, 15, False) | 155 version : str |
149 print stdout | 156 Version of the script, e.g. "4.0" |
150 command = ['git', 'remote', 'show', 'origin'] | 157 current_directory : str |
151 run_successfully, stdout, stderr = runCommandPopenCommunicate(command, False, 15, False) | 158 Path to the directory from which the script was started |
152 print stdout | 159 script_path : str |
153 os.chdir(directory) | 160 Path to the script running |
154 except: | 161 no_git_info : bool, default False |
155 print 'HARMLESS WARNING: git command possibly not found. The GitHub repository information will not be obtained.' | 162 True if it is not necessary to retrieve the GitHub commit information |
163 | |
164 Returns | |
165 ------- | |
166 | |
167 """ | |
168 print('Version {}'.format(version)) | |
169 | |
170 if not no_git_info: | |
171 try: | |
172 os.chdir(os.path.dirname(os.path.dirname(script_path))) | |
173 command = ['git', 'log', '-1', '--date=local', '--pretty=format:"%h (%H) - Commit by %cn, %cd) : %s"'] | |
174 run_successfully, stdout, stderr = runCommandPopenCommunicate(command, False, 15, False) | |
175 print(stdout) | |
176 command = ['git', 'remote', 'show', 'origin'] | |
177 run_successfully, stdout, stderr = runCommandPopenCommunicate(command, False, 15, False) | |
178 print(stdout) | |
179 except: | |
180 print('HARMLESS WARNING: git command possibly not found. The GitHub repository information will not be' | |
181 ' obtained.') | |
182 finally: | |
183 os.chdir(current_directory) | |
156 | 184 |
157 | 185 |
158 def runTime(start_time): | 186 def runTime(start_time): |
159 end_time = time.time() | 187 end_time = time.time() |
160 time_taken = end_time - start_time | 188 time_taken = end_time - start_time |
161 hours, rest = divmod(time_taken, 3600) | 189 hours, rest = divmod(time_taken, 3600) |
162 minutes, seconds = divmod(rest, 60) | 190 minutes, seconds = divmod(rest, 60) |
163 print 'Runtime :' + str(hours) + 'h:' + str(minutes) + 'm:' + str(round(seconds, 2)) + 's' | 191 print('Runtime :' + str(hours) + 'h:' + str(minutes) + 'm:' + str(round(seconds, 2)) + 's') |
164 return round(time_taken, 2) | 192 return round(time_taken, 2) |
165 | 193 |
166 | 194 |
167 def timer(function, name): | 195 def timer(function, name): |
168 @functools.wraps(function) | 196 @functools.wraps(function) |
200 @functools.wraps(func) | 228 @functools.wraps(func) |
201 def wrapped_func(*args, **kwargs): | 229 def wrapped_func(*args, **kwargs): |
202 try: | 230 try: |
203 func(*args, **kwargs) | 231 func(*args, **kwargs) |
204 except: | 232 except: |
205 print 'Exception in ' + func.__name__ | 233 print('Exception in ' + func.__name__) |
206 traceback.print_exc() | 234 traceback.print_exc() |
207 return wrapped_func | 235 return wrapped_func |
208 | 236 |
209 | 237 |
210 def kill_subprocess_Popen(subprocess_Popen, command): | 238 def kill_subprocess_Popen(subprocess_Popen, command): |
211 print 'Command run out of time: ' + str(command) | 239 print('Command run out of time: ' + str(command)) |
212 subprocess_Popen.kill() | 240 subprocess_Popen.kill() |
213 | 241 |
214 | 242 |
215 def runCommandPopenCommunicate(command, shell_True, timeout_sec_None, print_comand_True): | 243 def runCommandPopenCommunicate(command, shell_True, timeout_sec_None, print_comand_True): |
216 run_successfully = False | 244 run_successfully = False |
217 if not isinstance(command, basestring): | 245 if not isinstance(command, str): |
218 command = ' '.join(command) | 246 command = ' '.join(command) |
219 command = shlex.split(command) | 247 command = shlex.split(command) |
220 | 248 |
221 if print_comand_True: | 249 if print_comand_True: |
222 print 'Running: ' + ' '.join(command) | 250 print('Running: ' + ' '.join(command)) |
223 | 251 |
224 if shell_True: | 252 if shell_True: |
225 command = ' '.join(command) | 253 command = ' '.join(command) |
226 proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) | 254 proc = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) |
227 else: | 255 else: |
229 | 257 |
230 not_killed_by_timer = True | 258 not_killed_by_timer = True |
231 if timeout_sec_None is None: | 259 if timeout_sec_None is None: |
232 stdout, stderr = proc.communicate() | 260 stdout, stderr = proc.communicate() |
233 else: | 261 else: |
234 timer = Timer(timeout_sec_None, kill_subprocess_Popen, args=(proc, command,)) | 262 time_counter = Timer(timeout_sec_None, kill_subprocess_Popen, args=(proc, command,)) |
235 timer.start() | 263 time_counter.start() |
236 stdout, stderr = proc.communicate() | 264 stdout, stderr = proc.communicate() |
237 timer.cancel() | 265 time_counter.cancel() |
238 not_killed_by_timer = timer.isAlive() | 266 not_killed_by_timer = time_counter.isAlive() |
267 | |
268 stdout = stdout.decode("utf-8") | |
269 stderr = stderr.decode("utf-8") | |
239 | 270 |
240 if proc.returncode == 0: | 271 if proc.returncode == 0: |
241 run_successfully = True | 272 run_successfully = True |
242 else: | 273 else: |
243 if not print_comand_True and not_killed_by_timer: | 274 if not print_comand_True and not_killed_by_timer: |
244 print 'Running: ' + str(command) | 275 print('Running: ' + str(command)) |
245 if len(stdout) > 0: | 276 if len(stdout) > 0: |
246 print 'STDOUT' | 277 print('STDOUT') |
247 print stdout.decode("utf-8") | 278 print(stdout) |
248 if len(stderr) > 0: | 279 if len(stderr) > 0: |
249 print 'STDERR' | 280 print('STDERR') |
250 print stderr.decode("utf-8") | 281 print(stderr) |
251 return run_successfully, stdout, stderr | 282 return run_successfully, stdout, stderr |
252 | 283 |
253 | 284 |
254 def required_length(tuple_length_options, argument_name): | 285 def required_length(tuple_length_options, argument_name): |
255 class RequiredLength(argparse.Action): | 286 class RequiredLength(argparse.Action): |
256 def __call__(self, parser, args, values, option_string=None): | 287 def __call__(self, parser, args, values, option_string=None): |
257 if len(values) not in tuple_length_options: | 288 if len(values) not in tuple_length_options: |
258 msg = 'Option {argument_name} requires one of the following number of arguments: {tuple_length_options}'.format( | 289 msg = 'Option {argument_name} requires one of the following number of' \ |
259 argument_name=self.argument_name, tuple_length_options=tuple_length_options) | 290 ' arguments: {tuple_length_options}'.format(argument_name=self.argument_name, |
291 tuple_length_options=tuple_length_options) | |
260 raise argparse.ArgumentTypeError(msg) | 292 raise argparse.ArgumentTypeError(msg) |
261 setattr(args, self.dest, values) | 293 setattr(args, self.dest, values) |
262 return RequiredLength | 294 return RequiredLength |
263 | 295 |
264 | 296 |
274 line = line.splitlines()[0] | 306 line = line.splitlines()[0] |
275 if len(line) > 0: | 307 if len(line) > 0: |
276 if not blank_line_found: | 308 if not blank_line_found: |
277 if line.startswith('>'): | 309 if line.startswith('>'): |
278 if len(temp_sequence_dict) > 0: | 310 if len(temp_sequence_dict) > 0: |
279 if temp_sequence_dict.values()[0]['length'] - 2 * length_extra_seq > 0: | 311 if list(temp_sequence_dict.values())[0]['length'] - 2 * length_extra_seq > 0: |
280 sequence_dict[temp_sequence_dict.keys()[0]] = temp_sequence_dict.values()[0] | 312 sequence_dict[list(temp_sequence_dict.keys())[0]] = list(temp_sequence_dict.values())[0] |
281 headers[temp_sequence_dict.values()[0]['header'].lower()] = sequence_counter | 313 headers[list(temp_sequence_dict.values())[0]['header'].lower()] = sequence_counter |
282 else: | 314 else: |
283 print temp_sequence_dict.values()[0]['header'] + ' sequence ignored due to length <= 0' | 315 print(list(temp_sequence_dict.values())[0]['header'] + ' sequence ignored due to ' |
316 'length <= 0') | |
284 temp_sequence_dict = {} | 317 temp_sequence_dict = {} |
285 | 318 |
286 if line[1:].lower() in headers: | 319 if line[1:].lower() in headers: |
287 sys.exit('Found duplicated sequence headers') | 320 sys.exit('Found duplicated sequence headers') |
288 | 321 |
295 sys.exit('It was found a blank line between the fasta file above line ' + line) | 328 sys.exit('It was found a blank line between the fasta file above line ' + line) |
296 else: | 329 else: |
297 blank_line_found = True | 330 blank_line_found = True |
298 | 331 |
299 if len(temp_sequence_dict) > 0: | 332 if len(temp_sequence_dict) > 0: |
300 if temp_sequence_dict.values()[0]['length'] - 2 * length_extra_seq > 0: | 333 if list(temp_sequence_dict.values())[0]['length'] - 2 * length_extra_seq > 0: |
301 sequence_dict[temp_sequence_dict.keys()[0]] = temp_sequence_dict.values()[0] | 334 sequence_dict[list(temp_sequence_dict.keys())[0]] = list(temp_sequence_dict.values())[0] |
302 headers[temp_sequence_dict.values()[0]['header'].lower()] = sequence_counter | 335 headers[list(temp_sequence_dict.values())[0]['header'].lower()] = sequence_counter |
303 else: | 336 else: |
304 print temp_sequence_dict.values()[0]['header'] + ' sequence ignored due to length <= 0' | 337 print(list(temp_sequence_dict.values())[0]['header'] + ' sequence ignored due to length <= 0') |
305 | 338 |
306 return sequence_dict, headers | 339 return sequence_dict, headers |
307 | 340 |
308 | 341 |
309 def simplify_sequence_dict(sequence_dict): | 342 def simplify_sequence_dict(sequence_dict): |
310 simple_sequence_dict = {} | 343 simple_sequence_dict = {} |
311 for counter, info in sequence_dict.items(): | 344 for counter, info in list(sequence_dict.items()): |
312 simple_sequence_dict[info['header']] = info | 345 simple_sequence_dict[info['header']] = info |
313 del simple_sequence_dict[info['header']]['header'] | 346 del simple_sequence_dict[info['header']]['header'] |
314 return simple_sequence_dict | 347 return simple_sequence_dict |
315 | 348 |
316 | 349 |
330 sequence_dict[i]['header'] = sequence_dict[i]['header'].replace(x, '_') | 363 sequence_dict[i]['header'] = sequence_dict[i]['header'].replace(x, '_') |
331 headers_changed = True | 364 headers_changed = True |
332 new_headers[sequence_dict[i]['header'].lower()] = i | 365 new_headers[sequence_dict[i]['header'].lower()] = i |
333 | 366 |
334 if headers_changed: | 367 if headers_changed: |
335 print 'At least one of the those characters was found. Replacing those with _' + '\n' | 368 print('At least one of the those characters was found. Replacing those with _' + '\n') |
336 | 369 |
337 return sequence_dict, new_headers | 370 return sequence_dict, new_headers |