Mercurial > repos > jaredgk > ppp_vcfphase
comparison vcftools.py @ 3:d1e3db7f6521 draft
Uploaded
| author | jaredgk |
|---|---|
| date | Wed, 17 Oct 2018 17:28:38 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 2:54c84f7dcb2c | 3:d1e3db7f6521 |
|---|---|
| 1 import os | |
| 2 import sys | |
| 3 import logging | |
| 4 import subprocess | |
| 5 | |
| 6 sys.path.insert(0, os.path.abspath(os.path.join(os.pardir,'jared'))) | |
| 7 | |
| 8 from vcf_reader_func import checkFormat | |
| 9 from bcftools import check_bcftools_for_errors | |
| 10 | |
def check_bgzip_for_errors (bgzip_stderr):
    '''
    Checks the bgzip stderr for errors

    Any non-empty stderr is treated as a failure. The text bgzip wrote is
    appended to the raised error so the cause is not lost.

    Parameters
    ----------
    bgzip_stderr : str or bytes
        bgzip stderr

    Raises
    ------
    IOError
        If bgzip wrote anything to stderr
    '''

    # Empty stderr ('' or b'') means bgzip reported no problem
    if not bgzip_stderr:
        return

    # stderr read directly from a subprocess pipe is bytes under python 3
    if not isinstance(bgzip_stderr, str):
        bgzip_stderr = bgzip_stderr.decode('utf-8', 'replace')

    raise IOError('Error occured while compressing the vcf file: ' + bgzip_stderr.strip())
| 28 | |
def bgzip_decompress_vcfgz (vcfgz_filename, out_prefix = '', keep_original = False):
    '''
    Converts a vcf.gz to vcf

    The function automates bgzip to decompress a vcf.gz file into a vcf.
    When an output prefix is given, or the original is to be kept, bgzip
    writes to stdout and the result is stored in a separate file.
    Otherwise bgzip decompresses the file in place.

    Parameters
    ----------
    vcfgz_filename : str
        The file name of the vcf.gz file to be decompressed
    out_prefix : str
        Output file prefix (i.e. filename without extension)
    keep_original : bool
        Specifies if the original file should be kept

    Raises
    ------
    IOError
        Error in creating the decompressed file
    '''

    # Run bgzip with stdout piped to file
    if keep_original or out_prefix:

        if out_prefix:

            # Assign the output filename from the prefix
            vcf_filename = out_prefix + '.vcf'

        else:

            # Separate into path and filename
            split_path, split_filename = os.path.split(vcfgz_filename)

            # Keep only the text before the first extension separator
            vcf_basename = split_filename.split(os.extsep)[0] + '.vcf'

            # Join path and filename
            vcf_filename = os.path.join(split_path, vcf_basename)

        # The context manager closes the output file even if bgzip cannot be started
        with open(vcf_filename, 'wb') as vcf_file:

            # bgzip subprocess call
            bgzip_call = subprocess.Popen(['bgzip', '-dc', vcfgz_filename], stdout = vcf_file, stderr = subprocess.PIPE)

            # Save the stdout and stderr from bgzip
            bgzip_out, bgzip_err = bgzip_call.communicate()

    # Run bgzip normally
    else:

        # bgzip subprocess call
        bgzip_call = subprocess.Popen(['bgzip', '-d', vcfgz_filename], stdout = subprocess.PIPE, stderr = subprocess.PIPE)

        # Save the stdout and stderr from bgzip
        bgzip_out, bgzip_err = bgzip_call.communicate()

    # Check that output file was decompressed correctly
    check_bgzip_for_errors(bgzip_err)

    # Delete input when also using an output prefix
    if out_prefix and not keep_original:
        os.remove(vcfgz_filename)
| 90 | |
def bgzip_compress_vcf (vcf_filename, out_prefix = '', keep_original = False):
    '''
    Converts a vcf to vcf.gz

    The function automates bgzip to compress a vcf file into a vcf.gz.
    When an output prefix is given, or the original is to be kept, bgzip
    writes to stdout and the result is stored in a separate file.
    Otherwise bgzip compresses the file in place.

    Parameters
    ----------
    vcf_filename : str
        The file name of the vcf file to be compressed
    out_prefix : str
        Output file prefix (i.e. filename without extension)
    keep_original : bool
        Specifies if the original file should be kept

    Raises
    ------
    IOError
        Error in creating the compressed file
    '''

    # Compress and keep the original file
    if keep_original or out_prefix:

        if out_prefix:

            # Assign the filename
            vcfgz_filename = out_prefix + '.vcf.gz'

        else:

            # Separate the INPUT name into path and filename. The output
            # name is derived from it and is not assigned yet.
            split_path, split_filename = os.path.split(vcf_filename)

            # Keep only the text before the first extension separator
            vcfgz_basename = split_filename.split(os.extsep)[0] + '.vcf.gz'

            # Join path and filename
            vcfgz_filename = os.path.join(split_path, vcfgz_basename)

        # The context manager closes the output file even if bgzip cannot be started
        with open(vcfgz_filename, 'wb') as vcfgz_file:

            # bgzip subprocess call
            bgzip_call = subprocess.Popen(['bgzip', '-c', vcf_filename], stdout = vcfgz_file, stderr = subprocess.PIPE)

            # Save the stdout and stderr from bgzip
            bgzip_out, bgzip_err = bgzip_call.communicate()

    # Compress and do not keep the original file
    else:

        # bgzip subprocess call
        bgzip_call = subprocess.Popen(['bgzip', vcf_filename], stdout = subprocess.PIPE, stderr = subprocess.PIPE)

        # Save the stdout and stderr from bgzip
        bgzip_out, bgzip_err = bgzip_call.communicate()

    # Check that output file was compressed correctly
    check_bgzip_for_errors(bgzip_err)
| 147 | |
def cvt_vcftools_site_to_bed (vcftools_out_str):
    '''
    Converts one line of vcftools site output into a bed formatted line

    The second tab-separated column is read as an integer position and
    the value position + 1 is appended as a new last column. Header lines
    (containing both 'CHROM' and 'POS') and blank lines yield an empty
    string so they can be written out without effect.

    Parameters
    ----------
    vcftools_out_str : str or bytes
        A single line of vcftools site output

    Returns
    -------
    str
        The converted line (newline terminated), or '' for header and
        blank lines
    '''

    # Lines read from a subprocess pipe are bytes under python 3
    if isinstance(vcftools_out_str, bytes) and not isinstance(vcftools_out_str, str):
        vcftools_out_str = vcftools_out_str.decode()

    # Remove the header
    if 'CHROM' in vcftools_out_str and 'POS' in vcftools_out_str:
        return ''

    # Split the line into a list
    vcftools_out_data = vcftools_out_str.strip().split('\t')

    # Nothing to convert on a blank line
    if vcftools_out_data == ['']:
        return ''

    # Convert the chromStart to int
    vcftools_out_data[1] = int(vcftools_out_data[1])

    # Calc chromEnd and add it to the end of the list
    vcftools_out_data.append(vcftools_out_data[1] + 1)

    # Return the list as a string (with newline element)
    return '\t'.join(map(str, vcftools_out_data)) + '\n'
| 164 | |
def pipe_vcftools (vcftools_call_args):
    '''
    Starts vcftools with its results sent to a pipe

    vcftools is launched with `--stdout` and both stdout and stderr are
    captured as pipes. The process is returned immediately and is not
    waited on, so this function performs no error checking. Callers are
    expected to consume the pipes and then run check_vcftools_for_errors
    on the collected stderr.

    Parameters
    ----------
    vcftools_call_args : list
        vcftools arguments (each item is converted to str)

    Returns
    -------
    subprocess.Popen
        The running vcftools process; its `stdout` attribute carries the
        results and its `stderr` attribute carries the log
    '''

    # Assemble the command line, coercing every argument to a string
    vcftools_command = ['vcftools', '--stdout']
    vcftools_command.extend(str(call_arg) for call_arg in vcftools_call_args)

    return subprocess.Popen(vcftools_command, stdout = subprocess.PIPE, stderr = subprocess.PIPE)
| 195 | |
def pipe_vcftools_to_bed_file (vcftools_call_args, output_filename):
    '''
    Pipes site-file output of vcftools to a bed formatted file

    Each line of the vcftools stdout is converted with
    cvt_vcftools_site_to_bed and written to the output file as it
    arrives, so no intermediate site file is created.

    Parameters
    ----------
    vcftools_call_args : list
        vcftools arguments
    output_filename : str
        Filename of the bed file

    Returns
    -------
    vcftools_stderr : str
        vcftools log output

    Raises
    ------
    Exception
        If a line cannot be converted or written, or if the vcftools
        stderr reports an error
    '''

    # Open vcftools pipe
    vcftools_call = pipe_vcftools(vcftools_call_args)

    # Create the bed file (closed by the context manager on every path)
    with open(output_filename, 'w') as bed_output:

        try:
            # Iterate the vcftools stdout unless error occurs
            for vcftools_stdout_line in iter(vcftools_call.stdout.readline, b''):

                # Check if code is running in python 3
                if sys.version_info[0] == 3:
                    # Convert bytes to string
                    vcftools_stdout_line = vcftools_stdout_line.decode()

                bed_output.write(cvt_vcftools_site_to_bed(vcftools_stdout_line))

        except Exception:
            # Close and delete the partial bed file
            bed_output.close()
            os.remove(output_filename)

            # Stop vcftools and reap it so no process or pipe is left behind
            vcftools_call.kill()
            vcftools_call.communicate()

            # Report the failure instead of returning without an output file
            raise Exception('vcftools to python pipe error')

    # Close the vcftools stdout
    vcftools_call.stdout.close()

    # Read the vcftools stderr before waiting, so a full stderr pipe
    # cannot keep vcftools from exiting
    vcftools_stderr = vcftools_call.stderr.read()
    vcftools_call.stderr.close()

    # Wait for vcftools to finish
    vcftools_call.wait()

    # Check if code is running in python 3
    if sys.version_info[0] == 3:
        # Convert bytes to string
        vcftools_stderr = vcftools_stderr.decode()

    # Check that the log file was created correctly
    check_vcftools_for_errors(vcftools_stderr)

    logging.info('vcftools call complete')

    return vcftools_stderr
| 251 | |
def pipe_vcftools_bgzip (vcftools_call_args, output_filename):
    '''
    Pipes the output of vcftools to bgzip

    The purpose of this function is to avoid creating large uncompressed
    vcf files by directly piping the output of vcftools to bgzip. This
    results in creating a vcf.gz file without any intermediates.

    Parameters
    ----------
    vcftools_call_args : list
        vcftools arguments
    output_filename : str
        Filename of the compressed vcf file

    Returns
    -------
    vcftools_stderr : str
        vcftools log output

    Raises
    ------
    Exception
        If vcftools stderr returns an error
    IOError
        If bgzip stderr returns an error
    '''

    vcftools_call = pipe_vcftools(vcftools_call_args)

    # Create bgzip output file (closed by the context manager on every path)
    with open(output_filename, 'wb') as bgzip_output:

        # bgzip subprocess call
        bgzip_call = subprocess.Popen(['bgzip'], stdin = vcftools_call.stdout, stdout = bgzip_output, stderr = subprocess.PIPE)

        # Close the parent's copy of the vcftools stdout. bgzip holds its
        # own copy, and this lets vcftools receive SIGPIPE if bgzip exits early
        vcftools_call.stdout.close()

        # Read the vcftools stderr before waiting, so a full stderr pipe
        # cannot keep vcftools from exiting
        vcftools_stderr = vcftools_call.stderr.read()
        vcftools_call.stderr.close()

        # Wait for vcftools to finish
        vcftools_call.wait()

        # Wait for bgzip to finish and save its stderr, stdout = None
        bgzip_stdout, bgzip_stderr = bgzip_call.communicate()

    # Check if code is running in python 3
    if sys.version_info[0] == 3:
        # Convert bytes to string
        vcftools_stderr = vcftools_stderr.decode()
        bgzip_stderr = bgzip_stderr.decode()

    # Both processes are finished and reaped before any check can raise

    # Check that the log file was created correctly
    check_vcftools_for_errors(vcftools_stderr)

    # Check that output file was compressed correctly
    check_bgzip_for_errors(bgzip_stderr)

    logging.info('vcftools and bgzip calls complete')

    return vcftools_stderr
| 314 | |
def pipe_vcftools_bcftools (vcftools_call_args, output_filename):
    '''
    Pipes the output of vcftools to bcftools

    The purpose of this function is to avoid the vcftools command
    --recode-bcf that may result in malformed BCF files. To avoid large
    uncompressed intermediates, this function pipes the stdout of vcftools
    to bcftools.

    Parameters
    ----------
    vcftools_call_args : list
        vcftools arguments
    output_filename : str
        Filename of the BCF file

    Returns
    -------
    vcftools_stderr : str
        vcftools log output

    Raises
    ------
    Exception
        If vcftools stderr returns an error. Errors found in the bcftools
        stderr are raised by check_bcftools_for_errors.
    '''

    vcftools_call = pipe_vcftools(vcftools_call_args)

    # Arguments to convert to BCF format and write to the output file
    convert_args = ['view', '-O', 'b', '-o', output_filename]

    # bcftools subprocess call
    bcftools_call = subprocess.Popen(['bcftools'] + convert_args, stdin = vcftools_call.stdout, stdout = subprocess.PIPE, stderr = subprocess.PIPE)

    # Close the parent's copy of the vcftools stdout. bcftools holds its
    # own copy, and this lets vcftools receive SIGPIPE if bcftools exits early
    vcftools_call.stdout.close()

    # Read the vcftools stderr before waiting, so a full stderr pipe
    # cannot keep vcftools from exiting
    vcftools_stderr = vcftools_call.stderr.read()
    vcftools_call.stderr.close()

    # Wait for vcftools to finish
    vcftools_call.wait()

    # Wait for bcftools to finish and save its stdout and stderr
    bcftools_stdout, bcftools_stderr = bcftools_call.communicate()

    # Check if code is running in python 3
    if sys.version_info[0] == 3:
        # Convert bytes to string
        vcftools_stderr = vcftools_stderr.decode()
        bcftools_stderr = bcftools_stderr.decode()

    # Both processes are finished and reaped before any check can raise

    # Check that the log file was created correctly
    check_vcftools_for_errors(vcftools_stderr)

    # Check that the BCF file was created correctly
    check_bcftools_for_errors(bcftools_stderr)

    logging.info('vcftools and bcftools calls complete')

    return vcftools_stderr
| 378 | |
def pipe_vcftools_to_file (vcftools_call_args, output_filename, append_output = False):
    '''
    Pipes file output of vcftools to a standard file

    The function calls vcftools and writes its stdout to the output
    file line by line. Returns the stderr of vcftools to create log file
    of the call. The function may be used to append multiple calls to
    vcftools to a single file; when appending to a file that already has
    content, the first line of the new output (the header) is skipped.

    Parameters
    ----------
    vcftools_call_args : list
        vcftools arguments
    output_filename : str
        Filename of the output file
    append_output : bool
        The output file should be written in append mode

    Returns
    -------
    vcftools_err : str
        vcftools log output

    Raises
    ------
    Exception
        If the output cannot be written, or if vcftools stderr returns
        an error
    '''

    # Open vcftools pipe
    vcftools_call = pipe_vcftools(vcftools_call_args)

    # Append to, or create, the output file (closed by the context manager)
    with open(output_filename, 'a' if append_output else 'w') as output_file:

        try:
            # Create iterator of the vcftools stdout
            stdout_iter = iter(vcftools_call.stdout.readline, b'')

            # Check if the output is being appended and the file isn't empty
            if append_output and os.stat(output_filename).st_size != 0:
                # Skip the header. The default keeps an empty stdout (e.g.
                # a failed vcftools call) from raising here, so the real
                # error is reported from the vcftools stderr below
                next(stdout_iter, None)

            # Iterate the vcftools stdout
            for vcftools_stdout_line in stdout_iter:

                # Check if code is running in python 3
                if sys.version_info[0] == 3:
                    # Convert bytes to string
                    vcftools_stdout_line = vcftools_stdout_line.decode()

                output_file.write(vcftools_stdout_line)

        except Exception:
            # Close and delete the output file
            output_file.close()
            os.remove(output_filename)

            # Stop vcftools and reap it so no process or pipe is left behind
            vcftools_call.kill()
            vcftools_call.communicate()

            raise Exception('vcftools to python pipe error')

    # Close the vcftools stdout
    vcftools_call.stdout.close()

    # Read the vcftools stderr before waiting, so a full stderr pipe
    # cannot keep vcftools from exiting
    vcftools_stderr = vcftools_call.stderr.read()
    vcftools_call.stderr.close()

    # Wait for vcftools to finish
    vcftools_call.wait()

    # Check if code is running in python 3
    if sys.version_info[0] == 3:
        # Convert bytes to string
        vcftools_stderr = vcftools_stderr.decode()

    # Check that the log file was created correctly
    check_vcftools_for_errors(vcftools_stderr)

    logging.info('vcftools call complete')

    return vcftools_stderr
| 467 | |
def standard_vcftools_call (vcftools_call_args):
    '''
    Runs vcftools to completion and returns its log

    vcftools is started without `--stdout`. Its stdout and stderr are both
    captured, but only the stderr (the log) is checked and returned.

    Parameters
    ----------
    vcftools_call_args : list
        vcftools arguments (each item is converted to str)

    Returns
    -------
    vcftools_err : str
        vcftools log output

    Raises
    ------
    Exception
        If vcftools stderr returns an error
    '''

    # Assemble the command line, coercing every argument to a string
    vcftools_command = ['vcftools'] + [str(call_arg) for call_arg in vcftools_call_args]

    vcftools_process = subprocess.Popen(vcftools_command, stdout = subprocess.PIPE, stderr = subprocess.PIPE)

    # communicate() blocks until vcftools exits; only the log is kept
    vcftools_log = vcftools_process.communicate()[1]

    # Under python 3 the pipe delivers bytes, which are converted to str
    if sys.version_info[0] == 3:
        vcftools_log = vcftools_log.decode()

    logging.info('vcftools call complete')

    # Raises if the log does not show a completed run
    check_vcftools_for_errors(vcftools_log)

    return vcftools_log
| 510 | |
def call_vcftools (vcftools_call_args, output_format = None, output_filename = None):
    '''
    Calls vcftools using the method suited to the output format

    The output format selects how the vcftools results are handled:
    'vcf.gz' is piped to bgzip, 'bcf' is piped to bcftools, 'removed_bed'
    and 'kept_bed' are converted to a bed file, and 'het-fis' is appended
    to the output file. Any other value runs a standard vcftools call.

    Parameters
    ----------
    vcftools_call_args : list
        vcftools arguments
    output_format : str
        The output format
    output_filename : str
        The output filename (for piped calls)

    Returns
    -------
    vcftools_err : str
        vcftools log output

    Raises
    ------
    Exception
        If vcftools stderr returns an error
    '''

    # Bgzipped vcf: pipe vcftools stdout to bgzip
    if output_format == 'vcf.gz':
        return pipe_vcftools_bgzip(vcftools_call_args, output_filename)

    # BCF: pipe vcftools stdout to bcftools
    if output_format == 'bcf':
        return pipe_vcftools_bcftools(vcftools_call_args, output_filename)

    # Site lists: pipe vcftools stdout to a bed file
    if output_format in ('removed_bed', 'kept_bed'):
        return pipe_vcftools_to_bed_file(vcftools_call_args, output_filename)

    # het-fis results from separate calls accumulate in a single file
    if output_format == 'het-fis':
        return pipe_vcftools_to_file(vcftools_call_args, output_filename, append_output = True)

    # Everything else: call vcftools under standard conditions
    return standard_vcftools_call(vcftools_call_args)
| 559 | |
def check_for_vcftools_output (vcftools_output):
    '''
    Checks that no previous vcftools output is present

    Confirms that neither the output file nor its log file
    (the output filename followed by '.log') already exists.

    Parameters
    ----------
    vcftools_output : str
        Specifies the output filename to be checked

    Raises
    ------
    IOError
        If the vcftools output file exists
    IOError
        If the vcftools log file exists
    '''

    # (path to test, error if present, message logged if absent)
    output_checks = (
        (vcftools_output, 'VCF output file already exists', 'Output file assigned'),
        (vcftools_output + '.log', 'Log file already exists', 'Log file assigned'),
    )

    # The output file is checked before the log file
    for checked_path, error_message, info_message in output_checks:

        if os.path.isfile(checked_path):
            raise IOError(error_message)

        logging.info(info_message)
| 590 | |
def delete_vcftools_output (vcftools_output):
    '''
    Deletes previous vcftools output

    Confirms if a previous vcftools output file or log file (the output
    filename followed by '.log') exists, and if so, deletes it.

    Parameters
    ----------
    vcftools_output : str
        Specifies the output filename to be deleted

    Raises
    ------
    IOError
        If the vcftools output cannot be deleted
    IOError
        If the vcftools log cannot be deleted
    '''

    # (path to delete, error if deletion fails, message logged afterwards)
    output_targets = (
        (vcftools_output, 'VCF output file cannot be deleted', 'Output file assigned'),
        (vcftools_output + '.log', 'Log file cannot be deleted', 'Log file assigned'),
    )

    for target_path, error_message, info_message in output_targets:

        # Check if the file already exists
        if os.path.isfile(target_path):
            try:
                # Delete the file
                os.remove(target_path)
            # Only filesystem failures are translated; anything else
            # (e.g. KeyboardInterrupt) propagates unchanged
            except OSError:
                raise IOError(error_message)

        logging.info(info_message)
| 629 | |
def check_vcftools_for_errors (vcftools_stderr):
    '''
    Checks the vcftools stderr for errors

    A log containing 'Run Time' is treated as a completed run. Otherwise
    the lines starting with 'Error' are reported if the log contains
    'Error', and the whole log is reported if it does not.

    Parameters
    ----------
    vcftools_stderr : str or bytes
        vcftools stderr

    Raises
    ------
    Exception
        If the vcftools stderr does not show a completed run
    '''

    # stderr read directly from a subprocess pipe is bytes under python 3
    if isinstance(vcftools_stderr, bytes) and not isinstance(vcftools_stderr, str):
        vcftools_stderr = vcftools_stderr.decode()

    vcftools_log_text = str(vcftools_stderr)

    # The job completed without error
    if 'Run Time' in vcftools_log_text:
        return

    # Report only the error line(s) if an error is detected
    if 'Error' in vcftools_log_text:
        error_lines = [log_line for log_line in vcftools_log_text.splitlines() if log_line.startswith('Error')]
        raise Exception('\n'.join(error_lines))

    # Not completed and no error found. Unlikely to be used, but included.
    raise Exception(vcftools_stderr)
| 659 | |
def produce_vcftools_output (output, filename, append_mode = False, strip_header = False):
    '''
    Creates the vcftools output file

    This function will create an output file from the vcftools stdout.
    Please run `check_vcftools_for_errors` prior to check that vcftools
    finished without error. Nothing is returned; the result is the file
    written to `filename`.

    Parameters
    ----------
    output : str
        vcftools stdout
    filename : str
        Specifies the filename for the output file
    append_mode : bool
        Used to create a single output file from multiple calls
    strip_header : bool
        Used to remove the header (the first line) if not needed
    '''

    # Check if the header should be stripped
    if strip_header:
        # keepends = True preserves the newline of every remaining line
        output = ''.join(output.splitlines(True)[1:])

    # Append if a single output file is required from multiple calls.
    # The context manager closes the file even if the write fails.
    with open(filename, 'a' if append_mode else 'w') as vcftools_output_file:
        vcftools_output_file.write(str(output))
| 698 | |
def produce_vcftools_log (output, filename, append_mode = False):
    '''
    Creates the vcftools log file

    This function will create a log file from the vcftools stderr. Please
    run `check_vcftools_for_errors` prior to check that vcftools finished
    without error. Nothing is returned; the result is the file written to
    `filename` followed by '.log'.

    Parameters
    ----------
    output : str
        vcftools stderr
    filename : str
        Specifies the filename for the log file (without the '.log'
        extension, which is added here)
    append_mode : bool
        Used to create a single log file from multiple calls
    '''

    # Append if a single log file is required from multiple calls.
    # The context manager closes the file even if the write fails.
    with open(filename + '.log', 'a' if append_mode else 'w') as vcftools_log_file:
        vcftools_log_file.write(str(output))
| 730 | |
def assign_vcftools_input_arg (filename):
    '''
    Selects the vcftools input argument for a file

    The file extension is used when it is one of '.vcf', '.vcf.gz' or
    '.bcf'. For any other name the format reported by checkFormat for
    the file decides the argument.

    Parameters
    ----------
    filename : str
        Specifies the input filename of unknown format

    Returns
    -------
    list
        Returns vcftools input command for `filename`

    Raises
    ------
    Exception
        If filename is an unknown file format
    '''

    # Extensions recognized by vcftools and their input commands
    extension_commands = (
        ('.vcf', '--vcf'),
        ('.vcf.gz', '--gzvcf'),
        ('.bcf', '--bcf'),
    )

    for file_extension, input_command in extension_commands:
        if filename.endswith(file_extension):
            return [input_command, filename]

    # Extension is unknown or not recognized: ask checkFormat for the format
    vcfname_format = checkFormat(filename)

    # Formats reported by checkFormat and their input commands
    format_commands = (
        ('vcf', '--vcf'),
        ('bgzip', '--gzvcf'),
        ('bcf', '--bcf'),
    )

    for format_name, input_command in format_commands:
        if vcfname_format == format_name:
            return [input_command, filename]

    raise Exception('Unknown VCF file format')
