Mercurial > repos > jaredgk > ppp_vcfphase
comparison beagle.py @ 0:3830d29fca6a draft
Uploaded
| author | jaredgk |
|---|---|
| date | Mon, 15 Oct 2018 18:15:47 -0400 |
| parents | |
| children | 54c84f7dcb2c |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:3830d29fca6a |
|---|---|
| 1 import os | |
| 2 import sys | |
| 3 import subprocess | |
| 4 import shutil | |
| 5 import argparse | |
| 6 import glob | |
| 7 import logging | |
| 8 | |
| 9 sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared'))) | |
| 10 | |
| 11 from vcf_reader_func import checkFormat | |
| 12 from logging_module import initLogger, logArgs | |
| 13 from vcftools import bgzip_decompress_vcfgz | |
| 14 from bcftools import convert_to_bcf, check_for_index, create_index | |
| 15 | |
def delete_beagle_log (output_prefix):
    '''
    Remove the log file written by beagle

    Intended for cleaning up after a failed beagle run. If no log file
    exists at the expected location, a warning is logged and nothing is
    removed.

    Parameters
    ----------
    output_prefix : str
        Output file prefix; the log is expected at output_prefix + '.log'
    '''

    log_filename = output_prefix + '.log'

    # Log file found: remove it and finish
    if os.path.isfile(log_filename):
        os.remove(log_filename)
        return

    # Nothing to remove, warn rather than fail
    logging.warning('beagle log file %s.log does not exist' % output_prefix)
| 34 | |
def check_beagle_for_errors (beagle_stderr, output_prefix):
    '''
    Checks the beagle stderr for errors

    An empty (or whitespace-only) stderr is treated as a successful run.
    Any other content is treated as a failure: the beagle log file is
    deleted and an exception describing the problem is raised.

    Parameters
    ----------
    beagle_stderr : str
        beagle stderr
    output_prefix : str
        Output file prefix

    Raises
    ------
    Exception
        If beagle stderr is not empty
    '''

    # Beagle completed without writing to stderr, nothing to report
    if not beagle_stderr.strip():
        return

    # Every remaining case is a failure, so delete the beagle log file first
    delete_beagle_log(output_prefix)

    stderr_text = str(beagle_stderr)

    # Print missing data message if that is likely
    if 'ERROR: genotype is missing allele separator:' in stderr_text:
        # Store reported error
        error_reported = 'ERROR: genotype is missing allele separator'
        # Store message for user about error
        user_message = 'Please confirm the input has no missing data.'
        # Report on the error
        raise Exception(error_reported + '\n' + user_message)

    # Report only the error line(s) if beagle flagged an error
    if 'ERROR:' in stderr_text:
        error_lines = [stderr_line for stderr_line in beagle_stderr.splitlines()
                       if stderr_line.startswith('ERROR:')]

        # 'ERROR:' may appear mid-line (e.g. after indentation or a prefix),
        # in which case no line starts with it. Fall back to the complete
        # stderr so the exception message is never empty.
        if not error_lines:
            raise Exception(beagle_stderr)

        raise Exception('\n'.join(error_lines))

    # Non-empty stderr without an error tag. Unlikely to be used, but included.
    raise Exception(beagle_stderr)
| 84 | |
| 85 | |
def standard_beagle_call (beagle_path, beagle_call_args, output_prefix):
    '''
    Runs beagle in a subprocess and checks the run for errors

    The beagle jar is located inside beagle_path and executed with java
    using the supplied arguments. Once the process finishes, its stderr
    is handed to check_beagle_for_errors.

    Parameters
    ----------
    beagle_path : str
        Path to the directory holding beagle.jar
    beagle_call_args : list
        Argument list for beagle
    output_prefix : str
        Output file prefix

    Raises
    ------
    IOError
        If beagle.jar is not found within beagle_path
    '''

    # Location of the jar inside the given directory
    jar_filename = os.path.join(beagle_path, 'beagle.jar')

    # Stop early if the jar is missing
    if not os.path.isfile(jar_filename):
        raise IOError('beagle.jar not found. Path specified: %s' % beagle_path)

    logging.info('beagle phasing parameters assigned')

    # Full command: java launcher followed by the beagle arguments
    java_command = ['java', '-jar', jar_filename] + beagle_call_args

    # Run beagle, capturing both output streams (stdout is not used)
    beagle_process = subprocess.Popen(java_command,
                                      stdout = subprocess.PIPE,
                                      stderr = subprocess.PIPE)
    _, beagle_stderr = beagle_process.communicate()

    # Python 3 returns bytes from the pipe, convert them to a string
    if sys.version_info[0] == 3:
        beagle_stderr = beagle_stderr.decode()

    # Raises if beagle reported a problem
    check_beagle_for_errors(beagle_stderr, output_prefix)

    logging.info('beagle phasing complete')
| 126 | |
def call_beagle (beagle_path, beagle_call_args, output_prefix, output_format):
    '''
    Automates beagle calls

    This function passes the argument list to standard_beagle_call. Once the
    beagle call has finished, the function will automatically convert the
    bgzip compressed output of beagle (output_prefix + '.vcf.gz') to BCF or
    VCF, if either format is specified. Any other output_format leaves the
    beagle output untouched.

    Parameters
    ----------
    beagle_path : str
        Path to beagle.jar
    beagle_call_args : list
        Argument list for beagle
    output_prefix : str
        Output file prefix
    output_format : str
        Output file format ('vcf' and 'bcf' trigger a conversion)
    '''

    # Record the arguments through logging rather than a print statement:
    # the statement form is a syntax error under python 3
    logging.info('beagle call arguments: %s', beagle_call_args)

    # Standard call to beagle
    standard_beagle_call(beagle_path, beagle_call_args, output_prefix)

    # File produced by beagle
    beagle_output = output_prefix + '.vcf.gz'

    # Decompress if a VCF file is requested
    if output_format == 'vcf':
        bgzip_decompress_vcfgz(beagle_output)

    # Convert to BCF if requested
    elif output_format == 'bcf':

        # Check if there is an index file
        # NOTE(review): explicit comparison kept because the return type of
        # check_for_index is not visible here - confirm before simplifying
        if check_for_index(beagle_output) == False:
            # Create an index if not found
            create_index(beagle_output)

        # Convert vcf.gz to bcf
        convert_to_bcf(beagle_output, output_prefix)
