Mercurial > repos > jaredgk > ppp_vcfphase
diff beagle.py @ 0:3830d29fca6a draft
Uploaded
| author | jaredgk |
|---|---|
| date | Mon, 15 Oct 2018 18:15:47 -0400 |
| parents | |
| children | 54c84f7dcb2c |
line wrap: on
line diff
# Reconstructed from the changeset diff:
#   --- /dev/null   Thu Jan 01 00:00:00 1970 +0000
#   +++ b/beagle.py Mon Oct 15 18:15:47 2018 -0400

import os
import sys
import subprocess
import shutil
import argparse
import glob
import logging

# Make the sibling 'jared' directory importable (resolved relative to the
# current working directory's parent).
sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared')))

from vcf_reader_func import checkFormat
from logging_module import initLogger, logArgs
from vcftools import bgzip_decompress_vcfgz
from bcftools import convert_to_bcf, check_for_index, create_index


def delete_beagle_log(output_prefix):
    '''
        Delete beagle log file

        This function is used to delete beagle's log file if an error is
        encountered. A warning is produced if the log file cannot be found.

        Parameters
        ----------
        output_prefix : str
            Output file prefix; the log file is expected at
            ``output_prefix + '.log'``
    '''

    beagle_log = output_prefix + '.log'

    # Check that log file exists, if not return warning
    if not os.path.isfile(beagle_log):
        logging.warning('beagle log file %s.log does not exist', output_prefix)
    else:
        os.remove(beagle_log)


def check_beagle_for_errors(beagle_stderr, output_prefix):
    '''
        Checks the beagle stderr for errors

        Empty (or whitespace-only) stderr is treated as success. Any other
        content is treated as a failure: the beagle log file is deleted and
        an exception describing the problem is raised.

        Parameters
        ----------
        beagle_stderr : str
            beagle stderr
        output_prefix : str
            Output file prefix

        Raises
        ------
        Exception
            If beagle stderr is not empty. The message is, in order of
            preference: a missing-data hint, the reported 'ERROR:' lines,
            or the complete stderr text.
    '''

    # Beagle completed without writing anything to stderr
    if not beagle_stderr.strip():
        return

    # Every remaining path is a failure, so remove the beagle log file first
    delete_beagle_log(output_prefix)

    stderr_text = str(beagle_stderr)

    # Print missing data message if that is likely
    if 'ERROR: genotype is missing allele separator:' in stderr_text:
        # Store reported error
        error_reported = 'ERROR: genotype is missing allele separator'
        # Store message for user about error
        user_message = 'Please confirm the input has no missing data.'
        # Report on the error
        raise Exception(error_reported + '\n' + user_message)

    # Report only the error line(s) when beagle flagged them explicitly
    if 'ERROR:' in stderr_text:
        error_lines = [stderr_line for stderr_line in beagle_stderr.splitlines()
                       if stderr_line.startswith('ERROR:')]
        # 'ERROR:' may occur mid-line, in which case no line matches; fall
        # through to the full stderr rather than raising an empty message.
        if error_lines:
            raise Exception('\n'.join(error_lines))

    # Not completed and no usable error line found: report everything
    raise Exception(beagle_stderr)


def standard_beagle_call(beagle_path, beagle_call_args, output_prefix):
    '''
        Calls beagle using subprocess

        This function is used to call beagle under standard conditions. The
        function then passes the stderr to check_beagle_for_errors to check
        for errors.

        Parameters
        ----------
        beagle_path : str
            Path to the directory containing beagle.jar
        beagle_call_args : list
            Argument list for beagle
        output_prefix : str
            Output file prefix

        Raises
        ------
        IOError
            If beagle.jar cannot be found within beagle_path
        Exception
            If beagle writes anything to stderr
    '''

    # Assign location of beagle jar file
    beagle_jar = os.path.join(beagle_path, 'beagle.jar')

    # Check that beagle.jar exists
    if not os.path.isfile(beagle_jar):
        raise IOError('beagle.jar not found. Path specified: %s' % beagle_path)

    logging.info('beagle phasing parameters assigned')

    # Phasing subprocess call; stdout is captured but not used
    phase_call = subprocess.Popen(['java', '-jar', beagle_jar] + beagle_call_args,
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
    phase_stdout, phase_stderr = phase_call.communicate()

    # Check if code is running in python 3 (or later)
    if sys.version_info[0] >= 3:
        # Convert bytes to string. Undecodable bytes are replaced instead of
        # raising, so a decoding problem cannot mask beagle's own error.
        phase_stderr = phase_stderr.decode(errors='replace')

    # Check beagle call for errors
    check_beagle_for_errors(phase_stderr, output_prefix)

    logging.info('beagle phasing complete')


def call_beagle(beagle_path, beagle_call_args, output_prefix, output_format):
    '''
        Automates beagle calls

        This function passes the argument list to standard_beagle_call. Once
        the beagle call has finished, the function will automatically convert
        the bgzip compressed output of beagle to BCF or VCF, if either format
        is specified. For any other output_format no conversion is done.

        Parameters
        ----------
        beagle_path : str
            Path to the directory containing beagle.jar
        beagle_call_args : list
            Argument list for beagle
        output_prefix : str
            Output file prefix
        output_format : str
            Output file format ('vcf' and 'bcf' trigger a conversion)
    '''

    # Function-call form is valid on both python 2 and python 3 and prints
    # the same text; the bare print statement is a SyntaxError on python 3.
    print(beagle_call_args)

    # Standard call to beagle
    standard_beagle_call(beagle_path, beagle_call_args, output_prefix)

    beagle_output = output_prefix + '.vcf.gz'

    # Decompress if a VCF file is requested
    if output_format == 'vcf':
        bgzip_decompress_vcfgz(beagle_output)

    # Convert to BCF if requested
    elif output_format == 'bcf':

        # Check if there is an index file. The explicit comparison is kept
        # on purpose: only a False result triggers index creation.
        # NOTE(review): presumably check_for_index returns a bool - confirm.
        if check_for_index(beagle_output) == False:
            # Create an index if not found
            create_index(beagle_output)

        # Convert vcf.gz to bcf
        convert_to_bcf(beagle_output, output_prefix)
