comparison beagle.py @ 0:3830d29fca6a draft

Uploaded
author jaredgk
date Mon, 15 Oct 2018 18:15:47 -0400
parents
children 54c84f7dcb2c
comparison
equal deleted inserted replaced
-1:000000000000 0:3830d29fca6a
1 import os
2 import sys
3 import subprocess
4 import shutil
5 import argparse
6 import glob
7 import logging
8
9 sys.path.insert(0, os.path.abspath(os.path.join(os.pardir, 'jared')))
10
11 from vcf_reader_func import checkFormat
12 from logging_module import initLogger, logArgs
13 from vcftools import bgzip_decompress_vcfgz
14 from bcftools import convert_to_bcf, check_for_index, create_index
15
def delete_beagle_log(output_prefix):
    '''
    Remove the log file written by beagle

    Intended for use after a failed beagle run. The log is expected at
    the output prefix with '.log' appended. If no such file is present
    a warning is logged and nothing is removed.

    Parameters
    ----------
    output_prefix : str
        Output file prefix
    '''

    # Path of the log file for this output prefix
    beagle_log_path = output_prefix + '.log'

    # Remove the log when it is present and finish
    if os.path.isfile(beagle_log_path):
        os.remove(beagle_log_path)
        return

    # Otherwise only warn that there was nothing to remove
    logging.warning('beagle log file %s.log does not exist' % output_prefix)
34
def check_beagle_for_errors (beagle_stderr, output_prefix):
    '''
    Checks the beagle stderr for errors

    Empty (or whitespace-only) stderr is treated as a successful run.
    Any other content causes the beagle log file to be deleted using
    delete_beagle_log and an exception to be raised.

    Parameters
    ----------
    beagle_stderr : str
        beagle stderr
    output_prefix : str
        Output file prefix

    Raises
    ------
    Exception
        If beagle stderr is not empty. The message is either a hint
        about missing data, the reported 'ERROR:' lines, or the complete
        stderr text.
    '''

    # Check if beagle completed without an error
    if not beagle_stderr.strip():
        return

    # Text form of stderr, used for every check below
    stderr_text = str(beagle_stderr)

    # Every remaining case is a failure, so delete the beagle log file
    delete_beagle_log(output_prefix)

    # Print missing data message if that is likely
    if 'ERROR: genotype is missing allele separator:' in stderr_text:

        # Store reported error
        error_reported = 'ERROR: genotype is missing allele separator'
        # Store message for user about error
        user_message = 'Please confirm the input has no missing data.'
        # Report on the error
        raise Exception(error_reported + '\n' + user_message)

    # Print output for beagle if error is detected
    if 'ERROR:' in stderr_text:

        # Keep only the lines that begin with the error tag
        error_lines = [stderr_line for stderr_line in stderr_text.splitlines() if stderr_line.startswith('ERROR:')]

        # 'ERROR:' may only occur mid-line (e.g. indented or prefixed
        # output). Report the complete stderr then, rather than raising
        # an exception with an empty message.
        if not error_lines:
            raise Exception(stderr_text)

        # Prints the error(s)
        raise Exception('\n'.join(error_lines))

    # Print output if not completed and no error found. Unlikely to be used, but included.
    raise Exception(beagle_stderr)
84
85
def standard_beagle_call (beagle_path, beagle_call_args, output_prefix):
    '''
    Calls beagle using subprocess

    This function is used to call beagle under standard conditions. The
    jar is run as 'java -jar <beagle_path>/beagle.jar' followed by the
    argument list. The stderr of the call is then passed to
    check_beagle_for_errors to check for errors. A non-zero exit status
    with empty stderr is also reported as an error.

    Parameters
    ----------
    beagle_path : str
        Path to the directory holding beagle.jar
    beagle_call_args : list
        Argument list for beagle
    output_prefix : str
        Output file prefix

    Raises
    ------
    IOError
        If beagle.jar is not found within beagle_path
    Exception
        Raised by check_beagle_for_errors if the call reported an error
    '''

    # Assign location of beagle jar file
    beagle_jar = os.path.join(beagle_path, 'beagle.jar')

    # Check that beagle.jar exists
    if not os.path.isfile(beagle_jar):
        raise IOError('beagle.jar not found. Path specified: %s' % beagle_path)

    logging.info('beagle phasing parameters assigned')

    # Phasing subprocess call
    phase_call = subprocess.Popen(['java', '-jar', beagle_jar] + beagle_call_args, stdout = subprocess.PIPE, stderr = subprocess.PIPE)
    phase_stdout, phase_stderr = phase_call.communicate()

    # Check if code is running in python 3 (or later)
    if sys.version_info[0] >= 3:
        # Convert bytes to string. Undecodable bytes are replaced so that
        # a decoding failure cannot hide the error reported by beagle.
        phase_stdout = phase_stdout.decode('utf-8', 'replace')
        phase_stderr = phase_stderr.decode('utf-8', 'replace')

    # A failed call that wrote nothing to stderr would otherwise be
    # logged as complete, so report the exit status (and any captured
    # stdout) through the same error handling.
    if phase_call.returncode != 0 and not phase_stderr.strip():
        phase_stderr = 'beagle exited with return code %s' % phase_call.returncode
        if phase_stdout.strip():
            phase_stderr += '\n' + phase_stdout.strip()

    # Check beagle call for errors
    check_beagle_for_errors(phase_stderr, output_prefix)

    logging.info('beagle phasing complete')
126
def call_beagle (beagle_path, beagle_call_args, output_prefix, output_format):
    '''
    Automates beagle calls

    This function passes the argument list to standard_beagle_call. Once
    the beagle call has finished, the '<output_prefix>.vcf.gz' file is
    handled according to the requested format: it is passed to
    bgzip_decompress_vcfgz for 'vcf', or indexed (if check_for_index
    reports no index) and passed to convert_to_bcf for 'bcf'. Any other
    format leaves the file untouched.

    Parameters
    ----------
    beagle_path : str
        Path to the directory holding beagle.jar
    beagle_call_args : list
        Argument list for beagle
    output_prefix : str
        Output file prefix
    output_format : str
        Output file format
    '''

    # Record the argument list with logging. The former bare print
    # statement is a syntax error under python 3.
    logging.info('beagle call arguments: %s', beagle_call_args)

    # Standard call to beagle
    standard_beagle_call(beagle_path, beagle_call_args, output_prefix)

    # File the format handling below operates on
    beagle_vcfgz = output_prefix + '.vcf.gz'

    # Decompress if a VCF files is requested
    if output_format == 'vcf':
        bgzip_decompress_vcfgz(beagle_vcfgz)

    # Convert to BCF if requested
    elif output_format == 'bcf':

        # Check if there is an index file. The explicit comparison is kept
        # because other return values of check_for_index are not visible here.
        if check_for_index(beagle_vcfgz) == False:
            # Create an index if not found
            create_index(beagle_vcfgz)

        # Convert vcf.gz to bcf
        convert_to_bcf(beagle_vcfgz, output_prefix)