comparison call_amr_mutations.py @ 6:0a4835bee6a6 draft default tip

Uploaded
author greg
date Tue, 21 Mar 2023 20:15:14 +0000
parents bafbed02fdd2
children
comparison
equal deleted inserted replaced
5:bafbed02fdd2 6:0a4835bee6a6
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 2
3 # NOTE: This tool provides the functionality of the PIMA filter_varsacn() 3 # NOTE: This tool provides the functionality of both the PIMA filter_varsacn() function
4 # function here https://github.com/appliedbinf/pima_md/blob/main/pima.py#L3012 4 # here https://github.com/appliedbinf/pima_md/blob/main/pima.py#L3012 and the vcf_varscan()
5 # function here https://github.com/appliedbinf/pima_md/blob/main/pima.py#L3027
5 6
6 import argparse 7 import argparse
7 import re 8 import os
8 import subprocess 9 import subprocess
9 import sys 10 import sys
11 import tempfile
10 12
11 13
12 def run_command(command): 14 def run_command(cmd):
13 try: 15 try:
14 return re.split('\\n', subprocess.check_output(command, shell=True).decode('utf-8')) 16 tmp_name = tempfile.NamedTemporaryFile(dir=".").name
15 except Exception: 17 tmp_stderr = open(tmp_name, 'wb')
16 message = 'Command %s failed: exiting...' % command 18 proc = subprocess.Popen(args=cmd, shell=True, stderr=tmp_stderr.fileno())
17 sys.exit(message) 19 returncode = proc.wait()
20 tmp_stderr.close()
21 if returncode != 0:
22 # Get stderr, allowing for case where it's very large.
23 tmp_stderr = open(tmp_name, 'rb')
24 stderr = ''
25 buffsize = 1048576
26 try:
27 while True:
28 stderr += tmp_stderr.read(buffsize)
29 if not stderr or len(stderr) % buffsize != 0:
30 break
31 except OverflowError:
32 pass
33 tmp_stderr.close()
34 os.remove(tmp_name)
35 stop_err(stderr)
36 except Exception as e:
37 stop_err('Command:\n%s\n\nended with error:\n%s\n\n' % (cmd, str(e)))
38
39
40 def stop_err(msg):
41 sys.stderr.write(msg)
42 sys.exit(1)
18 43
19 44
20 def filter_varscan(varscan_raw, output): 45 def filter_varscan(varscan_raw, output):
21 cmd = ' '.join(['cat', varscan_raw, 46 cmd = ' '.join(['cat', varscan_raw,
22 '| awk \'(NR > 1 && $9 == 2 && $5 + $6 >= 15)', 47 '| awk \'(NR > 1 && $9 == 2 && $5 + $6 >= 15)',
23 '{OFS = "\\t";f = $6 / ($5 + $6); gsub(/.*\\//, "", $4);s = $4;gsub(/[+\\-]/, "", s);$7 = sprintf("%.2f%%", f * 100);' 48 '{OFS = "\\t";f = $6 / ($5 + $6); gsub(/.*\\//, "", $4);s = $4;gsub(/[+\\-]/, "", s);$7 = sprintf("%.2f%%", f * 100);'
24 'min = 1 / log(length(s) + 2) / log(10) + 2/10;if(f > min){print}}\'', 49 'min = 1 / log(length(s) + 2) / log(10) + 2/10;if(f > min){print}}\'',
50 '1>varscan_tmp'])
51 run_command(cmd)
52 cmd = ' '.join(['cat varscan_tmp',
53 '| awk \'{OFS = "\\t"; print $1,$2,".",$3,$4,-log($14),"PASS",".","GT","1|1"}\'',
54 '1>varscan_vcf'])
55 run_command(cmd)
56 cmd = ' '.join(['cat varscan_vcf',
57 '| sort -k 1,1 -k 2n,2n',
58 '| awk \'BEGIN{OFS = "\\t";print "##fileformat=VCFv4.2";',
59 'print "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSAMPLE"}{print}\'',
25 '1>' + output]) 60 '1>' + output])
26 output = run_command(cmd) 61 run_command(cmd)
27 62
28 63
29 if __name__ == '__main__': 64 if __name__ == '__main__':
30 parser = argparse.ArgumentParser() 65 parser = argparse.ArgumentParser()
31 66