annotate call_amr_mutations.py @ 6:0a4835bee6a6 draft default tip

Uploaded
author greg
date Tue, 21 Mar 2023 20:15:14 +0000
parents bafbed02fdd2
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
30b17a17c1aa Uploaded
greg
parents:
diff changeset
1 #!/usr/bin/env python
30b17a17c1aa Uploaded
greg
parents:
diff changeset
2
6
0a4835bee6a6 Uploaded
greg
parents: 5
diff changeset
3 # NOTE: This tool provides the functionality of both the PIMA filter_varscan() function
0a4835bee6a6 Uploaded
greg
parents: 5
diff changeset
4 # here https://github.com/appliedbinf/pima_md/blob/main/pima.py#L3012 and the vcf_varscan()
0a4835bee6a6 Uploaded
greg
parents: 5
diff changeset
5 # function here https://github.com/appliedbinf/pima_md/blob/main/pima.py#L3027
3
30b17a17c1aa Uploaded
greg
parents:
diff changeset
6
30b17a17c1aa Uploaded
greg
parents:
diff changeset
7 import argparse
6
0a4835bee6a6 Uploaded
greg
parents: 5
diff changeset
8 import os
3
30b17a17c1aa Uploaded
greg
parents:
diff changeset
9 import subprocess
30b17a17c1aa Uploaded
greg
parents:
diff changeset
10 import sys
6
0a4835bee6a6 Uploaded
greg
parents: 5
diff changeset
11 import tempfile
3
30b17a17c1aa Uploaded
greg
parents:
diff changeset
12
30b17a17c1aa Uploaded
greg
parents:
diff changeset
13
6
0a4835bee6a6 Uploaded
greg
parents: 5
diff changeset
def run_command(cmd):
    """Run *cmd* through the shell and abort the program if it fails.

    The command's stderr is captured in a temporary file created in the
    current working directory (rather than held in a pipe) to allow for the
    case where it is very large.

    Args:
        cmd: Shell command line, executed with ``shell=True``.

    Returns:
        None when the command exits with status 0.

    Exits:
        Calls ``stop_err`` (which writes to stderr and exits the process)
        with the captured stderr when the command returns a non-zero status,
        or with a description of the exception when the command could not be
        run or its stderr could not be read.
    """
    buffsize = 1048576
    tmp_name = None
    try:
        # delete=False keeps the file on disk once the handle is closed so
        # it can be reopened for reading below; it is removed in `finally`.
        with tempfile.NamedTemporaryFile(dir=".", delete=False) as tmp_stderr:
            tmp_name = tmp_stderr.name
            proc = subprocess.Popen(args=cmd, shell=True, stderr=tmp_stderr.fileno())
            returncode = proc.wait()
        if returncode == 0:
            return
        # Read the captured stderr in fixed-size chunks and stop on the first
        # empty read, so output whose size is an exact multiple of the chunk
        # size still terminates.
        chunks = []
        with open(tmp_name, 'rb') as captured:
            try:
                while True:
                    chunk = captured.read(buffsize)
                    if not chunk:
                        break
                    chunks.append(chunk)
            except OverflowError:
                # Best effort: report whatever was read before the overflow.
                pass
        # The file is read as bytes; decode once so the message is text, and
        # tolerate undecodable bytes rather than losing the whole message.
        stderr = b''.join(chunks).decode('utf-8', 'replace')
    except Exception as e:
        stop_err('Command:\n%s\n\nended with error:\n%s\n\n' % (cmd, str(e)))
    finally:
        # Always clean up the capture file, on success as well as failure.
        if tmp_name is not None:
            try:
                os.remove(tmp_name)
            except OSError:
                pass
    # Reached only when the command ran and returned a non-zero status.
    stop_err(stderr)
0a4835bee6a6 Uploaded
greg
parents: 5
diff changeset
38
0a4835bee6a6 Uploaded
greg
parents: 5
diff changeset
39
0a4835bee6a6 Uploaded
greg
parents: 5
diff changeset
def stop_err(msg):
    """Write *msg* to standard error and terminate with exit status 1.

    Args:
        msg: Text to emit on ``sys.stderr``; written as-is, with no newline
            appended.

    Raises:
        SystemExit: Always, with code 1.
    """
    error_stream = sys.stderr
    error_stream.write(msg)
    raise SystemExit(1)
3
30b17a17c1aa Uploaded
greg
parents:
diff changeset
43
30b17a17c1aa Uploaded
greg
parents:
diff changeset
44
30b17a17c1aa Uploaded
greg
parents:
diff changeset
def filter_varscan(varscan_raw, output):
    """Filter a raw varscan table and write the surviving rows as a VCF.

    Three shell commands are run in sequence via ``run_command``; each one
    aborts the program if it fails. The intermediate files ``varscan_tmp``
    and ``varscan_vcf`` are written to the current working directory and are
    left in place.

    Args:
        varscan_raw: Path of the raw varscan output to filter.
        output: Path of the VCF file to write.
    """
    # Local import so this function does not depend on the file's import block.
    from shlex import quote

    # Stage 1: skip the first line, keep rows where column 9 is 2 and columns
    # 5 + 6 total at least 15, then keep those whose fraction
    # f = $6 / ($5 + $6) exceeds a threshold derived from the length of
    # column 4 (after stripping everything up to the last "/" and any +/-
    # signs). Column 7 is rewritten as f formatted as a percentage.
    # NOTE(review): columns 5 and 6 are presumably the reference/variant read
    # counts of the varscan table -- confirm against the varscan format.
    # The input is attached with a quoted shell redirect instead of
    # `cat FILE |` so that an unreadable input makes the command fail (and
    # paths containing spaces or shell metacharacters are handled).
    cmd = ' '.join(['awk \'(NR > 1 && $9 == 2 && $5 + $6 >= 15)',
                    '{OFS = "\\t";f = $6 / ($5 + $6); gsub(/.*\\//, "", $4);s = $4;gsub(/[+\\-]/, "", s);$7 = sprintf("%.2f%%", f * 100);'
                    'min = 1 / log(length(s) + 2) / log(10) + 2/10;if(f > min){print}}\'',
                    '<' + quote(varscan_raw),
                    '1>varscan_tmp'])
    run_command(cmd)

    # Stage 2: reshape each surviving row into ten tab-separated VCF columns,
    # using -log(column 14) as QUAL and fixed FILTER/INFO/FORMAT/SAMPLE values.
    cmd = ' '.join(['cat varscan_tmp',
                    '| awk \'{OFS = "\\t"; print $1,$2,".",$3,$4,-log($14),"PASS",".","GT","1|1"}\'',
                    '1>varscan_vcf'])
    run_command(cmd)

    # Stage 3: sort by column 1, then numerically by column 2, and prepend
    # the VCF header lines before writing the final output.
    cmd = ' '.join(['cat varscan_vcf',
                    '| sort -k 1,1 -k 2n,2n',
                    '| awk \'BEGIN{OFS = "\\t";print "##fileformat=VCFv4.2";',
                    'print "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSAMPLE"}{print}\'',
                    '1>' + quote(output)])
    run_command(cmd)
3
30b17a17c1aa Uploaded
greg
parents:
diff changeset
62
30b17a17c1aa Uploaded
greg
parents:
diff changeset
63
30b17a17c1aa Uploaded
greg
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point: read the input and output paths from the
    # command line and run the varscan filtering pipeline on them.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        '--varscan_raw',
        dest='varscan_raw',
        action='store',
        help='Raw varscan mpileup VCF file',
    )
    arg_parser.add_argument(
        '--output',
        dest='output',
        action='store',
        help='Output filtered VCF file',
    )
    options = arg_parser.parse_args()

    filter_varscan(options.varscan_raw, options.output)