diff call_amr_mutations.py @ 6:0a4835bee6a6 draft default tip

Uploaded
author greg
date Tue, 21 Mar 2023 20:15:14 +0000
parents bafbed02fdd2
children
line wrap: on
line diff
--- a/call_amr_mutations.py	Tue Mar 21 19:35:08 2023 +0000
+++ b/call_amr_mutations.py	Tue Mar 21 20:15:14 2023 +0000
@@ -1,20 +1,45 @@
 #!/usr/bin/env python
 
-# NOTE: This tool provides the functionality of the PIMA filter_varsacn()
-# function here https://github.com/appliedbinf/pima_md/blob/main/pima.py#L3012
+# NOTE: This tool provides the functionality of both the PIMA filter_varscan() function
+# here https://github.com/appliedbinf/pima_md/blob/main/pima.py#L3012 and the vcf_varscan()
+# function here https://github.com/appliedbinf/pima_md/blob/main/pima.py#L3027
 
 import argparse
-import re
+import os
 import subprocess
 import sys
+import tempfile
 
 
-def run_command(command):
+def run_command(cmd):
+    """Run cmd through the shell and exit with its stderr if it fails.
+
+    The child's stderr is written to an anonymous temporary file in the
+    current directory rather than a pipe, so large error output is spooled
+    to disk. The file is removed automatically when it is closed.
+
+    Exits with status 1 via stop_err() when the command returns non-zero
+    or cannot be started; returns None otherwise.
+    """
     try:
-        return re.split('\\n', subprocess.check_output(command, shell=True).decode('utf-8'))
-    except Exception:
-        message = 'Command %s failed: exiting...' % command
-        sys.exit(message)
+        # dir="." matches the original: scratch space in the current directory.
+        with tempfile.TemporaryFile(dir=".") as tmp_stderr:
+            returncode = subprocess.call(cmd, shell=True, stderr=tmp_stderr)
+            if returncode != 0:
+                tmp_stderr.seek(0)
+                # The file holds bytes; decode before handing the text to
+                # stop_err(), which writes to the text-mode sys.stderr.
+                stderr = tmp_stderr.read().decode('utf-8', errors='replace')
+                stop_err(stderr)
+    except Exception as e:
+        # Reached only if the command could not be run or captured at all;
+        # the SystemExit raised by stop_err() above is not an Exception.
+        stop_err('Command:\n%s\n\nended with error:\n%s\n\n' % (cmd, str(e)))
+
+
+def stop_err(msg):
+    sys.stderr.write(msg)  # emit the failure text exactly as given
+    raise SystemExit(1)  # what sys.exit(1) raises: terminate with status 1
 
 
 def filter_varscan(varscan_raw, output):
@@ -22,8 +47,18 @@
                     '| awk \'(NR > 1 && $9 == 2 && $5 + $6 >= 15)',
                     '{OFS = "\\t";f = $6 / ($5 + $6); gsub(/.*\\//, "", $4);s = $4;gsub(/[+\\-]/, "", s);$7 = sprintf("%.2f%%", f * 100);'
                     'min = 1 / log(length(s) + 2) / log(10) + 2/10;if(f > min){print}}\'',
+                    '1>varscan_tmp'])
+    run_command(cmd)
+    cmd = ' '.join(['cat varscan_tmp',
+                    '| awk \'{OFS = "\\t"; print $1,$2,".",$3,$4,-log($14),"PASS",".","GT","1|1"}\'',
+                    '1>varscan_vcf'])
+    run_command(cmd)
+    cmd = ' '.join(['cat varscan_vcf',
+                    '| sort -k 1,1 -k 2n,2n',
+                    '| awk \'BEGIN{OFS = "\\t";print "##fileformat=VCFv4.2";',
+                    'print "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tSAMPLE"}{print}\'',
                     '1>' + output])
-    output = run_command(cmd)
+    run_command(cmd)
 
 
 if __name__ == '__main__':