diff pca.py @ 27:8997f2ca8c7a

Update to Miller Lab devshed revision bae0d3306d3b
author Richard Burhans <burhans@bx.psu.edu>
date Mon, 15 Jul 2013 10:47:35 -0400
parents 248b06e86022
children
line wrap: on
line diff
--- a/pca.py	Mon Jun 03 12:29:29 2013 -0400
+++ b/pca.py	Mon Jul 15 10:47:35 2013 -0400
@@ -1,39 +1,12 @@
 #!/usr/bin/env python
 
-import errno
+import gd_util
 import os
+import re
 import shutil
-import subprocess
 import sys
 from BeautifulSoup import BeautifulSoup
 import gd_composite
-import re
-
-################################################################################
-
-def mkdir_p(path):
-    try:
-        os.makedirs(path)
-    except OSError, e:
-        if e.errno <> errno.EEXIST:
-            raise
-
-################################################################################
-
-def run_program(prog, args, stdout_file=None):
-    #print "args: ", ' '.join(args)
-    p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-    (stdoutdata, stderrdata) = p.communicate()
-    rc = p.returncode
-
-    if stdout_file is not None:
-        with open(stdout_file, 'w') as ofh:
-            print >> ofh, stdoutdata
-
-    if rc != 0:
-        print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args))
-        print >> sys.stderr, stderrdata
-        sys.exit(1)
 
 ################################################################################
 
@@ -106,15 +79,7 @@
     args.append('-p')
     args.append(par_file)
 
-    #print "args: ", ' '.join(args)
-    p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=subprocess.PIPE, stderr=sys.stderr)
-    (stdoutdata, stderrdata) = p.communicate()
-    rc = p.returncode
-
-    if rc != 0:
-        print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args))
-        print >> sys.stderr, stderrdata
-        sys.exit(1)
+    stdoutdata, stderrdata = gd_util.run_program(prog, args)
 
     stats = []
 
@@ -142,7 +107,7 @@
     args.append(':'.join(population_names))
     args.append('-x')
 
-    run_program(None, args)
+    gd_util.run_program(prog, args)
 
 def do_eval2pct(eval_file, explained_file):
     prog = 'eval2pct'
@@ -150,16 +115,8 @@
     args = [ prog ]
     args.append(eval_file)
 
-    with open(explained_file, 'w') as ofh:
-        #print "args:", ' '.join(args)
-        p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=ofh, stderr=subprocess.PIPE)
-        (stdoutdata, stderrdata) = p.communicate()
-        rc = p.returncode
-
-        if rc != 0:
-            print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args))
-            print >> sys.stderr, stderrdata
-            sys.exit(1)
+    with open(explained_file, 'w') as fh:
+        gd_util.run_program(prog, args, stdout=fh)
 
 def do_coords2admix(coords_file):
     prog = 'coords2admix'
@@ -167,16 +124,8 @@
     args = [ prog ]
     args.append(coords_file)
 
-    with open('fake', 'w') as ofh:
-        #print "args:", ' '.join(args)
-        p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=ofh, stderr=subprocess.PIPE)
-        (stdoutdata, stderrdata) = p.communicate()
-        rc = p.returncode
-
-        if rc != 0:
-            print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args))
-            print >> sys.stderr, stderrdata
-            sys.exit(1)
+    with open('fake', 'w') as fh:
+        gd_util.run_program(prog, args, stdout=fh)
 
     shutil.copy2('fake', coords_file)
 
@@ -203,41 +152,55 @@
 ################################################################################
 
 if len(sys.argv) != 5:
-    print "usage"
-    sys.exit(1)
+    gd_util.die('Usage')
 
 input, input_files_path, output, output_files_path = sys.argv[1:5]
+gd_util.mkdir_p(output_files_path)
 
-mkdir_p(output_files_path)
+################################################################################
 
 ped_file = os.path.join(input_files_path, 'admix.ped')
 geno_file = os.path.join(output_files_path, 'admix.geno')
 do_ped2geno(ped_file, geno_file)
 
+################################################################################
+
 map_file = os.path.join(input_files_path, 'admix.map')
 snp_file = os.path.join(output_files_path, 'admix.snp')
 do_map2snp(map_file, snp_file)
 
+################################################################################
+
 ind_file = os.path.join(output_files_path, 'admix.ind')
 population_names, name_map = make_ind_file(ind_file, input)
 
+################################################################################
+
 par_file = os.path.join(output_files_path, 'par.admix')
 evec_file = os.path.join(output_files_path, 'coordinates.txt')
 eval_file = os.path.join(output_files_path, 'admix.eval')
 make_par_file(par_file, geno_file, snp_file, ind_file, evec_file, eval_file)
 
+################################################################################
+
 smartpca_stats = do_smartpca(par_file)
 fix_names(name_map, [ind_file, evec_file])
 
+################################################################################
+
 do_ploteig(evec_file, population_names)
 plot_file = 'coordinates.txt.1:2.{0}.pdf'.format(':'.join(population_names))
 output_plot_file = os.path.join(output_files_path, 'PCA.pdf')
 shutil.copy2(plot_file, output_plot_file)
 os.unlink(plot_file)
 
+################################################################################
+
 do_eval2pct(eval_file, os.path.join(output_files_path, 'explained.txt'))
 os.unlink(eval_file)
 
+################################################################################
+
 do_coords2admix(evec_file)
 
 ################################################################################