Mercurial > repos > miller-lab > genome_diversity
diff pca.py @ 27:8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Mon, 15 Jul 2013 10:47:35 -0400 |
parents | 248b06e86022 |
children |
line wrap: on
line diff
--- a/pca.py Mon Jun 03 12:29:29 2013 -0400 +++ b/pca.py Mon Jul 15 10:47:35 2013 -0400 @@ -1,39 +1,12 @@ #!/usr/bin/env python -import errno +import gd_util import os +import re import shutil -import subprocess import sys from BeautifulSoup import BeautifulSoup import gd_composite -import re - -################################################################################ - -def mkdir_p(path): - try: - os.makedirs(path) - except OSError, e: - if e.errno <> errno.EEXIST: - raise - -################################################################################ - -def run_program(prog, args, stdout_file=None): - #print "args: ", ' '.join(args) - p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - (stdoutdata, stderrdata) = p.communicate() - rc = p.returncode - - if stdout_file is not None: - with open(stdout_file, 'w') as ofh: - print >> ofh, stdoutdata - - if rc != 0: - print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args)) - print >> sys.stderr, stderrdata - sys.exit(1) ################################################################################ @@ -106,15 +79,7 @@ args.append('-p') args.append(par_file) - #print "args: ", ' '.join(args) - p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=subprocess.PIPE, stderr=sys.stderr) - (stdoutdata, stderrdata) = p.communicate() - rc = p.returncode - - if rc != 0: - print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args)) - print >> sys.stderr, stderrdata - sys.exit(1) + stdoutdata, stderrdata = gd_util.run_program(prog, args) stats = [] @@ -142,7 +107,7 @@ args.append(':'.join(population_names)) args.append('-x') - run_program(None, args) + gd_util.run_program(prog, args) def do_eval2pct(eval_file, explained_file): prog = 'eval2pct' @@ -150,16 +115,8 @@ args = [ prog ] args.append(eval_file) - with open(explained_file, 'w') as ofh: - #print "args:", ' '.join(args) - p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=ofh, stderr=subprocess.PIPE) - (stdoutdata, stderrdata) = p.communicate() - rc = p.returncode - - if rc != 0: - print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args)) - print >> sys.stderr, stderrdata - sys.exit(1) + with open(explained_file, 'w') as fh: + gd_util.run_program(prog, args, stdout=fh) def do_coords2admix(coords_file): prog = 'coords2admix' @@ -167,16 +124,8 @@ args = [ prog ] args.append(coords_file) - with open('fake', 'w') as ofh: - #print "args:", ' '.join(args) - p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=ofh, stderr=subprocess.PIPE) - (stdoutdata, stderrdata) = p.communicate() - rc = p.returncode - - if rc != 0: - print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args)) - print >> sys.stderr, stderrdata - sys.exit(1) + with open('fake', 'w') as fh: + gd_util.run_program(prog, args, stdout=fh) shutil.copy2('fake', coords_file) @@ -203,41 +152,55 @@ ################################################################################ if len(sys.argv) != 5: - print "usage" - sys.exit(1) + gd_util.die('Usage') input, input_files_path, output, output_files_path = sys.argv[1:5] +gd_util.mkdir_p(output_files_path) -mkdir_p(output_files_path) +################################################################################ ped_file = os.path.join(input_files_path, 'admix.ped') geno_file = os.path.join(output_files_path, 'admix.geno') do_ped2geno(ped_file, geno_file) +################################################################################ + map_file = os.path.join(input_files_path, 'admix.map') snp_file = os.path.join(output_files_path, 'admix.snp') do_map2snp(map_file, snp_file) +################################################################################ + ind_file = os.path.join(output_files_path, 'admix.ind') population_names, name_map = make_ind_file(ind_file, input) +################################################################################ + par_file = os.path.join(output_files_path, 'par.admix') evec_file = os.path.join(output_files_path, 'coordinates.txt') eval_file = os.path.join(output_files_path, 'admix.eval') make_par_file(par_file, geno_file, snp_file, ind_file, evec_file, eval_file) +################################################################################ + smartpca_stats = do_smartpca(par_file) fix_names(name_map, [ind_file, evec_file]) +################################################################################ + do_ploteig(evec_file, population_names) plot_file = 'coordinates.txt.1:2.{0}.pdf'.format(':'.join(population_names)) output_plot_file = os.path.join(output_files_path, 'PCA.pdf') shutil.copy2(plot_file, output_plot_file) os.unlink(plot_file) +################################################################################ + do_eval2pct(eval_file, os.path.join(output_files_path, 'explained.txt')) os.unlink(eval_file) +################################################################################ + do_coords2admix(evec_file) ################################################################################