Mercurial > repos > miller-lab > genome_diversity
diff dpmix.py @ 27:8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Mon, 15 Jul 2013 10:47:35 -0400 |
parents | 248b06e86022 |
children | a631c2f6d913 |
line wrap: on
line diff
--- a/dpmix.py Mon Jun 03 12:29:29 2013 -0400 +++ b/dpmix.py Mon Jul 15 10:47:35 2013 -0400 @@ -1,9 +1,8 @@ #!/usr/bin/env python -import errno +import gd_util import sys import os -import subprocess from Population import Population import gd_composite from dpmix_plot import make_dpmix_plot @@ -11,87 +10,70 @@ ################################################################################ -def mkdir_p(path): - try: - os.makedirs(path) - except OSError, e: - if e.errno <> errno.EEXIST: - raise - -def run_program(prog, args, stdout_file=None, space_to_tab=False): - #print "args: ", ' '.join(args) - p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - (stdoutdata, stderrdata) = p.communicate() - rc = p.returncode - - if stdout_file is not None: - with open(stdout_file, 'w') as ofh: - lines = stdoutdata.split('\n') - for line in lines: - line = line.strip() - if line: - if space_to_tab: - line = line.replace(' ', '\t') - print >> ofh, line - - if rc != 0: - print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args)) - print >> sys.stderr, stderrdata - sys.exit(1) - -################################################################################ - -if len(sys.argv) < 16: +if len(sys.argv) != 22: print "usage" sys.exit(1) -input, input_type, data_source, switch_penalty, ap1_input, ap2_input, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file = sys.argv[1:15] -individual_metadata = sys.argv[15:] +input, input_type, data_source, switch_penalty, ap1_input, ap1_name, ap2_input, ap2_name, ap3_input, ap3_name, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file, ind_arg, het_arg, add_logs = sys.argv[1:] + +if ap3_input == '/dev/null': + populations = 2 +else: + populations = 3 chrom = 'all' -add_logs = '0' -loc_path = os.path.join(galaxy_data_index_dir, heterochromatin_loc_file) -location_file = LocationFile(loc_path) -heterochrom_path = location_file.get_values_if_exists(dbkey) -if heterochrom_path is None: +if het_arg == 'use_installed': + loc_path = os.path.join(galaxy_data_index_dir, heterochromatin_loc_file) + location_file = LocationFile(loc_path) + heterochrom_path = location_file.get_values_if_exists(dbkey) + if heterochrom_path is None: + heterochrom_path = '/dev/null' +elif het_arg == 'use_none': heterochrom_path = '/dev/null' +else: + heterochrom_path = het_arg population_list = [] p_total = Population() -p_total.from_tag_list(individual_metadata) +p_total.from_wrapped_dict(ind_arg) ap1 = Population(name='Ancestral population 1') ap1.from_population_file(ap1_input) population_list.append(ap1) if not p_total.is_superset(ap1): - print >> sys.stderr, 'There is an individual in ancestral population 1 that is not in the SNP table' - sys.exit(1) + gd_util.die('There is an individual in ancestral population 1 that is not in the SNP table') ap2 = Population(name='Ancestral population 2') ap2.from_population_file(ap2_input) population_list.append(ap2) if not p_total.is_superset(ap2): - print >> sys.stderr, 'There is an individual in ancestral population 2 that is not in the SNP table' - sys.exit(1) + gd_util.die('There is an individual in ancestral population 2 that is not in the SNP table') + +if populations == 3: + ap3 = Population(name='Ancestral population 3') + ap3.from_population_file(ap3_input) + population_list.append(ap3) + if not p_total.is_superset(ap3): + gd_util.die('There is an individual in ancestral population 3 that is not in the SNP table') p = Population(name='Potentially admixed') p.from_population_file(p_input) population_list.append(p) if not p_total.is_superset(p): - print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' - sys.exit(1) + gd_util.die('There is an individual in the population that is not in the SNP table') -mkdir_p(output2_dir) +gd_util.mkdir_p(output2_dir) ################################################################################ # Create tabular file ################################################################################ -misc_file = os.path.join(output2_dir, 'misc.txt') +misc_file = os.path.join(output2_dir, 'summary.txt') prog = 'dpmix' + args = [ prog ] args.append(input) args.append(ref_column) @@ -104,33 +86,64 @@ columns = ap1.column_list() for column in columns: + col = int(column) + name = ap1.individual_with_column(column).name + first_token = name.split()[0] if input_type == 'gd_genotype': - args.append('{0}:1:{1}'.format(int(column) - 2, ap1.individual_with_column(column).name)) - else: - args.append('{0}:1:{1}'.format(column, ap1.individual_with_column(column).name)) + col -= 2 + args.append('{0}:1:{1}'.format(col, first_token)) columns = ap2.column_list() for column in columns: + col = int(column) + name = ap2.individual_with_column(column).name + first_token = name.split()[0] if input_type == 'gd_genotype': - args.append('{0}:2:{1}'.format(int(column) - 2, ap2.individual_with_column(column).name)) - else: - args.append('{0}:2:{1}'.format(column, ap2.individual_with_column(column).name)) + col -= 2 + args.append('{0}:2:{1}'.format(col, first_token)) + +if populations == 3: + columns = ap3.column_list() + for column in columns: + col = int(column) + name = ap3.individual_with_column(column).name + first_token = name.split()[0] + if input_type == 'gd_genotype': + col -= 2 + args.append('{0}:3:{1}'.format(col, first_token)) columns = p.column_list() for column in columns: + col = int(column) + name = p.individual_with_column(column).name + first_token = name.split()[0] if input_type == 'gd_genotype': - args.append('{0}:0:{1}'.format(int(column) - 2, p.individual_with_column(column).name)) - else: - args.append('{0}:0:{1}'.format(column, p.individual_with_column(column).name)) + col -= 2 + args.append('{0}:0:{1}'.format(col, first_token)) -run_program(None, args, stdout_file=output, space_to_tab=True) +with open(output, 'w') as fh: + gd_util.run_program(prog, args, stdout=fh) ################################################################################ # Create pdf file ################################################################################ -pdf_file = os.path.join(output2_dir, 'dpmix.pdf') -make_dpmix_plot(dbkey, output, pdf_file, galaxy_data_index_dir) +if populations == 3: + state2name = { + 0:'heterochromatin', + 1:ap1_name, + 2:ap2_name, + 3:ap3_name + } +else: + state2name = { + 0:'heterochromatin', + 1:ap1_name, + 2:ap2_name + } + +pdf_file = os.path.join(output2_dir, 'picture.pdf') +make_dpmix_plot(dbkey, output, pdf_file, galaxy_data_index_dir, state2name=state2name, populations=populations) ################################################################################ # Create html @@ -142,8 +155,8 @@ display_file = gd_composite.DisplayFile() display_value = gd_composite.DisplayValue() -out_pdf = gd_composite.Parameter(name='dpmix.pdf', value='dpmix.pdf', display_type=display_file) -out_misc = gd_composite.Parameter(name='misc.txt', value='misc.txt', display_type=display_file) +out_pdf = gd_composite.Parameter(name='picture.pdf', value='picture.pdf', display_type=display_file) +out_misc = gd_composite.Parameter(name='summary.txt', value='summary.txt', display_type=display_file) info_page.add_output_parameter(out_pdf) info_page.add_output_parameter(out_misc) @@ -168,4 +181,3 @@ sys.exit(0) -