Mercurial > repos > miller-lab > genome_diversity
diff dpmix.py @ 31:a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Fri, 20 Sep 2013 13:25:27 -0400 |
parents | 8997f2ca8c7a |
children |
line wrap: on
line diff
--- a/dpmix.py Fri Jul 26 12:51:13 2013 -0400 +++ b/dpmix.py Fri Sep 20 13:25:27 2013 -0400 @@ -8,6 +8,20 @@ from dpmix_plot import make_dpmix_plot from LocationFile import LocationFile +def load_and_check_pop(name, file, total_pop): + p = Population(name=name) + p.from_population_file(file) + if not total_pop.is_superset(p): + gd_util.die('There is an individual in {0} that is not in the SNP table'.format(name)) + return p + +def append_pop_tags(the_list, p, input_type, number): + for tag in p.tag_list(): + column, name = tag.split(':') + if input_type == 'gd_genotype': + column = int(column) - 2 + the_list.append('{0}:{1}:{2}'.format(column, number, name)) + ################################################################################ if len(sys.argv) != 22: @@ -16,6 +30,11 @@ input, input_type, data_source, switch_penalty, ap1_input, ap1_name, ap2_input, ap2_name, ap3_input, ap3_name, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file, ind_arg, het_arg, add_logs = sys.argv[1:] +if ap1_input == '/dev/null': + use_reference = True +else: + use_reference = False + if ap3_input == '/dev/null': populations = 2 else: @@ -39,30 +58,19 @@ p_total = Population() p_total.from_wrapped_dict(ind_arg) -ap1 = Population(name='Ancestral population 1') -ap1.from_population_file(ap1_input) -population_list.append(ap1) -if not p_total.is_superset(ap1): - gd_util.die('There is an individual in ancestral population 1 that is not in the SNP table') +if not use_reference: + ap1 = load_and_check_pop('Ancestral population 1', ap1_input, p_total) + population_list.append(ap1) -ap2 = Population(name='Ancestral population 2') -ap2.from_population_file(ap2_input) +ap2 = load_and_check_pop('Ancestral population 2', ap2_input, p_total) population_list.append(ap2) -if not p_total.is_superset(ap2): - gd_util.die('There is an individual in ancestral population 2 that is not in the SNP table') if populations == 3: - ap3 = Population(name='Ancestral population 3') - ap3.from_population_file(ap3_input) + ap3 = load_and_check_pop('Ancestral population 3', ap3_input, p_total) population_list.append(ap3) - if not p_total.is_superset(ap3): - gd_util.die('There is an individual in ancestral population 3 that is not in the SNP table') -p = Population(name='Potentially admixed') -p.from_population_file(p_input) +p = load_and_check_pop('Potentially admixed', p_input, p_total) population_list.append(p) -if not p_total.is_superset(p): - gd_util.die('There is an individual in the population that is not in the SNP table') gd_util.mkdir_p(output2_dir) @@ -84,42 +92,17 @@ args.append(heterochrom_path) args.append(misc_file) -columns = ap1.column_list() -for column in columns: - col = int(column) - name = ap1.individual_with_column(column).name - first_token = name.split()[0] - if input_type == 'gd_genotype': - col -= 2 - args.append('{0}:1:{1}'.format(col, first_token)) +if use_reference: + args.append('0:1:reference') +else: + append_pop_tags(args, ap1, input_type, 1) -columns = ap2.column_list() -for column in columns: - col = int(column) - name = ap2.individual_with_column(column).name - first_token = name.split()[0] - if input_type == 'gd_genotype': - col -= 2 - args.append('{0}:2:{1}'.format(col, first_token)) +append_pop_tags(args, ap2, input_type, 2) if populations == 3: - columns = ap3.column_list() - for column in columns: - col = int(column) - name = ap3.individual_with_column(column).name - first_token = name.split()[0] - if input_type == 'gd_genotype': - col -= 2 - args.append('{0}:3:{1}'.format(col, first_token)) + append_pop_tags(args, ap3, input_type, 3) -columns = p.column_list() -for column in columns: - col = int(column) - name = p.individual_with_column(column).name - first_token = name.split()[0] - if input_type == 'gd_genotype': - col -= 2 - args.append('{0}:0:{1}'.format(col, first_token)) +append_pop_tags(args, p, input_type, 0) with open(output, 'w') as fh: gd_util.run_program(prog, args, stdout=fh)