Mercurial > repos > miller-lab > genome_diversity
view dpmix.py @ 27:8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Mon, 15 Jul 2013 10:47:35 -0400 |
parents | 248b06e86022 |
children | a631c2f6d913 |
line wrap: on
line source
#!/usr/bin/env python import gd_util import sys import os from Population import Population import gd_composite from dpmix_plot import make_dpmix_plot from LocationFile import LocationFile ################################################################################ if len(sys.argv) != 22: print "usage" sys.exit(1) input, input_type, data_source, switch_penalty, ap1_input, ap1_name, ap2_input, ap2_name, ap3_input, ap3_name, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file, ind_arg, het_arg, add_logs = sys.argv[1:] if ap3_input == '/dev/null': populations = 2 else: populations = 3 chrom = 'all' if het_arg == 'use_installed': loc_path = os.path.join(galaxy_data_index_dir, heterochromatin_loc_file) location_file = LocationFile(loc_path) heterochrom_path = location_file.get_values_if_exists(dbkey) if heterochrom_path is None: heterochrom_path = '/dev/null' elif het_arg == 'use_none': heterochrom_path = '/dev/null' else: heterochrom_path = het_arg population_list = [] p_total = Population() p_total.from_wrapped_dict(ind_arg) ap1 = Population(name='Ancestral population 1') ap1.from_population_file(ap1_input) population_list.append(ap1) if not p_total.is_superset(ap1): gd_util.die('There is an individual in ancestral population 1 that is not in the SNP table') ap2 = Population(name='Ancestral population 2') ap2.from_population_file(ap2_input) population_list.append(ap2) if not p_total.is_superset(ap2): gd_util.die('There is an individual in ancestral population 2 that is not in the SNP table') if populations == 3: ap3 = Population(name='Ancestral population 3') ap3.from_population_file(ap3_input) population_list.append(ap3) if not p_total.is_superset(ap3): gd_util.die('There is an individual in ancestral population 3 that is not in the SNP table') p = Population(name='Potentially admixed') p.from_population_file(p_input) population_list.append(p) if not p_total.is_superset(p): gd_util.die('There is an individual in the population that is not in the SNP table') gd_util.mkdir_p(output2_dir) ################################################################################ # Create tabular file ################################################################################ misc_file = os.path.join(output2_dir, 'summary.txt') prog = 'dpmix' args = [ prog ] args.append(input) args.append(ref_column) args.append(chrom) args.append(data_source) args.append(add_logs) args.append(switch_penalty) args.append(heterochrom_path) args.append(misc_file) columns = ap1.column_list() for column in columns: col = int(column) name = ap1.individual_with_column(column).name first_token = name.split()[0] if input_type == 'gd_genotype': col -= 2 args.append('{0}:1:{1}'.format(col, first_token)) columns = ap2.column_list() for column in columns: col = int(column) name = ap2.individual_with_column(column).name first_token = name.split()[0] if input_type == 'gd_genotype': col -= 2 args.append('{0}:2:{1}'.format(col, first_token)) if populations == 3: columns = ap3.column_list() for column in columns: col = int(column) name = ap3.individual_with_column(column).name first_token = name.split()[0] if input_type == 'gd_genotype': col -= 2 args.append('{0}:3:{1}'.format(col, first_token)) columns = p.column_list() for column in columns: col = int(column) name = p.individual_with_column(column).name first_token = name.split()[0] if input_type == 'gd_genotype': col -= 2 args.append('{0}:0:{1}'.format(col, first_token)) with open(output, 'w') as fh: gd_util.run_program(prog, args, stdout=fh) ################################################################################ # Create pdf file ################################################################################ if populations == 3: state2name = { 0:'heterochromatin', 1:ap1_name, 2:ap2_name, 3:ap3_name } else: state2name = { 0:'heterochromatin', 1:ap1_name, 2:ap2_name } pdf_file = os.path.join(output2_dir, 'picture.pdf') make_dpmix_plot(dbkey, output, pdf_file, galaxy_data_index_dir, state2name=state2name, populations=populations) ################################################################################ # Create html ################################################################################ info_page = gd_composite.InfoPage() info_page.set_title('dpmix Galaxy Composite Dataset') display_file = gd_composite.DisplayFile() display_value = gd_composite.DisplayValue() out_pdf = gd_composite.Parameter(name='picture.pdf', value='picture.pdf', display_type=display_file) out_misc = gd_composite.Parameter(name='summary.txt', value='summary.txt', display_type=display_file) info_page.add_output_parameter(out_pdf) info_page.add_output_parameter(out_misc) if data_source == '0': data_source_value = 'sequence coverage' elif data_source == '1': data_source_value = 'estimated genotype' in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value) in_switch_penalty = gd_composite.Parameter(description='Switch penalty', value=switch_penalty, display_type=display_value) info_page.add_input_parameter(in_data_source) info_page.add_input_parameter(in_switch_penalty) misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList()) info_page.add_misc(misc_populations) with open(output2, 'w') as ofh: print >> ofh, info_page.render() sys.exit(0)