Mercurial > repos > miller-lab > genome_diversity
comparison dpmix.py @ 31:a631c2f6d913
Update to Miller Lab devshed revision 3c4110ffacc3
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Fri, 20 Sep 2013 13:25:27 -0400 |
parents | 8997f2ca8c7a |
children |
comparison
equal
deleted
inserted
replaced
30:4188853b940b | 31:a631c2f6d913 |
---|---|
6 from Population import Population | 6 from Population import Population |
7 import gd_composite | 7 import gd_composite |
8 from dpmix_plot import make_dpmix_plot | 8 from dpmix_plot import make_dpmix_plot |
9 from LocationFile import LocationFile | 9 from LocationFile import LocationFile |
10 | 10 |
11 def load_and_check_pop(name, file, total_pop): | |
12 p = Population(name=name) | |
13 p.from_population_file(file) | |
14 if not total_pop.is_superset(p): | |
15 gd_util.die('There is an individual in {0} that is not in the SNP table'.format(name)) | |
16 return p | |
17 | |
18 def append_pop_tags(the_list, p, input_type, number): | |
19 for tag in p.tag_list(): | |
20 column, name = tag.split(':') | |
21 if input_type == 'gd_genotype': | |
22 column = int(column) - 2 | |
23 the_list.append('{0}:{1}:{2}'.format(column, number, name)) | |
24 | |
11 ################################################################################ | 25 ################################################################################ |
12 | 26 |
13 if len(sys.argv) != 22: | 27 if len(sys.argv) != 22: |
14 print "usage" | 28 print "usage" |
15 sys.exit(1) | 29 sys.exit(1) |
16 | 30 |
17 input, input_type, data_source, switch_penalty, ap1_input, ap1_name, ap2_input, ap2_name, ap3_input, ap3_name, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file, ind_arg, het_arg, add_logs = sys.argv[1:] | 31 input, input_type, data_source, switch_penalty, ap1_input, ap1_name, ap2_input, ap2_name, ap3_input, ap3_name, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file, ind_arg, het_arg, add_logs = sys.argv[1:] |
32 | |
33 if ap1_input == '/dev/null': | |
34 use_reference = True | |
35 else: | |
36 use_reference = False | |
18 | 37 |
19 if ap3_input == '/dev/null': | 38 if ap3_input == '/dev/null': |
20 populations = 2 | 39 populations = 2 |
21 else: | 40 else: |
22 populations = 3 | 41 populations = 3 |
37 population_list = [] | 56 population_list = [] |
38 | 57 |
39 p_total = Population() | 58 p_total = Population() |
40 p_total.from_wrapped_dict(ind_arg) | 59 p_total.from_wrapped_dict(ind_arg) |
41 | 60 |
42 ap1 = Population(name='Ancestral population 1') | 61 if not use_reference: |
43 ap1.from_population_file(ap1_input) | 62 ap1 = load_and_check_pop('Ancestral population 1', ap1_input, p_total) |
44 population_list.append(ap1) | 63 population_list.append(ap1) |
45 if not p_total.is_superset(ap1): | |
46 gd_util.die('There is an individual in ancestral population 1 that is not in the SNP table') | |
47 | 64 |
48 ap2 = Population(name='Ancestral population 2') | 65 ap2 = load_and_check_pop('Ancestral population 2', ap2_input, p_total) |
49 ap2.from_population_file(ap2_input) | |
50 population_list.append(ap2) | 66 population_list.append(ap2) |
51 if not p_total.is_superset(ap2): | |
52 gd_util.die('There is an individual in ancestral population 2 that is not in the SNP table') | |
53 | 67 |
54 if populations == 3: | 68 if populations == 3: |
55 ap3 = Population(name='Ancestral population 3') | 69 ap3 = load_and_check_pop('Ancestral population 3', ap3_input, p_total) |
56 ap3.from_population_file(ap3_input) | |
57 population_list.append(ap3) | 70 population_list.append(ap3) |
58 if not p_total.is_superset(ap3): | |
59 gd_util.die('There is an individual in ancestral population 3 that is not in the SNP table') | |
60 | 71 |
61 p = Population(name='Potentially admixed') | 72 p = load_and_check_pop('Potentially admixed', p_input, p_total) |
62 p.from_population_file(p_input) | |
63 population_list.append(p) | 73 population_list.append(p) |
64 if not p_total.is_superset(p): | |
65 gd_util.die('There is an individual in the population that is not in the SNP table') | |
66 | 74 |
67 gd_util.mkdir_p(output2_dir) | 75 gd_util.mkdir_p(output2_dir) |
68 | 76 |
69 ################################################################################ | 77 ################################################################################ |
70 # Create tabular file | 78 # Create tabular file |
82 args.append(add_logs) | 90 args.append(add_logs) |
83 args.append(switch_penalty) | 91 args.append(switch_penalty) |
84 args.append(heterochrom_path) | 92 args.append(heterochrom_path) |
85 args.append(misc_file) | 93 args.append(misc_file) |
86 | 94 |
87 columns = ap1.column_list() | 95 if use_reference: |
88 for column in columns: | 96 args.append('0:1:reference') |
89 col = int(column) | 97 else: |
90 name = ap1.individual_with_column(column).name | 98 append_pop_tags(args, ap1, input_type, 1) |
91 first_token = name.split()[0] | |
92 if input_type == 'gd_genotype': | |
93 col -= 2 | |
94 args.append('{0}:1:{1}'.format(col, first_token)) | |
95 | 99 |
96 columns = ap2.column_list() | 100 append_pop_tags(args, ap2, input_type, 2) |
97 for column in columns: | |
98 col = int(column) | |
99 name = ap2.individual_with_column(column).name | |
100 first_token = name.split()[0] | |
101 if input_type == 'gd_genotype': | |
102 col -= 2 | |
103 args.append('{0}:2:{1}'.format(col, first_token)) | |
104 | 101 |
105 if populations == 3: | 102 if populations == 3: |
106 columns = ap3.column_list() | 103 append_pop_tags(args, ap3, input_type, 3) |
107 for column in columns: | |
108 col = int(column) | |
109 name = ap3.individual_with_column(column).name | |
110 first_token = name.split()[0] | |
111 if input_type == 'gd_genotype': | |
112 col -= 2 | |
113 args.append('{0}:3:{1}'.format(col, first_token)) | |
114 | 104 |
115 columns = p.column_list() | 105 append_pop_tags(args, p, input_type, 0) |
116 for column in columns: | |
117 col = int(column) | |
118 name = p.individual_with_column(column).name | |
119 first_token = name.split()[0] | |
120 if input_type == 'gd_genotype': | |
121 col -= 2 | |
122 args.append('{0}:0:{1}'.format(col, first_token)) | |
123 | 106 |
124 with open(output, 'w') as fh: | 107 with open(output, 'w') as fh: |
125 gd_util.run_program(prog, args, stdout=fh) | 108 gd_util.run_program(prog, args, stdout=fh) |
126 | 109 |
127 ################################################################################ | 110 ################################################################################ |