Mercurial > repos > miller-lab > genome_diversity
comparison prepare_population_structure.py @ 27:8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Mon, 15 Jul 2013 10:47:35 -0400 |
parents | 248b06e86022 |
children |
comparison
equal
deleted
inserted
replaced
26:91e835060ad2 | 27:8997f2ca8c7a |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 | 2 |
3 import errno | 3 import gd_util |
4 import os | 4 import os |
5 import shutil | 5 import shutil |
6 import subprocess | |
7 import sys | 6 import sys |
8 from Population import Population | 7 from Population import Population |
9 import gd_composite | 8 import gd_composite |
10 | 9 |
11 ################################################################################ | 10 ################################################################################ |
12 | 11 |
13 def do_import(filename, files_path, min_reads, min_qual, min_spacing, tags, using_info, population_list): | 12 def do_import(filename, files_path, min_reads, min_qual, min_spacing, using_info, population_list): |
14 info_page = gd_composite.InfoPage() | 13 info_page = gd_composite.InfoPage() |
15 info_page.set_title('Prepare to look for population structure Galaxy Composite Dataset') | 14 info_page.set_title('Prepare to look for population structure Galaxy Composite Dataset') |
16 | 15 |
17 display_file = gd_composite.DisplayFile() | 16 display_file = gd_composite.DisplayFile() |
18 display_value = gd_composite.DisplayValue() | 17 display_value = gd_composite.DisplayValue() |
37 info_page.add_misc(misc_populations) | 36 info_page.add_misc(misc_populations) |
38 | 37 |
39 with open(filename, 'w') as ofh: | 38 with open(filename, 'w') as ofh: |
40 print >> ofh, info_page.render() | 39 print >> ofh, info_page.render() |
41 | 40 |
42 def mkdir_p(path): | |
43 try: | |
44 os.makedirs(path) | |
45 except OSError, e: | |
46 if e.errno <> errno.EEXIST: | |
47 raise | |
48 | |
49 def die(message, exit=True): | |
50 print >> sys.stderr, message | |
51 if exit: | |
52 sys.exit(1) | |
53 | |
54 ################################################################################ | 41 ################################################################################ |
55 | 42 |
56 if len(sys.argv) < 10: | 43 if len(sys.argv) < 10: |
57 die("Usage") | 44 gd_util.die('Usage') |
58 | 45 |
59 # parse command line | 46 # parse command line |
60 input_snp_filename, input_type, min_reads, min_qual, min_spacing, output_filename, output_files_path = sys.argv[1:8] | 47 input_snp_filename, input_type, min_reads, min_qual, min_spacing, output_filename, output_files_path, ind_arg = sys.argv[1:9] |
61 args = sys.argv[8:] | 48 args = sys.argv[9:] |
62 | 49 |
63 individual_metadata = [] | |
64 population_files = [] | 50 population_files = [] |
65 population_names = [] | |
66 all_individuals = False | 51 all_individuals = False |
67 | 52 |
68 for arg in args: | 53 for arg in args: |
69 if arg == 'all_individuals': | 54 if arg == 'all_individuals': |
70 all_individuals = True | 55 all_individuals = True |
71 elif len(arg) > 11: | 56 elif len(arg) > 11 and arg[:11] == 'population:': |
72 tag = arg[:11] | 57 file, name = arg[11:].split(':', 1) |
73 value = arg[11:] | 58 population_files.append((file, name)) |
74 if tag == 'individual:': | |
75 individual_metadata.append(value) | |
76 elif tag == 'population:': | |
77 filename, name = value.split(':', 1) | |
78 population_files.append(filename) | |
79 population_names.append(name) | |
80 | 59 |
81 p_total = Population() | 60 p_total = Population() |
82 p_total.from_tag_list(individual_metadata) | 61 p_total.from_wrapped_dict(ind_arg) |
83 | 62 |
84 individual_population = {} | 63 individual_population = {} |
85 | |
86 population_list = [] | 64 population_list = [] |
87 | 65 |
88 if all_individuals: | 66 if all_individuals: |
89 p1 = p_total | 67 p1 = p_total |
90 p1.name = 'All Individuals' | 68 p1.name = 'All Individuals' |
91 population_list.append(p1) | 69 population_list.append(p1) |
92 else: | 70 else: |
93 p1 = Population() | 71 p1 = Population() |
94 for idx in range(len(population_files)): | 72 for file, name in population_files: |
95 population_file = population_files[idx] | 73 this_pop = Population(name) |
96 population_name = population_names[idx] | 74 this_pop.from_population_file(file) |
97 this_pop = Population(population_name) | |
98 this_pop.from_population_file(population_file) | |
99 population_list.append(this_pop) | 75 population_list.append(this_pop) |
100 p1.from_population_file(population_file) | 76 |
101 tags = p1.tag_list() | 77 for tag in this_pop.tag_list(): |
102 for tag in tags: | |
103 if tag not in individual_population: | 78 if tag not in individual_population: |
104 individual_population[tag] = population_name | 79 individual_population[tag] = name |
80 | |
81 # add individuals from this file to p1 | |
82 p1.from_population_file(file) | |
83 | |
105 | 84 |
106 if not p_total.is_superset(p1): | 85 if not p_total.is_superset(p1): |
107 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' | 86 gd_util.die('There is an individual in the population that is not in the SNP table') |
108 sys.exit(1) | |
109 | 87 |
110 # run tool | 88 ################################################################################ |
89 | |
111 prog = 'admix_prep' | 90 prog = 'admix_prep' |
112 | 91 |
113 args = [] | 92 args = [ prog ] |
114 args.append(prog) | |
115 args.append(input_snp_filename) | 93 args.append(input_snp_filename) |
116 args.append(min_reads) | 94 args.append(min_reads) |
117 args.append(min_qual) | 95 args.append(min_qual) |
118 args.append(min_spacing) | 96 args.append(min_spacing) |
119 | 97 |
120 tags = p1.tag_list() | 98 for tag in p1.tag_list(): |
121 for tag in tags: | |
122 if input_type == 'gd_genotype': | 99 if input_type == 'gd_genotype': |
123 column, name = tag.split(':') | 100 column, name = tag.split(':', 1) |
124 tag = '{0}:{1}'.format(int(column) - 2, name) | 101 tag = '{0}:{1}'.format(int(column) - 2, name) |
125 args.append(tag) | 102 args.append(tag) |
126 | 103 |
127 #print "args:", ' '.join(args) | 104 stdoutdata, stderrdata = gd_util.run_program(prog, args) |
128 p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=subprocess.PIPE, stderr=sys.stderr) | 105 using_info = stdoutdata.rstrip('\r\n') |
129 (stdoutdata, stderrdata) = p.communicate() | |
130 rc = p.returncode | |
131 | 106 |
132 if rc != 0: | 107 ################################################################################ |
133 die('admix_prep failed: rc={0}'.format(rc)) | |
134 | 108 |
135 using_info = stdoutdata.rstrip('\r\n') | 109 gd_util.mkdir_p(output_files_path) |
136 mkdir_p(output_files_path) | 110 |
137 output_ped_filename = os.path.join(output_files_path, 'admix.ped') | 111 output_ped_filename = os.path.join(output_files_path, 'admix.ped') |
138 output_map_filename = os.path.join(output_files_path, 'admix.map') | 112 output_map_filename = os.path.join(output_files_path, 'admix.map') |
139 shutil.copy2('admix.ped', output_ped_filename) | 113 shutil.copy2('admix.ped', output_ped_filename) |
140 shutil.copy2('admix.map', output_map_filename) | 114 shutil.copy2('admix.map', output_map_filename) |
141 do_import(output_filename, output_files_path, min_reads, min_qual, min_spacing, tags, using_info, population_list) | |
142 | 115 |
143 os.unlink('admix.ped') | 116 do_import(output_filename, output_files_path, min_reads, min_qual, min_spacing, using_info, population_list) |
144 os.unlink('admix.map') | |
145 | |
146 sys.exit(0) | 117 sys.exit(0) |
147 | 118 |