Mercurial > repos > miller-lab > genome_diversity
annotate prepare_population_structure.py @ 24:248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Tue, 28 May 2013 16:24:19 -0400 |
parents | 2c498d40ecde |
children | 8997f2ca8c7a |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
2 | |
3 import errno | |
4 import os | |
5 import shutil | |
6 import subprocess | |
7 import sys | |
8 from Population import Population | |
9 import gd_composite | |
10 | |
11 ################################################################################ | |
12 | |
13 def do_import(filename, files_path, min_reads, min_qual, min_spacing, tags, using_info, population_list): | |
14 info_page = gd_composite.InfoPage() | |
15 info_page.set_title('Prepare to look for population structure Galaxy Composite Dataset') | |
16 | |
17 display_file = gd_composite.DisplayFile() | |
18 display_value = gd_composite.DisplayValue() | |
19 | |
20 out_ped = gd_composite.Parameter(name='admix.ped', value='admix.ped', display_type=display_file) | |
21 out_map = gd_composite.Parameter(name='admix.map', value='admix.map', display_type=display_file) | |
22 out_use = gd_composite.Parameter(description=using_info, display_type=display_value) | |
23 | |
24 info_page.add_output_parameter(out_ped) | |
25 info_page.add_output_parameter(out_map) | |
26 info_page.add_output_parameter(out_use) | |
27 | |
28 in_min_reads = gd_composite.Parameter(description='Minimum reads covering a SNP, per individual', value=min_reads, display_type=display_value) | |
29 in_min_qual = gd_composite.Parameter(description='Minimum quality value, per individual', value=min_qual, display_type=display_value) | |
30 in_min_spacing = gd_composite.Parameter(description='Minimum spacing between SNPs on the same scaffold', value=min_spacing, display_type=display_value) | |
31 | |
32 info_page.add_input_parameter(in_min_reads) | |
33 info_page.add_input_parameter(in_min_qual) | |
34 info_page.add_input_parameter(in_min_spacing) | |
35 | |
36 misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList()) | |
37 info_page.add_misc(misc_populations) | |
38 | |
39 with open(filename, 'w') as ofh: | |
40 print >> ofh, info_page.render() | |
41 | |
42 def mkdir_p(path): | |
43 try: | |
44 os.makedirs(path) | |
45 except OSError, e: | |
46 if e.errno <> errno.EEXIST: | |
47 raise | |
48 | |
49 def die(message, exit=True): | |
50 print >> sys.stderr, message | |
51 if exit: | |
52 sys.exit(1) | |
53 | |
54 ################################################################################ | |
55 | |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
56 if len(sys.argv) < 10: |
0 | 57 die("Usage") |
58 | |
59 # parse command line | |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
60 input_snp_filename, input_type, min_reads, min_qual, min_spacing, output_filename, output_files_path = sys.argv[1:8] |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
61 args = sys.argv[8:] |
0 | 62 |
63 individual_metadata = [] | |
64 population_files = [] | |
65 population_names = [] | |
66 all_individuals = False | |
67 | |
68 for arg in args: | |
69 if arg == 'all_individuals': | |
70 all_individuals = True | |
71 elif len(arg) > 11: | |
72 tag = arg[:11] | |
73 value = arg[11:] | |
74 if tag == 'individual:': | |
75 individual_metadata.append(value) | |
76 elif tag == 'population:': | |
77 filename, name = value.split(':', 1) | |
78 population_files.append(filename) | |
79 population_names.append(name) | |
80 | |
81 p_total = Population() | |
82 p_total.from_tag_list(individual_metadata) | |
83 | |
84 individual_population = {} | |
85 | |
86 population_list = [] | |
87 | |
88 if all_individuals: | |
89 p1 = p_total | |
90 p1.name = 'All Individuals' | |
91 population_list.append(p1) | |
92 else: | |
93 p1 = Population() | |
94 for idx in range(len(population_files)): | |
95 population_file = population_files[idx] | |
96 population_name = population_names[idx] | |
97 this_pop = Population(population_name) | |
98 this_pop.from_population_file(population_file) | |
99 population_list.append(this_pop) | |
100 p1.from_population_file(population_file) | |
101 tags = p1.tag_list() | |
102 for tag in tags: | |
103 if tag not in individual_population: | |
104 individual_population[tag] = population_name | |
105 | |
106 if not p_total.is_superset(p1): | |
107 print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' | |
108 sys.exit(1) | |
109 | |
110 # run tool | |
111 prog = 'admix_prep' | |
112 | |
113 args = [] | |
114 args.append(prog) | |
115 args.append(input_snp_filename) | |
116 args.append(min_reads) | |
117 args.append(min_qual) | |
118 args.append(min_spacing) | |
119 | |
120 tags = p1.tag_list() | |
121 for tag in tags: | |
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
122 if input_type == 'gd_genotype': |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
123 column, name = tag.split(':') |
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
124 tag = '{0}:{1}'.format(int(column) - 2, name) |
0 | 125 args.append(tag) |
126 | |
127 #print "args:", ' '.join(args) | |
128 p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=subprocess.PIPE, stderr=sys.stderr) | |
129 (stdoutdata, stderrdata) = p.communicate() | |
130 rc = p.returncode | |
131 | |
132 if rc != 0: | |
133 die('admix_prep failed: rc={0}'.format(rc)) | |
134 | |
135 using_info = stdoutdata.rstrip('\r\n') | |
136 mkdir_p(output_files_path) | |
137 output_ped_filename = os.path.join(output_files_path, 'admix.ped') | |
138 output_map_filename = os.path.join(output_files_path, 'admix.map') | |
139 shutil.copy2('admix.ped', output_ped_filename) | |
140 shutil.copy2('admix.map', output_map_filename) | |
141 do_import(output_filename, output_files_path, min_reads, min_qual, min_spacing, tags, using_info, population_list) | |
142 | |
143 os.unlink('admix.ped') | |
144 os.unlink('admix.map') | |
145 | |
146 sys.exit(0) | |
147 |