annotate pca.py @ 29:fb944979bf35

Update to Miller Lab devshed revision 5f0be4d1db30
author Richard Burhans <burhans@bx.psu.edu>
date Thu, 25 Jul 2013 12:01:47 -0400
parents 8997f2ca8c7a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
1 #!/usr/bin/env python
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
2
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
3 import gd_util
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
4 import os
27
8997f2ca8c7a Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents: 24
diff changeset
5 import re
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
6 import shutil
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
7 import sys
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
8 from BeautifulSoup import BeautifulSoup
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
9 import gd_composite
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
10
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
11 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
12
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
def do_ped2geno(input, output):
    """Transpose the genotype columns of a PED file into a .geno file.

    Each non-blank line of ``input`` is split on whitespace.  The first six
    fields are skipped; the remaining fields are consumed as consecutive
    allele pairs.  For every allele pair position one line is written to
    ``output`` containing one character per input row, looked up in
    ``pair_map`` (``'0'``/``'1'``/``'2'`` or ``'9'`` when the first allele
    is ``'0'``).

    Blank lines are ignored, and an input without any data rows produces an
    empty output file instead of raising ``IndexError``.

    Raises:
        KeyError: if an allele code other than '0', '1' or '2' is found.
        IndexError: if a row is shorter than the first row or has an odd
            number of allele fields.
    """
    pair_map = {
        '0': {'0': '9', '1': '9', '2': '9'},
        '1': {'0': '1', '1': '2', '2': '1'},
        '2': {'0': '1', '1': '1', '2': '0'},
    }

    rows = []
    with open(input) as fh:
        for line in fh:
            # split() already discards the trailing newline / carriage return
            fields = line.split()
            if fields:
                rows.append(fields)

    with open(output, 'w') as ofh:
        if not rows:
            # nothing to transpose; leave an empty output file behind
            return
        # the width of the first row decides how many allele pairs there are
        for a_idx in range(6, len(rows[0]), 2):
            b_idx = a_idx + 1
            codes = [pair_map[row[a_idx]][row[b_idx]] for row in rows]
            ofh.write(''.join(codes) + '\n')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
29
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
def do_map2snp(input, output):
    """Write one .snp line to ``output`` for every line of the map ``input``.

    Only the second whitespace-separated field of each input line is used;
    the remaining columns of the emitted line are fixed constants.
    NOTE(review): the constants look like placeholder chromosome / position /
    allele values -- confirm against the smartpca .snp format.
    """
    template = ' {0} 11 0.002 2000 A T'
    with open(output, 'w') as snp_fh:
        with open(input) as map_fh:
            for record in map_fh:
                snp_id = record.split()[1]
                snp_fh.write(template.format(snp_id) + '\n')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
36
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
def make_ind_file(ind_file, input):
    """Write the individuals file and collect population / individual names.

    ``input`` is parsed with BeautifulSoup.  The first ``<ul>`` inside the
    ``<div id="gd_misc">`` element is walked, and every child at an odd
    position is treated as a population entry (presumably the even
    positions are whitespace text nodes between the ``<li>`` tags --
    TODO confirm).  For each individual listed in the entry's ``<ol>`` a
    line ``ind_<n> M <population>`` is written to ``ind_file``, where
    ``<n>`` is a running counter across all populations.

    Returns:
        (pops, name_map): the population names (spaces replaced by
        underscores) in document order, and the individual names indexed by
        the ``ind_<n>`` counter used in ``ind_file``.
    """
    pops = []
    name_map = []
    name_idx = 0

    # 'with' guarantees the output handle is closed even if parsing fails
    with open(ind_file, 'w') as ofh:
        with open(input) as fh:
            soup = BeautifulSoup(fh)
            misc = soup.find('div', {'id': 'gd_misc'})
            populations = misc('ul')[0]

            for i, entry in enumerate(populations):
                if i % 2 == 1:
                    population_name = entry.contents[0].encode('utf8').strip().replace(' ', '_')
                    pops.append(population_name)
                    for individual in entry.ol('li'):
                        individual_name = individual.string.encode('utf8').strip()
                        name_map.append(individual_name)
                        # real names are substituted back later; the file
                        # only carries the positional placeholder
                        ofh.write(' '.join(['ind_%s' % name_idx, 'M', population_name]) + '\n')
                        name_idx += 1

    return pops, name_map
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
64
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
def make_par_file(par_file, geno_file, snp_file, ind_file, evec_file, eval_file):
    """Write the parameter file ``par_file`` with one ``key: value`` line each.

    The five file arguments are recorded under genotypename, snpname,
    indivname, evecoutname and evaloutname, followed by the fixed settings
    ``altnormstyle: NO`` and ``numoutevec: 2``.
    """
    settings = (
        'genotypename: {0}'.format(geno_file),
        'snpname: {0}'.format(snp_file),
        'indivname: {0}'.format(ind_file),
        'evecoutname: {0}'.format(evec_file),
        'evaloutname: {0}'.format(eval_file),
        'altnormstyle: NO',
        'numoutevec: 2',
    )
    with open(par_file, 'w') as fh:
        for setting in settings:
            fh.write(setting + '\n')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
74
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
def do_smartpca(par_file):
    """Run ``smartpca -p par_file`` and return selected sections of its stdout.

    A section starts at a line beginning with one of the three ``##``
    headers below and runs until the next blank line.  Sections are joined
    with an empty line between them and returned as a single string.
    """
    prog = 'smartpca'
    args = [prog, '-p', par_file]

    stdoutdata, stderrdata = gd_util.run_program(prog, args)

    section_headers = (
        '## Average divergence',
        '## Anova statistics',
        '## Statistical significance',
    )

    collected = []
    capturing = False
    for text in stdoutdata.split('\n'):
        if text.startswith(section_headers):
            # empty separator entry before each section
            collected.append('')
            capturing = True
        if text.strip() == '':
            capturing = False
        elif capturing:
            collected.append(text)

    # drop the separator that precedes the very first section
    return '\n'.join(collected[1:])
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
97
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
def do_ploteig(evec_file, population_names):
    """Run ``gd_ploteig`` on ``evec_file`` for columns 1:2.

    The population names are passed to the program as a single
    colon-separated ``-p`` argument, followed by the ``-x`` flag.
    """
    prog = 'gd_ploteig'
    population_arg = ':'.join(population_names)
    args = [prog, '-i', evec_file, '-c', '1:2', '-p', population_arg, '-x']

    gd_util.run_program(prog, args)
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
111
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
def do_eval2pct(eval_file, explained_file):
    """Run ``eval2pct eval_file`` with its stdout redirected to ``explained_file``."""
    prog = 'eval2pct'
    args = [prog, eval_file]

    with open(explained_file, 'w') as explained_fh:
        gd_util.run_program(prog, args, stdout=explained_fh)
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
120
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
def do_coords2admix(coords_file):
    """Rewrite ``coords_file`` in place with the output of ``coords2admix``.

    The program's stdout is captured in a scratch file named ``fake`` in the
    current working directory and then copied over ``coords_file``
    (presumably because redirecting straight onto ``coords_file`` would
    truncate it before the program has read it -- confirm).  The scratch
    file is removed afterwards so it does not linger in the working
    directory, matching the cleanup done by ``fix_names``.
    """
    prog = 'coords2admix'
    args = [prog, coords_file]

    scratch_file = 'fake'
    try:
        with open(scratch_file, 'w') as fh:
            gd_util.run_program(prog, args, stdout=fh)

        shutil.copy2(scratch_file, coords_file)
    finally:
        # clean up even when the program or the copy fails
        if os.path.exists(scratch_file):
            os.unlink(scratch_file)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
131
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
# Matches the positional placeholder ids ("ind_<number>").
ind_regex = re.compile('ind_([0-9]+)')

def fix_names(name_map, files):
    """Replace ``ind_<n>`` placeholders in each file with names from ``name_map``.

    Every file in ``files`` is rewritten in place via a ``<file>.tmp``
    scratch copy.  On each line the first ``ind_<n>`` match selects the
    index ``n``; all occurrences of that exact placeholder text on the line
    are replaced by ``name_map[n]`` with spaces turned into underscores.
    Lines without a placeholder are copied unchanged.

    Raises:
        IndexError: if a placeholder index is not present in ``name_map``.
    """
    for path in files:
        scratch_path = '%s.tmp' % path
        with open(scratch_path, 'w') as dst:
            with open(path) as src:
                for raw in src:
                    text = raw.rstrip('\r\n')
                    found = ind_regex.search(text)
                    if found:
                        number = int(found.group(1))
                        placeholder = 'ind_%s' % number
                        real_name = name_map[number].replace(' ', '_')
                        text = text.replace(placeholder, real_name)
                    dst.write(text + '\n')

        # swap the rewritten copy in, then discard the scratch file
        shutil.copy2(scratch_path, path)
        os.unlink(scratch_path)
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
151
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
152 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
153
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
# Command line: <input> <input_files_path> <output> <output_files_path>
if len(sys.argv) != 5:
    gd_util.die('Usage')

input, input_files_path, output, output_files_path = sys.argv[1:5]
gd_util.mkdir_p(output_files_path)

################################################################################

# admix.ped (input directory) -> admix.geno (output directory)
ped_file = os.path.join(input_files_path, 'admix.ped')
geno_file = os.path.join(output_files_path, 'admix.geno')
do_ped2geno(ped_file, geno_file)

################################################################################

# admix.map (input directory) -> admix.snp (output directory)
map_file = os.path.join(input_files_path, 'admix.map')
snp_file = os.path.join(output_files_path, 'admix.snp')
do_map2snp(map_file, snp_file)

################################################################################

# admix.ind is written with ind_<n> placeholders; name_map keeps the real names
ind_file = os.path.join(output_files_path, 'admix.ind')
population_names, name_map = make_ind_file(ind_file, input)

################################################################################

# parameter file tying the geno/snp/ind inputs to the evec/eval outputs
par_file = os.path.join(output_files_path, 'par.admix')
evec_file = os.path.join(output_files_path, 'coordinates.txt')
eval_file = os.path.join(output_files_path, 'admix.eval')
make_par_file(par_file, geno_file, snp_file, ind_file, evec_file, eval_file)

################################################################################

smartpca_stats = do_smartpca(par_file)
# restore the real individual names once smartpca has produced its output
fix_names(name_map, [ind_file, evec_file])

################################################################################

do_ploteig(evec_file, population_names)
# NOTE(review): the pdf is picked up relative to the current working
# directory while the .ps/.xtxt files below are removed from
# output_files_path -- confirm where gd_ploteig writes each of them.
plot_file = 'coordinates.txt.1:2.{0}.pdf'.format(':'.join(population_names))
output_plot_file = os.path.join(output_files_path, 'PCA.pdf')
shutil.copy2(plot_file, output_plot_file)
os.unlink(plot_file)

################################################################################

# explained.txt receives the stdout of eval2pct; the raw eval file is then dropped
do_eval2pct(eval_file, os.path.join(output_files_path, 'explained.txt'))
os.unlink(eval_file)

################################################################################

# coordinates.txt is rewritten in place by coords2admix
do_coords2admix(evec_file)

################################################################################

# Build the info page that is rendered into <output>
info_page = gd_composite.InfoPage()
info_page.set_title('PCA Galaxy Composite Dataset')

display_file = gd_composite.DisplayFile()
display_value = gd_composite.DisplayValue()

out_pdf = gd_composite.Parameter(name='PCA.pdf', value='PCA.pdf', display_type=display_file)
out_evec = gd_composite.Parameter(name='coordinates.txt', value='coordinates.txt', display_type=display_file)
out_explained = gd_composite.Parameter(name='explained.txt', value='explained.txt', display_type=display_file)

# remove the intermediate .ps / .xtxt files that share the plot's name prefix
evec_prefix = 'coordinates.txt.1:2.{0}'.format(':'.join(population_names))
ps_file = '{0}.ps'.format(evec_prefix)
xtxt_file = '{0}.xtxt'.format(evec_prefix)

os.unlink(os.path.join(output_files_path, ps_file))
os.unlink(os.path.join(output_files_path, xtxt_file))

info_page.add_output_parameter(out_pdf)
info_page.add_output_parameter(out_evec)
info_page.add_output_parameter(out_explained)

in_admix = gd_composite.Parameter(name='par.admix', value='par.admix', display_type=display_file)
in_geno = gd_composite.Parameter(name='admix.geno', value='admix.geno', display_type=display_file)
in_snp = gd_composite.Parameter(name='admix.snp', value='admix.snp', display_type=display_file)
in_ind = gd_composite.Parameter(name='admix.ind', value='admix.ind', display_type=display_file)

info_page.add_input_parameter(in_admix)
info_page.add_input_parameter(in_geno)
info_page.add_input_parameter(in_snp)
info_page.add_input_parameter(in_ind)

# statistics captured from the smartpca stdout, shown preformatted
misc_stats = gd_composite.Parameter(description='Stats<p/><pre>\n{0}\n</pre>'.format(smartpca_stats), display_type=display_value)

info_page.add_misc(misc_stats)

with open (output, 'w') as ofh:
    print >> ofh, info_page.render()

sys.exit(0)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
247