annotate phylogenetic_tree.py @ 25:cba0d7a63b82

workaround for gd_genotype datatype admix shift int -> float
author Richard Burhans <burhans@bx.psu.edu>
date Wed, 29 May 2013 13:49:19 -0400
parents 248b06e86022
children 8997f2ca8c7a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
1 #!/usr/bin/env python
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
2
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
3 import os
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
4 import errno
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
5 import sys
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
6 import subprocess
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
7 import shutil
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
8 from Population import Population
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
9 import gd_composite
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
10
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
11 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
12
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
def mkdir_p(path):
    """Create directory `path` and any missing parents, like ``mkdir -p``.

    An already-existing directory is not treated as an error.  Any other
    failure reported by os.makedirs -- including `path` already existing
    as something that is not a directory -- is re-raised as OSError.
    """
    try:
        os.makedirs(path)
    except OSError as e:
        # EEXIST is only benign when the existing entry really is a directory.
        if e.errno != errno.EEXIST or not os.path.isdir(path):
            raise
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
19
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
20 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
21
24
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
22 # <command interpreter="python">
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
23 # phylogenetic_tree.py "$input" "$output" "$output.files_path"
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
24 #
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
25 # #if $input_type.choice == '0'
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
26 # "gd_snp"
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
27 # #if $input_type.data_source.choice == '0'
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
28 # "sequence_coverage"
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
29 # "$input_type.data_source.minimum_coverage"
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
30 # "$input_type.data_source.minimum_quality"
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
31 # #else if $input_type.data_source.choice == '1'
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
32 # "estimated_genotype"
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
33 # #else if $input_type.choice == '1'
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
34 # "gd_genotype"
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
35 # #end if
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
36 #
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
37 # #if $individuals.choice == '0'
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
38 # "all_individuals"
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
39 # #else if $individuals.choice == '1'
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
40 # "$individuals.p1_input"
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
41 # #end if
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
42 #
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
43 # #if ((str($input.metadata.scaffold) == str($input.metadata.ref)) and (str($input.metadata.pos) == str($input.metadata.rPos))) or (str($include_reference) == '0')
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
44 # "none"
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
45 # #else
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
46 # "$input.metadata.dbkey"
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
47 # #end if
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
48 #
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
49 # #set $draw_tree_options = ''.join(str(x) for x in [$branch_style, $scale_style, $length_style, $layout_style])
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
50 # #if $draw_tree_options == ''
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
51 # ""
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
52 # #else
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
53 # "-$draw_tree_options"
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
54 # #end if
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
55 #
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
56 # #for $individual_name, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns)
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
57 # #set $arg = '%s:%s' % ($individual_col, $individual_name)
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
58 # "$arg"
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
59 # #end for
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
60 # </command>
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
61
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
62 ################################################################################
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
63
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
64 # if len(sys.argv) < 11:
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
65 # print >> sys.stderr, "Usage"
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
66 # sys.exit(1)
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
67 #
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
68 # input, p1_input, output, extra_files_path, minimum_coverage, minimum_quality, dbkey, data_source, draw_tree_options = sys.argv[1:10]
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
69 #
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
70 # individual_metadata = sys.argv[10:]
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
71 #
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
72 # # note: TEST THIS
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
73 # if dbkey in ['', '?', 'None']:
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
74 # dbkey = 'none'
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
75 #
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
76 # p_total = Population()
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
77 # p_total.from_tag_list(individual_metadata)
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
78
248b06e86022 Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents: 0
diff changeset
# Command line layout (see the <command> template in the comments above):
#   input output extra_files_path input_type
#   [data_source [minimum_coverage minimum_quality]]   (gd_snp only)
#   p1_input dbkey draw_tree_options [column:name ...]
if len(sys.argv) < 5:
    sys.stderr.write('Usage\n')
    sys.exit(1)

# NOTE: `input` shadows the builtin; the name is kept because later
# top-level code in this script refers to it.
input, output, extra_files_path, input_type = sys.argv[1:5]
args = sys.argv[5:]

# Values used unless the gd_snp / sequence_coverage branch overrides them.
data_source = '1'
minimum_coverage = '0'
minimum_quality = '0'

if input_type == 'gd_snp':
    if not args:
        # Fail with the usage message rather than an IndexError traceback.
        sys.stderr.write('Usage\n')
        sys.exit(1)
    data_source_arg = args.pop(0)
    if data_source_arg == 'sequence_coverage':
        if len(args) < 2:
            sys.stderr.write('Usage\n')
            sys.exit(1)
        data_source = '0'
        minimum_coverage = args.pop(0)
        minimum_quality = args.pop(0)
    elif data_source_arg != 'estimated_genotype':
        sys.stderr.write('Unsupported data_source: {0}\n'.format(data_source_arg))
        sys.exit(1)
elif input_type != 'gd_genotype':
    sys.stderr.write('Unsupported input_type: {0}\n'.format(input_type))
    sys.exit(1)

if len(args) < 3:
    # The three fixed trailing arguments are mandatory.
    sys.stderr.write('Usage\n')
    sys.exit(1)

p1_input, dbkey, draw_tree_options = args[:3]

# note: TEST THIS
if dbkey in ['', '?', 'None']:
    dbkey = 'none'

# Everything after the fixed arguments is a "column:name" tag per individual.
individual_metadata = args[3:]

p_total = Population()
p_total.from_tag_list(individual_metadata)

################################################################################

mkdir_p(extra_files_path)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
121
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
122 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
123
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
def run_program(prog, args, ofh):
    """Run an external command, sending its stdout to `ofh`.

    prog -- value passed to Popen as `executable`; when None, args[0] is
            the program that gets executed.
    args -- argument vector, including the program name as args[0].
    ofh  -- open, writable file object that receives the child's stdout.
            It is always closed before this function returns or raises.

    If the command exits with a non-zero status, its captured stderr is
    written to this process's stderr and the script terminates through
    sys.exit(1).  Exceptions raised while starting the child (for example
    OSError for a missing executable) propagate to the caller.
    """
    try:
        p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None,
                             stdout=ofh, stderr=subprocess.PIPE)
        # stdout goes straight to ofh, so only stderr is captured here.
        (stdoutdata, stderrdata) = p.communicate()
        rc = p.returncode
    finally:
        # Close the output file even when the child could not be started.
        ofh.close()

    if rc != 0:
        # On Python 3 the captured stderr is bytes; decode it before printing.
        if isinstance(stderrdata, bytes) and not isinstance(stderrdata, str):
            stderrdata = stderrdata.decode('utf-8', 'replace')
        sys.stderr.write('{0}\n'.format(stderrdata))
        sys.exit(1)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
135
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
136 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
137
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
# Files produced by the pipeline.  All of them live in the dataset's extra
# files directory except the intermediate PostScript file, which is written
# to the current working directory.
phylip_outfile = os.path.join(extra_files_path, 'distance_matrix.phylip')
newick_outfile = os.path.join(extra_files_path, 'phylogenetic_tree.newick')
ps_outfile = 'tree.ps'
pdf_outfile = os.path.join(extra_files_path, 'tree.pdf')

################################################################################

informative_snp_file = os.path.join(extra_files_path, 'informative_snps.txt')
mega_distance_matrix_file = os.path.join(extra_files_path, 'mega_distance_matrix.txt')

# Stage 1: dist_mat; its stdout is captured as the Phylip distance matrix.
prog = 'dist_mat'

args = [
    prog,
    input,
    minimum_coverage,
    minimum_quality,
    dbkey,
    data_source,
    informative_snp_file,
    mega_distance_matrix_file,
]

if p1_input == "all_individuals":
    tags = p_total.tag_list()
else:
    p1 = Population()
    p1.from_population_file(p1_input)
    if not p_total.is_superset(p1):
        sys.stderr.write('There is an individual in the population that is not in the SNP table\n')
        sys.exit(1)
    tags = p1.tag_list()

for tag in tags:
    if input_type == 'gd_genotype':
        # Split on the first colon only so that a name containing ':' stays
        # intact instead of breaking the two-way unpack.
        column, name = tag.split(':', 1)
        # NOTE(review): gd_genotype column numbers are lowered by 2 before
        # being handed to dist_mat -- presumably to match the column layout
        # dist_mat expects; confirm against the dist_mat source.
        tag = '{0}:{1}'.format(int(column) - 2, name)
    args.append(tag)

fh = open(phylip_outfile, 'w')
run_program(None, args, fh)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
178
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
179 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
180
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
# Stage 2: quicktree reads the Phylip distance matrix and its stdout is
# captured as the newick tree file.
prog = 'quicktree'

args = [prog, '-in', 'm', '-out', 't', phylip_outfile]

fh = open(newick_outfile, 'w')
run_program(None, args, fh)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
193
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
194 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
195
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
# Stage 3: draw_tree reads the newick file and its stdout is captured as
# the PostScript file.  The options string is only passed when non-empty.
prog = 'draw_tree'

args = [prog] + ([draw_tree_options] if draw_tree_options else []) + [newick_outfile]

fh = open(ps_outfile, 'w')
run_program(None, args, fh)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
206
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
207 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
208
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
# Stage 4: ps2pdf converts the PostScript file; '-' sends the PDF to stdout,
# which run_program redirects into pdf_outfile.
prog = 'ps2pdf'

args = [prog, '-dPDFSETTINGS=/prepress', ps_outfile, '-']

fh = open(pdf_outfile, 'w')
run_program(None, args, fh)

# NOTE(review): `output` is opened for writing again further down in this
# script when the info page is rendered, so this copy appears to be
# overwritten -- confirm whether it is still needed.
shutil.copyfile(pdf_outfile, output)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
221
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
222 ################################################################################
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
223
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
# Build the composite-dataset info page: links to the generated files, the
# parameter values used for this run, and the list of individuals.
info_page = gd_composite.InfoPage()
info_page.set_title('Phylogenetic tree Galaxy Composite Dataset')

display_file = gd_composite.DisplayFile()
display_value = gd_composite.DisplayValue()

# Output files; the values are the file names used inside extra_files_path.
out_pdf = gd_composite.Parameter(name='tree.pdf', value='tree.pdf', display_type=display_file)
out_newick = gd_composite.Parameter(value='phylogenetic_tree.newick', name='phylogenetic tree (newick)', display_type=display_file)
out_phylip = gd_composite.Parameter(value='distance_matrix.phylip', name='Phylip distance matrix', display_type=display_file)
out_mega = gd_composite.Parameter(value='mega_distance_matrix.txt', name='Mega distance matrix', display_type=display_file)
out_snps = gd_composite.Parameter(value='informative_snps.txt', name='informative SNPs', display_type=display_file)

for parameter in (out_pdf, out_newick, out_phylip, out_mega, out_snps):
    info_page.add_output_parameter(parameter)

in_min_cov = gd_composite.Parameter(description='Minimum coverage', value=minimum_coverage, display_type=display_value)
in_min_qual = gd_composite.Parameter(description='Minimum quality', value=minimum_quality, display_type=display_value)

# The reference is reported as included whenever dbkey is not 'none'.
include_ref_value = 'yes' if dbkey != 'none' else 'no'

in_include_ref = gd_composite.Parameter(description='Include reference sequence', value=include_ref_value, display_type=display_value)

if data_source == '0':
    data_source_value = 'sequence coverage'
elif data_source == '1':
    data_source_value = 'estimated genotype'

in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)

# Each letter present in the draw_tree options string switches one setting
# away from its default.
branch_type_value = 'diagonal' if 'd' in draw_tree_options else 'square'

in_branch_type = gd_composite.Parameter(description='Branch type', value=branch_type_value, display_type=display_value)

branch_scale_value = 'no' if 's' in draw_tree_options else 'yes'

in_branch_scale = gd_composite.Parameter(description='Draw branches to scale', value=branch_scale_value, display_type=display_value)

branch_length_value = 'no' if 'b' in draw_tree_options else 'yes'

in_branch_length = gd_composite.Parameter(description='Show branch lengths', value=branch_length_value, display_type=display_value)

tree_layout_value = 'vertical' if 'v' in draw_tree_options else 'horizontal'

in_tree_layout = gd_composite.Parameter(description='Tree layout', value=tree_layout_value, display_type=display_value)

for parameter in (in_min_cov, in_min_qual, in_include_ref, in_data_source,
                  in_branch_type, in_branch_scale, in_branch_length,
                  in_tree_layout):
    info_page.add_input_parameter(parameter)

misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList())

info_page.add_misc(misc_individuals)


# Write the rendered page followed by a newline, as the former print
# statement did.  NOTE(review): assumes render() returns text -- confirm.
with open(output, 'w') as ofh:
    ofh.write('{0}\n'.format(info_page.render()))

################################################################################

sys.exit(0)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
302