Mercurial > repos > miller-lab > genome_diversity
annotate phylogenetic_tree.py @ 33:5064f618ec1c
remove munkres dependency
| author | Richard Burhans <burhans@bx.psu.edu> |
|---|---|
| date | Fri, 20 Sep 2013 14:01:30 -0400 |
| parents | 8997f2ca8c7a |
| children |
| rev | line source |
|---|---|
| 0 | 1 #!/usr/bin/env python |
| 2 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
3 import gd_util |
| 0 | 4 import os |
| 5 import sys | |
| 6 from Population import Population | |
| 7 import gd_composite | |
| 8 | |
| 9 ################################################################################ | |
| 10 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
# The tool takes exactly eleven positional arguments (sys.argv[0] is the
# script itself, which is why the raw argv length must be twelve).
cli_args = sys.argv[1:]
if len(cli_args) != 11:
    gd_util.die('Usage')

(input, output, extra_files_path, input_type, data_source_arg,
 minimum_coverage, minimum_quality, p1_input, dbkey, draw_tree_options,
 ind_arg) = cli_args
| 0 | 15 |
|
24
248b06e86022
Added gd_genotype datatype. Modified tools to support new datatype.
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
# Translate the requested input type / data source name into the numeric
# data_source code that is later appended to the dist_mat command line
# (0 = sequence coverage, 1 = estimated genotype).
if input_type == 'gd_snp':
    if data_source_arg == 'sequence_coverage':
        data_source = 0
    elif data_source_arg == 'estimated_genotype':
        data_source = 1
    else:
        gd_util.die('Unsupported data_source: {0}'.format(data_source_arg))
elif input_type == 'gd_genotype':
    # gd_genotype input always uses data source 1, and both thresholds are
    # forced to zero regardless of what was given on the command line.
    data_source = 1
    minimum_coverage = 0
    minimum_quality = 0
else:
    # Message fixed: the original had a doubled colon ("input_type::").
    gd_util.die('Unsupported input_type: {0}'.format(input_type))
| 0 | 29 |
# Collapse the placeholder dbkey spellings into the single sentinel 'none'.
# TODO: the original author flagged this normalization as needing a test.
if dbkey in ('', '?', 'None'):
    dbkey = 'none'
| 33 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
# Every individual known to the input table.
p_total = Population()
p_total.from_wrapped_dict(ind_arg)

# Pick the tags to analyse: either a user-supplied population file (which
# must only name individuals present in the table) or everyone.
if p1_input != "all_individuals":
    p1 = Population()
    p1.from_population_file(p1_input)
    if not p_total.is_superset(p1):
        gd_util.die('There is an individual in the population that is not in the SNP table')
    tags = p1.tag_list()
else:
    tags = p_total.tag_list()
| 0 | 45 |
| 46 ################################################################################ | |
| 47 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
24
diff
changeset
|
# Make sure the directory for the composite dataset's files exists before
# any output path inside it is used.
gd_util.mkdir_p(extra_files_path)


def _extra_file(file_name):
    # Path of file_name inside the extra-files directory.
    return os.path.join(extra_files_path, file_name)


phylip_outfile = _extra_file('distance_matrix.phylip')
newick_outfile = _extra_file('phylogenetic_tree.newick')
# Relative path: the PostScript file is written to the current working
# directory, not to extra_files_path.
ps_outfile = 'tree.ps'
pdf_outfile = _extra_file('tree.pdf')
informative_snp_file = _extra_file('informative_snps.txt')
mega_distance_matrix_file = _extra_file('mega_distance_matrix.txt')
| 0 | 55 |
| 56 ################################################################################ | |
| 57 | |
prog = 'dist_mat'

# Fixed leading arguments, in the positional order handed to dist_mat.
args = [
    prog,
    input,
    minimum_coverage,
    minimum_quality,
    dbkey,
    data_source,
    informative_snp_file,
    mega_distance_matrix_file,
]

# One "column:name" tag per selected individual follows the fixed arguments.
for tag in tags:
    if input_type == 'gd_genotype':
        # Split on the first ':' only, so a name that itself contains ':'
        # no longer raises ValueError (gd_snp tags are already passed
        # through unmodified, whatever the name looks like).
        column, name = tag.split(':', 1)
        # NOTE(review): the column index is shifted down by 2 for
        # gd_genotype input; presumably this compensates for a different
        # column layout -- confirm against dist_mat.
        tag = '{0}:{1}'.format(int(column) - 2, name)
    args.append(tag)

# dist_mat's standard output is captured as the Phylip distance matrix.
with open(phylip_outfile, 'w') as fh:
    gd_util.run_program(prog, args, stdout=fh)
| 0 | 77 |
| 78 ################################################################################ | |
| 79 | |
prog = 'quicktree'

# '-in m': the input is a distance matrix; '-out t': emit a tree.
args = [prog, '-in', 'm', '-out', 't', phylip_outfile]

# quicktree's standard output is captured as the Newick tree file.
with open(newick_outfile, 'w') as fh:
    gd_util.run_program(prog, args, stdout=fh)
| 0 | 91 |
| 92 ################################################################################ | |
| 93 | |
prog = 'draw_tree'

args = [prog]
# The options string is passed as a single argument, and only when it is
# non-empty, so an empty argument is never handed to the program.
if draw_tree_options:
    args += [draw_tree_options]
args += [newick_outfile]

# draw_tree's standard output is captured as the PostScript drawing.
with open(ps_outfile, 'w') as fh:
    gd_util.run_program(prog, args, stdout=fh)
| 0 | 105 |
| 106 ################################################################################ | |
| 107 | |
prog = 'ps2pdf'

# Convert the PostScript drawing into the PDF stored with the dataset.
args = [prog, '-dPDFSETTINGS=/prepress', ps_outfile, pdf_outfile]

gd_util.run_program(prog, args)
| 0 | 116 |
| 117 ################################################################################ | |
| 118 | |
info_page = gd_composite.InfoPage()
info_page.set_title('Phylogenetic tree Galaxy Composite Dataset')

display_file = gd_composite.DisplayFile()
display_value = gd_composite.DisplayValue()

# (file name, label) for every output file, in the order they are listed
# on the info page.
output_files = (
    ('tree.pdf', 'tree.pdf'),
    ('phylogenetic_tree.newick', 'phylogenetic tree (newick)'),
    ('distance_matrix.phylip', 'Phylip distance matrix'),
    ('mega_distance_matrix.txt', 'Mega distance matrix'),
    ('informative_snps.txt', 'informative SNPs'),
)

output_params = [
    gd_composite.Parameter(name=label, value=file_name, display_type=display_file)
    for file_name, label in output_files
]

for output_param in output_params:
    info_page.add_output_parameter(output_param)
| 136 | |
# Human-readable values for the settings echoed on the info page.
include_ref_value = 'yes' if dbkey != 'none' else 'no'

if data_source == 0:
    data_source_value = 'sequence coverage'
elif data_source == 1:
    data_source_value = 'estimated genotype'

# Each draw_tree option letter flips one setting away from its default.
branch_type_value = 'diagonal' if 'd' in draw_tree_options else 'square'
branch_scale_value = 'no' if 's' in draw_tree_options else 'yes'
branch_length_value = 'no' if 'b' in draw_tree_options else 'yes'
tree_layout_value = 'vertical' if 'v' in draw_tree_options else 'horizontal'

# (description, value) pairs in the order they appear on the info page.
input_settings = [
    ('Minimum coverage', minimum_coverage),
    ('Minimum quality', minimum_quality),
    ('Include reference sequence', include_ref_value),
    ('Data source', data_source_value),
    ('Branch type', branch_type_value),
    ('Draw branches to scale', branch_scale_value),
    ('Show branch lengths', branch_length_value),
    ('Tree layout', tree_layout_value),
]

input_params = [
    gd_composite.Parameter(description=label, value=setting, display_type=display_value)
    for label, setting in input_settings
]

for input_param in input_params:
    info_page.add_input_parameter(input_param)
| 185 | |
# List the individuals that were analysed on the info page.
misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList())

info_page.add_misc(misc_individuals)

# Write the rendered page followed by a newline. An explicit write() is used
# instead of the Python-2-only "print >> ofh" form, which raises TypeError
# at runtime on Python 3; the bytes written are the same.
with open(output, 'w') as ofh:
    ofh.write('{0}\n'.format(info_page.render()))

################################################################################

sys.exit(0)
| 197 |
