Mercurial > repos > miller-lab > genome_diversity
diff phylogenetic_tree.py @ 0:2c498d40ecde
Uploaded
author | miller-lab |
---|---|
date | Mon, 09 Apr 2012 12:03:06 -0400 |
parents | |
children | 248b06e86022 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/phylogenetic_tree.py Mon Apr 09 12:03:06 2012 -0400 @@ -0,0 +1,219 @@ +#!/usr/bin/env python + +import os +import errno +import sys +import subprocess +import shutil +from Population import Population +import gd_composite + +################################################################################ + +def mkdir_p(path): + try: + os.makedirs(path) + except OSError, e: + if e.errno <> errno.EEXIST: + raise + +################################################################################ + +if len(sys.argv) < 11: + print >> sys.stderr, "Usage" + sys.exit(1) + +input, p1_input, output, extra_files_path, minimum_coverage, minimum_quality, dbkey, data_source, draw_tree_options = sys.argv[1:10] + +individual_metadata = sys.argv[10:] + +# note: TEST THIS +if dbkey in ['', '?', 'None']: + dbkey = 'none' + +p_total = Population() +p_total.from_tag_list(individual_metadata) + + +################################################################################ + +mkdir_p(extra_files_path) + +################################################################################ + +def run_program(prog, args, ofh): + #print "args: ", ' '.join(args) + p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=ofh, stderr=subprocess.PIPE) + (stdoutdata, stderrdata) = p.communicate() + rc = p.returncode + ofh.close() + + if rc != 0: + #print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args)) + print >> sys.stderr, stderrdata + sys.exit(1) + +################################################################################ + +phylip_outfile = os.path.join(extra_files_path, 'distance_matrix.phylip') +newick_outfile = os.path.join(extra_files_path, 'phylogenetic_tree.newick') +ps_outfile = 'tree.ps' +pdf_outfile = os.path.join(extra_files_path, 'tree.pdf') + +################################################################################ + +informative_snp_file = os.path.join(extra_files_path, 'informative_snps.txt') +mega_distance_matrix_file = os.path.join(extra_files_path, 'mega_distance_matrix.txt') + +prog = 'dist_mat' + +args = [] +args.append(prog) +args.append(input) +args.append(minimum_coverage) +args.append(minimum_quality) +args.append(dbkey) +args.append(data_source) +args.append(informative_snp_file) +args.append(mega_distance_matrix_file) + +if p1_input == "all_individuals": + tags = p_total.tag_list() +else: + p1 = Population() + p1.from_population_file(p1_input) + if not p_total.is_superset(p1): + print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' + sys.exit(1) + tags = p1.tag_list() + +for tag in tags: + args.append(tag) + +fh = open(phylip_outfile, 'w') +run_program(None, args, fh) + +################################################################################ + +prog = 'quicktree' + +args = [] +args.append(prog) +args.append('-in') +args.append('m') +args.append('-out') +args.append('t') +args.append(phylip_outfile) + +fh = open(newick_outfile, 'w') +run_program(None, args, fh) + +################################################################################ + +prog = 'draw_tree' + +args = [] +args.append(prog) +if draw_tree_options: + args.append(draw_tree_options) +args.append(newick_outfile) + +fh = open(ps_outfile, 'w') +run_program(None, args, fh) + +################################################################################ + +prog = 'ps2pdf' + +args = [] +args.append(prog) +args.append('-dPDFSETTINGS=/prepress') +args.append(ps_outfile) +args.append('-') + +fh = open(pdf_outfile, 'w') +run_program(None, args, fh) + +shutil.copyfile(pdf_outfile, output) + +################################################################################ + +info_page = gd_composite.InfoPage() +info_page.set_title('Phylogenetic tree Galaxy Composite Dataset') + +display_file = gd_composite.DisplayFile() +display_value = gd_composite.DisplayValue() + +out_pdf = gd_composite.Parameter(name='tree.pdf', value='tree.pdf', display_type=display_file) +out_newick = gd_composite.Parameter(value='phylogenetic_tree.newick', name='phylogenetic tree (newick)', display_type=display_file) +out_phylip = gd_composite.Parameter(value='distance_matrix.phylip', name='Phylip distance matrix', display_type=display_file) +out_mega = gd_composite.Parameter(value='mega_distance_matrix.txt', name='Mega distance matrix', display_type=display_file) +out_snps = gd_composite.Parameter(value='informative_snps.txt', name='informative SNPs', display_type=display_file) + +info_page.add_output_parameter(out_pdf) +info_page.add_output_parameter(out_newick) +info_page.add_output_parameter(out_phylip) +info_page.add_output_parameter(out_mega) +info_page.add_output_parameter(out_snps) + +in_min_cov = gd_composite.Parameter(description='Minimum coverage', value=minimum_coverage, display_type=display_value) +in_min_qual = gd_composite.Parameter(description='Minimum quality', value=minimum_quality, display_type=display_value) + +include_ref_value = 'no' +if dbkey != 'none': + include_ref_value = 'yes' + +in_include_ref = gd_composite.Parameter(description='Include reference sequence', value=include_ref_value, display_type=display_value) + +if data_source == '0': + data_source_value = 'sequence coverage' +elif data_source == '1': + data_source_value = 'estimated genotype' + +in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value) + +branch_type_value = 'square' +if 'd' in draw_tree_options: + branch_type_value = 'diagonal' + +in_branch_type = gd_composite.Parameter(description='Branch type', value=branch_type_value, display_type=display_value) + +branch_scale_value = 'yes' +if 's' in draw_tree_options: + branch_scale_value = 'no' + +in_branch_scale = gd_composite.Parameter(description='Draw branches to scale', value=branch_scale_value, display_type=display_value) + +branch_length_value = 'yes' +if 'b' in draw_tree_options: + branch_length_value = 'no' + +in_branch_length = gd_composite.Parameter(description='Show branch lengths', value=branch_length_value, display_type=display_value) + +tree_layout_value = 'horizontal' +if 'v' in draw_tree_options: + tree_layout_value = 'vertical' + +in_tree_layout = gd_composite.Parameter(description='Tree layout', value=tree_layout_value, display_type=display_value) + +info_page.add_input_parameter(in_min_cov) +info_page.add_input_parameter(in_min_qual) +info_page.add_input_parameter(in_include_ref) +info_page.add_input_parameter(in_data_source) +info_page.add_input_parameter(in_branch_type) +info_page.add_input_parameter(in_branch_scale) +info_page.add_input_parameter(in_branch_length) +info_page.add_input_parameter(in_tree_layout) + +misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList()) + +info_page.add_misc(misc_individuals) + + +with open(output, 'w') as ofh: + print >> ofh, info_page.render() + +################################################################################ + +sys.exit(0) +