Mercurial > repos > miller-lab > genome_diversity
diff phylogenetic_tree.py @ 27:8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Mon, 15 Jul 2013 10:47:35 -0400 |
parents | 248b06e86022 |
children |
line wrap: on
line diff
--- a/phylogenetic_tree.py Mon Jun 03 12:29:29 2013 -0400 +++ b/phylogenetic_tree.py Mon Jul 15 10:47:35 2013 -0400 @@ -1,154 +1,63 @@ #!/usr/bin/env python +import gd_util import os -import errno import sys -import subprocess -import shutil from Population import Population import gd_composite ################################################################################ -def mkdir_p(path): - try: - os.makedirs(path) - except OSError, e: - if e.errno <> errno.EEXIST: - raise - -################################################################################ +if len(sys.argv) != 12: + gd_util.die('Usage') -# <command interpreter="python"> -# phylogenetic_tree.py "$input" "$output" "$output.files_path" -# -# #if $input_type.choice == '0' -# "gd_snp" -# #if $input_type.data_source.choice == '0' -# "sequence_coverage" -# "$input_type.data_source.minimum_coverage" -# "$input_type.data_source.minimum_quality" -# #else if $input_type.data_source.choice == '1' -# "estimated_genotype" -# #else if $input_type.choice == '1' -# "gd_genotype" -# #end if -# -# #if $individuals.choice == '0' -# "all_individuals" -# #else if $individuals.choice == '1' -# "$individuals.p1_input" -# #end if -# -# #if ((str($input.metadata.scaffold) == str($input.metadata.ref)) and (str($input.metadata.pos) == str($input.metadata.rPos))) or (str($include_reference) == '0') -# "none" -# #else -# "$input.metadata.dbkey" -# #end if -# -# #set $draw_tree_options = ''.join(str(x) for x in [$branch_style, $scale_style, $length_style, $layout_style]) -# #if $draw_tree_options == '' -# "" -# #else -# "-$draw_tree_options" -# #end if -# -# #for $individual_name, $individual_col in zip($input.dataset.metadata.individual_names, $input.dataset.metadata.individual_columns) -# #set $arg = '%s:%s' % ($individual_col, $individual_name) -# "$arg" -# #end for -# </command> - -################################################################################ - -# if len(sys.argv) < 11: -# print >> sys.stderr, "Usage" -# sys.exit(1) -# -# input, p1_input, output, extra_files_path, minimum_coverage, minimum_quality, dbkey, data_source, draw_tree_options = sys.argv[1:10] -# -# individual_metadata = sys.argv[10:] -# -# # note: TEST THIS -# if dbkey in ['', '?', 'None']: -# dbkey = 'none' -# -# p_total = Population() -# p_total.from_tag_list(individual_metadata) - -if len(sys.argv) < 5: - print >> sys.stderr, 'Usage' - sys.exit(1) - -input, output, extra_files_path, input_type = sys.argv[1:5] -args = sys.argv[5:] - -data_source = '1' -minimum_coverage = '0' -minimum_quality = '0' +input, output, extra_files_path, input_type, data_source_arg, minimum_coverage, minimum_quality, p1_input, dbkey, draw_tree_options, ind_arg = sys.argv[1:] if input_type == 'gd_snp': - data_source_arg = args.pop(0) if data_source_arg == 'sequence_coverage': - data_source = '0' - minimum_coverage = args.pop(0) - minimum_quality = args.pop(0) + data_source = 0 elif data_source_arg == 'estimated_genotype': - pass + data_source = 1 else: - print >> sys.stderr, 'Unsupported data_source:', data_source_arg - sys.exit(1) + gd_util.die('Unsupported data_source: {0}'.format(data_source_arg)) elif input_type == 'gd_genotype': - pass + data_source = 1 + minimum_coverage = 0 + minimum_quality = 0 else: - print >> sys.stderr, 'Unsupported input_type:', input_type - sys.exit(1) - -p1_input, dbkey, draw_tree_options = args[:3] + gd_util.die('Unsupported input_type:: {0}'.format(input_type)) # note: TEST THIS if dbkey in ['', '?', 'None']: dbkey = 'none' -individual_metadata = args[3:] +p_total = Population() +p_total.from_wrapped_dict(ind_arg) -p_total = Population() -p_total.from_tag_list(individual_metadata) - -################################################################################ - -mkdir_p(extra_files_path) +if p1_input == "all_individuals": + tags = p_total.tag_list() +else: + p1 = Population() + p1.from_population_file(p1_input) + if not p_total.is_superset(p1): + gd_util.die('There is an individual in the population that is not in the SNP table') + tags = p1.tag_list() ################################################################################ -def run_program(prog, args, ofh): - #print "args: ", ' '.join(args) - p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=ofh, stderr=subprocess.PIPE) - (stdoutdata, stderrdata) = p.communicate() - rc = p.returncode - ofh.close() - - if rc != 0: - #print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args)) - print >> sys.stderr, stderrdata - sys.exit(1) - -################################################################################ - +gd_util.mkdir_p(extra_files_path) phylip_outfile = os.path.join(extra_files_path, 'distance_matrix.phylip') newick_outfile = os.path.join(extra_files_path, 'phylogenetic_tree.newick') ps_outfile = 'tree.ps' pdf_outfile = os.path.join(extra_files_path, 'tree.pdf') +informative_snp_file = os.path.join(extra_files_path, 'informative_snps.txt') +mega_distance_matrix_file = os.path.join(extra_files_path, 'mega_distance_matrix.txt') ################################################################################ -informative_snp_file = os.path.join(extra_files_path, 'informative_snps.txt') -mega_distance_matrix_file = os.path.join(extra_files_path, 'mega_distance_matrix.txt') - prog = 'dist_mat' -args = [] -args.append(prog) +args = [ prog ] args.append(input) args.append(minimum_coverage) args.append(minimum_quality) @@ -157,67 +66,53 @@ args.append(informative_snp_file) args.append(mega_distance_matrix_file) -if p1_input == "all_individuals": - tags = p_total.tag_list() -else: - p1 = Population() - p1.from_population_file(p1_input) - if not p_total.is_superset(p1): - print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' - sys.exit(1) - tags = p1.tag_list() - for tag in tags: if input_type == 'gd_genotype': column, name = tag.split(':') tag = '{0}:{1}'.format(int(column) - 2, name) args.append(tag) -fh = open(phylip_outfile, 'w') -run_program(None, args, fh) +with open(phylip_outfile, 'w') as fh: + gd_util.run_program(prog, args, stdout=fh) ################################################################################ prog = 'quicktree' -args = [] -args.append(prog) +args = [ prog ] args.append('-in') args.append('m') args.append('-out') args.append('t') args.append(phylip_outfile) -fh = open(newick_outfile, 'w') -run_program(None, args, fh) +with open(newick_outfile, 'w') as fh: + gd_util.run_program(prog, args, stdout=fh) ################################################################################ prog = 'draw_tree' -args = [] -args.append(prog) +args = [ prog ] + if draw_tree_options: args.append(draw_tree_options) + args.append(newick_outfile) -fh = open(ps_outfile, 'w') -run_program(None, args, fh) +with open(ps_outfile, 'w') as fh: + gd_util.run_program(prog, args, stdout=fh) ################################################################################ prog = 'ps2pdf' -args = [] -args.append(prog) +args = [ prog ] args.append('-dPDFSETTINGS=/prepress') args.append(ps_outfile) -args.append('-') +args.append(pdf_outfile) -fh = open(pdf_outfile, 'w') -run_program(None, args, fh) - -shutil.copyfile(pdf_outfile, output) +gd_util.run_program(prog, args) ################################################################################ @@ -248,9 +143,9 @@ in_include_ref = gd_composite.Parameter(description='Include reference sequence', value=include_ref_value, display_type=display_value) -if data_source == '0': +if data_source == 0: data_source_value = 'sequence coverage' -elif data_source == '1': +elif data_source == 1: data_source_value = 'estimated genotype' in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)