Mercurial > repos > miller-lab > genome_diversity
diff prepare_population_structure.py @ 27:8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Mon, 15 Jul 2013 10:47:35 -0400 |
parents | 248b06e86022 |
children |
line wrap: on
line diff
--- a/prepare_population_structure.py Mon Jun 03 12:29:29 2013 -0400 +++ b/prepare_population_structure.py Mon Jul 15 10:47:35 2013 -0400 @@ -1,16 +1,15 @@ #!/usr/bin/env python -import errno +import gd_util import os import shutil -import subprocess import sys from Population import Population import gd_composite ################################################################################ -def do_import(filename, files_path, min_reads, min_qual, min_spacing, tags, using_info, population_list): +def do_import(filename, files_path, min_reads, min_qual, min_spacing, using_info, population_list): info_page = gd_composite.InfoPage() info_page.set_title('Prepare to look for population structure Galaxy Composite Dataset') @@ -39,50 +38,29 @@ with open(filename, 'w') as ofh: print >> ofh, info_page.render() -def mkdir_p(path): - try: - os.makedirs(path) - except OSError, e: - if e.errno <> errno.EEXIST: - raise - -def die(message, exit=True): - print >> sys.stderr, message - if exit: - sys.exit(1) - ################################################################################ if len(sys.argv) < 10: - die("Usage") + gd_util.die('Usage') # parse command line -input_snp_filename, input_type, min_reads, min_qual, min_spacing, output_filename, output_files_path = sys.argv[1:8] -args = sys.argv[8:] +input_snp_filename, input_type, min_reads, min_qual, min_spacing, output_filename, output_files_path, ind_arg = sys.argv[1:9] +args = sys.argv[9:] -individual_metadata = [] population_files = [] -population_names = [] all_individuals = False for arg in args: if arg == 'all_individuals': all_individuals = True - elif len(arg) > 11: - tag = arg[:11] - value = arg[11:] - if tag == 'individual:': - individual_metadata.append(value) - elif tag == 'population:': - filename, name = value.split(':', 1) - population_files.append(filename) - population_names.append(name) + elif len(arg) > 11 and arg[:11] == 'population:': + file, name = arg[11:].split(':', 1) + population_files.append((file, name)) p_total = Population() -p_total.from_tag_list(individual_metadata) +p_total.from_wrapped_dict(ind_arg) individual_population = {} - population_list = [] if all_individuals: @@ -91,57 +69,50 @@ population_list.append(p1) else: p1 = Population() - for idx in range(len(population_files)): - population_file = population_files[idx] - population_name = population_names[idx] - this_pop = Population(population_name) - this_pop.from_population_file(population_file) + for file, name in population_files: + this_pop = Population(name) + this_pop.from_population_file(file) population_list.append(this_pop) - p1.from_population_file(population_file) - tags = p1.tag_list() - for tag in tags: + + for tag in this_pop.tag_list(): if tag not in individual_population: - individual_population[tag] = population_name + individual_population[tag] = name + + # add individuals from this file to p1 + p1.from_population_file(file) + if not p_total.is_superset(p1): - print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' - sys.exit(1) + gd_util.die('There is an individual in the population that is not in the SNP table') -# run tool +################################################################################ + prog = 'admix_prep' -args = [] -args.append(prog) +args = [ prog ] args.append(input_snp_filename) args.append(min_reads) args.append(min_qual) args.append(min_spacing) -tags = p1.tag_list() -for tag in tags: +for tag in p1.tag_list(): if input_type == 'gd_genotype': - column, name = tag.split(':') + column, name = tag.split(':', 1) tag = '{0}:{1}'.format(int(column) - 2, name) args.append(tag) -#print "args:", ' '.join(args) -p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=subprocess.PIPE, stderr=sys.stderr) -(stdoutdata, stderrdata) = p.communicate() -rc = p.returncode +stdoutdata, stderrdata = gd_util.run_program(prog, args) +using_info = stdoutdata.rstrip('\r\n') -if rc != 0: - die('admix_prep failed: rc={0}'.format(rc)) +################################################################################ -using_info = stdoutdata.rstrip('\r\n') -mkdir_p(output_files_path) +gd_util.mkdir_p(output_files_path) + output_ped_filename = os.path.join(output_files_path, 'admix.ped') output_map_filename = os.path.join(output_files_path, 'admix.map') shutil.copy2('admix.ped', output_ped_filename) shutil.copy2('admix.map', output_map_filename) -do_import(output_filename, output_files_path, min_reads, min_qual, min_spacing, tags, using_info, population_list) -os.unlink('admix.ped') -os.unlink('admix.map') - +do_import(output_filename, output_files_path, min_reads, min_qual, min_spacing, using_info, population_list) sys.exit(0)