Mercurial > repos > miller-lab > genome_diversity
view coverage_distributions.py @ 18:f04f40a36cc8
Latest changes from Belinda and Cathy. Webb's updates to the Fst tools.
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Tue, 23 Oct 2012 12:41:52 -0400 |
parents | 2c498d40ecde |
children | 8997f2ca8c7a |
line wrap: on
line source
#!/usr/bin/env python import os import errno import sys import shutil import subprocess from Population import Population import gd_composite ################################################################################ def mkdir_p(path): try: os.makedirs(path) except OSError, e: if e.errno <> errno.EEXIST: raise ################################################################################ if len(sys.argv) < 7: print >> sys.stderr, "Usage" sys.exit(1) input, data_source, output, extra_files_path = sys.argv[1:5] individual_metadata = [] population_info = [] p1_input = None all_individuals = False for arg in sys.argv[5:]: if arg == 'all_individuals': all_individuals = True elif len(arg) > 12 and arg[:12] == 'individuals:': p1_input = arg[12:] elif len(arg) > 11: if arg[:11] == 'population:': file, name = arg[11:].split(':', 1) population_info.append((file, name)) elif arg[:11] == 'individual:': individual_metadata.append(arg[11:]) p_total = Population() p_total.from_tag_list(individual_metadata) ################################################################################ mkdir_p(extra_files_path) ################################################################################ prog = 'coverage' args = [] args.append(prog) args.append(input) args.append(data_source) user_coverage_file = os.path.join(extra_files_path, 'coverage.txt') args.append(user_coverage_file) population_list = [] if all_individuals: tags = p_total.tag_list() elif p1_input is not None: p1 = Population() this_pop = Population() this_pop.from_population_file(p1_input) population_list.append(this_pop) p1.from_population_file(p1_input) if not p_total.is_superset(p1): print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' sys.exit(1) tags = p1.tag_list() else: tags = [] for population_file, population_name in population_info: population = Population() this_pop = Population() this_pop.from_population_file(population_file) population_list.append(this_pop) population.from_population_file(population_file) if not p_total.is_superset(population): print >> sys.stderr, 'There is an individual in the {} population that is not in the SNP table'.format(population_name) sys.exit(1) columns = population.column_list() for column in columns: tags.append('{0}:{1}'.format(column, population_name)) for tag in tags: args.append(tag) ## text output coverage_file = 'coverage.txt' fh = open(coverage_file, 'w') #print "args:", ' '.join(args) p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr) rc = p.wait() fh.close() ## graphical output fh = open(coverage_file) coverage2_file = 'coverage2.txt' ofh = open(coverage2_file, 'w') for line in fh: line = line.rstrip('\r\n') elems = line.split('\t') name = elems.pop(0) values = [ elems[0] ] for idx in range(1, len(elems)): val = str(float(elems[idx]) - float(elems[idx-1])) values.append(val) print >> ofh, '{0}\t{1}'.format(name, '\t'.join(values)) fh.close() ofh.close() ################################################################################ prog = 'R' args = [] args.append(prog) args.append('--vanilla') args.append('--quiet') _realpath = os.path.realpath(__file__) _script_dir = os.path.dirname(_realpath) r_script_file = os.path.join(_script_dir, 'coverage_plot.r') ifh = open(r_script_file) ofh = open('/dev/null', 'w') #print "args:", ' '.join(args) p = subprocess.Popen(args, bufsize=-1, stdin=ifh, stdout=ofh, stderr=None) rc = p.wait() ifh.close() ofh.close() pdf_file = os.path.join(extra_files_path, 'coverage.pdf') shutil.copy2('coverage.pdf', pdf_file) os.remove('coverage.pdf') os.remove(coverage2_file) ################################################################################ info_page = gd_composite.InfoPage() info_page.set_title('Coverage distributions Galaxy Composite Dataset') display_file = gd_composite.DisplayFile() display_value = gd_composite.DisplayValue() out_pdf = gd_composite.Parameter(name='coverage.pdf', value='coverage.pdf', display_type=display_file) out_txt = gd_composite.Parameter(name='coverage.txt', value='coverage.txt', display_type=display_file) info_page.add_output_parameter(out_pdf) info_page.add_output_parameter(out_txt) if data_source == '0': data_source_value = 'sequence coverage' elif data_source == '1': data_source_value = 'estimated genotype' in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value) info_page.add_input_parameter(in_data_source) if population_list: misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList()) info_page.add_misc(misc_populations) else: misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList()) info_page.add_misc(misc_individuals) with open (output, 'w') as ofh: print >> ofh, info_page.render() sys.exit(0)