Mercurial > repos > miller-lab > genome_diversity
diff coverage_distributions.py @ 27:8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
author | Richard Burhans <burhans@bx.psu.edu> |
---|---|
date | Mon, 15 Jul 2013 10:47:35 -0400 |
parents | 2c498d40ecde |
children |
line wrap: on
line diff
--- a/coverage_distributions.py Mon Jun 03 12:29:29 2013 -0400 +++ b/coverage_distributions.py Mon Jul 15 10:47:35 2013 -0400 @@ -1,60 +1,43 @@ #!/usr/bin/env python +import gd_util import os -import errno import sys -import shutil -import subprocess from Population import Population import gd_composite ################################################################################ -def mkdir_p(path): - try: - os.makedirs(path) - except OSError, e: - if e.errno <> errno.EEXIST: - raise +if len(sys.argv) < 7: + gd_util.die('Usage') -################################################################################ +input, data_source, output, extra_files_path, ind_arg = sys.argv[1:6] -if len(sys.argv) < 7: - print >> sys.stderr, "Usage" - sys.exit(1) - -input, data_source, output, extra_files_path = sys.argv[1:5] - -individual_metadata = [] population_info = [] p1_input = None all_individuals = False -for arg in sys.argv[5:]: +for arg in sys.argv[6:]: if arg == 'all_individuals': all_individuals = True elif len(arg) > 12 and arg[:12] == 'individuals:': p1_input = arg[12:] - elif len(arg) > 11: - if arg[:11] == 'population:': - file, name = arg[11:].split(':', 1) - population_info.append((file, name)) - elif arg[:11] == 'individual:': - individual_metadata.append(arg[11:]) + elif len(arg) > 11 and arg[:11] == 'population:': + file, name = arg[11:].split(':', 1) + population_info.append((file, name)) p_total = Population() -p_total.from_tag_list(individual_metadata) +p_total.from_wrapped_dict(ind_arg) ################################################################################ -mkdir_p(extra_files_path) +gd_util.mkdir_p(extra_files_path) ################################################################################ prog = 'coverage' -args = [] -args.append(prog) +args = [ prog ] args.append(input) args.append(data_source) @@ -72,8 +55,7 @@ population_list.append(this_pop) p1.from_population_file(p1_input) if not p_total.is_superset(p1): - print >> sys.stderr, 'There is an individual in the population that is not in the SNP table' - sys.exit(1) + gd_util.die('There is an individual in the population that is not in the SNP table') tags = p1.tag_list() else: tags = [] @@ -84,8 +66,7 @@ population_list.append(this_pop) population.from_population_file(population_file) if not p_total.is_superset(population): - print >> sys.stderr, 'There is an individual in the {} population that is not in the SNP table'.format(population_name) - sys.exit(1) + gd_util.die('There is an individual in the {} population that is not in the SNP table'.format(population_name)) columns = population.column_list() for column in columns: tags.append('{0}:{1}'.format(column, population_name)) @@ -95,55 +76,37 @@ ## text output coverage_file = 'coverage.txt' -fh = open(coverage_file, 'w') -#print "args:", ' '.join(args) -p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr) -rc = p.wait() -fh.close() +with open(coverage_file, 'w') as fh: + gd_util.run_program(prog, args, stdout=fh) ## graphical output -fh = open(coverage_file) coverage2_file = 'coverage2.txt' -ofh = open(coverage2_file, 'w') - -for line in fh: - line = line.rstrip('\r\n') - elems = line.split('\t') - name = elems.pop(0) - values = [ elems[0] ] - for idx in range(1, len(elems)): - val = str(float(elems[idx]) - float(elems[idx-1])) - values.append(val) - print >> ofh, '{0}\t{1}'.format(name, '\t'.join(values)) - -fh.close() -ofh.close() +with open(coverage_file) as fh, open(coverage2_file, 'w') as ofh: + for line in fh: + line = line.rstrip('\r\n') + elems = line.split('\t') + name = elems.pop(0) + values = [ elems[0] ] + for idx in range(1, len(elems)): + val = str(float(elems[idx]) - float(elems[idx-1])) + values.append(val) + print >> ofh, '{0}\t{1}'.format(name, '\t'.join(values)) ################################################################################ -prog = 'R' +prog = 'Rscript' -args = [] -args.append(prog) -args.append('--vanilla') -args.append('--quiet') +args = [ prog ] _realpath = os.path.realpath(__file__) _script_dir = os.path.dirname(_realpath) r_script_file = os.path.join(_script_dir, 'coverage_plot.r') - -ifh = open(r_script_file) -ofh = open('/dev/null', 'w') -#print "args:", ' '.join(args) -p = subprocess.Popen(args, bufsize=-1, stdin=ifh, stdout=ofh, stderr=None) -rc = p.wait() -ifh.close() -ofh.close() +args.append(r_script_file) pdf_file = os.path.join(extra_files_path, 'coverage.pdf') -shutil.copy2('coverage.pdf', pdf_file) -os.remove('coverage.pdf') -os.remove(coverage2_file) +args.append(pdf_file) + +gd_util.run_program(prog, args) ################################################################################ @@ -159,7 +122,6 @@ info_page.add_output_parameter(out_pdf) info_page.add_output_parameter(out_txt) - if data_source == '0': data_source_value = 'sequence coverage' elif data_source == '1': @@ -176,12 +138,8 @@ misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList()) info_page.add_misc(misc_individuals) - - - with open (output, 'w') as ofh: print >> ofh, info_page.render() - sys.exit(0)