diff coverage_distributions.py @ 12:4b6590dd7250

Uploaded
author miller-lab
date Wed, 12 Sep 2012 17:10:26 -0400
parents 2c498d40ecde
children 8997f2ca8c7a
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/coverage_distributions.py	Wed Sep 12 17:10:26 2012 -0400
@@ -0,0 +1,187 @@
+#!/usr/bin/env python
+
+import os
+import errno
+import sys
+import shutil
+import subprocess
+from Population import Population
+import gd_composite
+
+################################################################################
+
+def mkdir_p(path):
+    try:
+        os.makedirs(path)
+    except OSError, e:
+        if e.errno <> errno.EEXIST:
+            raise
+
+################################################################################
+
+if len(sys.argv) < 7:
+    print >> sys.stderr, "Usage"
+    sys.exit(1)
+
+input, data_source, output, extra_files_path = sys.argv[1:5]
+
+individual_metadata = []
+population_info = []
+p1_input = None
+all_individuals = False
+
+for arg in sys.argv[5:]:
+    if arg == 'all_individuals':
+        all_individuals = True
+    elif len(arg) > 12 and arg[:12] == 'individuals:':
+        p1_input = arg[12:]
+    elif len(arg) > 11:
+        if arg[:11] == 'population:':
+            file, name = arg[11:].split(':', 1)
+            population_info.append((file, name))
+        elif arg[:11] == 'individual:':
+            individual_metadata.append(arg[11:])
+
+p_total = Population()
+p_total.from_tag_list(individual_metadata)
+
+################################################################################
+
+mkdir_p(extra_files_path)
+
+################################################################################
+
+prog = 'coverage'
+
+args = []
+args.append(prog)
+args.append(input)
+args.append(data_source)
+
+user_coverage_file = os.path.join(extra_files_path, 'coverage.txt')
+args.append(user_coverage_file)
+
+population_list = []
+
+if all_individuals:
+    tags = p_total.tag_list()
+elif p1_input is not None:
+    p1 = Population()
+    this_pop = Population()
+    this_pop.from_population_file(p1_input)
+    population_list.append(this_pop)
+    p1.from_population_file(p1_input)
+    if not p_total.is_superset(p1):
+        print >> sys.stderr, 'There is an individual in the population that is not in the SNP table'
+        sys.exit(1)
+    tags = p1.tag_list()
+else:
+    tags = []
+    for population_file, population_name in population_info:
+        population = Population()
+        this_pop = Population()
+        this_pop.from_population_file(population_file)
+        population_list.append(this_pop)
+        population.from_population_file(population_file)
+        if not p_total.is_superset(population):
+            print >> sys.stderr, 'There is an individual in the {} population that is not in the SNP table'.format(population_name)
+            sys.exit(1)
+        columns = population.column_list()
+        for column in columns:
+            tags.append('{0}:{1}'.format(column, population_name))
+
+for tag in tags:
+    args.append(tag)
+
+## text output
+coverage_file = 'coverage.txt'
+fh = open(coverage_file, 'w')
+#print "args:", ' '.join(args)
+p = subprocess.Popen(args, bufsize=-1, stdin=None, stdout=fh, stderr=sys.stderr)
+rc = p.wait()
+fh.close()
+
+## graphical output
+fh = open(coverage_file)
+coverage2_file = 'coverage2.txt'
+ofh = open(coverage2_file, 'w')
+
+for line in fh:
+    line = line.rstrip('\r\n')
+    elems = line.split('\t')
+    name = elems.pop(0)
+    values = [ elems[0] ]
+    for idx in range(1, len(elems)):
+        val = str(float(elems[idx]) - float(elems[idx-1]))
+        values.append(val)
+    print >> ofh, '{0}\t{1}'.format(name, '\t'.join(values))
+
+fh.close()
+ofh.close()
+
+################################################################################
+
+prog = 'R'
+
+args = []
+args.append(prog)
+args.append('--vanilla')
+args.append('--quiet')
+
+_realpath = os.path.realpath(__file__)
+_script_dir = os.path.dirname(_realpath)
+r_script_file = os.path.join(_script_dir, 'coverage_plot.r')
+
+ifh = open(r_script_file)
+ofh = open('/dev/null', 'w')
+#print "args:", ' '.join(args)
+p = subprocess.Popen(args, bufsize=-1, stdin=ifh, stdout=ofh, stderr=None)
+rc = p.wait()
+ifh.close()
+ofh.close()
+
+pdf_file = os.path.join(extra_files_path, 'coverage.pdf')
+shutil.copy2('coverage.pdf', pdf_file)
+os.remove('coverage.pdf')
+os.remove(coverage2_file)
+
+################################################################################
+
+info_page = gd_composite.InfoPage()
+info_page.set_title('Coverage distributions Galaxy Composite Dataset')
+
+display_file = gd_composite.DisplayFile()
+display_value = gd_composite.DisplayValue()
+
+out_pdf = gd_composite.Parameter(name='coverage.pdf', value='coverage.pdf', display_type=display_file)
+out_txt = gd_composite.Parameter(name='coverage.txt', value='coverage.txt', display_type=display_file)
+
+info_page.add_output_parameter(out_pdf)
+info_page.add_output_parameter(out_txt)
+
+
+if data_source == '0':
+    data_source_value = 'sequence coverage'
+elif data_source == '1':
+    data_source_value = 'estimated genotype'
+
+in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)
+
+info_page.add_input_parameter(in_data_source)
+
+if population_list:
+    misc_populations =  gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList())
+    info_page.add_misc(misc_populations)
+else:
+    misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList())
+    info_page.add_misc(misc_individuals)
+
+
+
+
+with open (output, 'w') as ofh:
+    print >> ofh, info_page.render()
+
+
+sys.exit(0)
+