diff dpmix.py @ 17:a3af29edcce2

Uploaded Miller Lab Devshed version a51c894f5bed
author miller-lab
date Fri, 28 Sep 2012 11:57:18 -0400
parents 4b6590dd7250
children 248b06e86022
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dpmix.py	Fri Sep 28 11:57:18 2012 -0400
@@ -0,0 +1,162 @@
+#!/usr/bin/env python
+
+import errno
+import sys
+import os
+import subprocess
+from Population import Population
+import gd_composite
+from dpmix_plot import make_dpmix_plot
+from LocationFile import LocationFile
+
+################################################################################
+
+def mkdir_p(path):
+    try:
+        os.makedirs(path)
+    except OSError, e:
+        if e.errno <> errno.EEXIST:
+            raise
+
+def run_program(prog, args, stdout_file=None, space_to_tab=False):
+    #print "args: ", ' '.join(args)
+    p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    (stdoutdata, stderrdata) = p.communicate()
+    rc = p.returncode
+
+    if stdout_file is not None:
+        with open(stdout_file, 'w') as ofh:
+            lines = stdoutdata.split('\n')
+            for line in lines:
+                line = line.strip()
+                if line:
+                    if space_to_tab:
+                        line = line.replace(' ', '\t')
+                    print >> ofh, line
+
+    if rc != 0:
+        print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args))
+        print >> sys.stderr, stderrdata
+        sys.exit(1)
+
+################################################################################
+
+if len(sys.argv) < 15:
+    print "usage"
+    sys.exit(1)
+
+input, data_source, switch_penalty, ap1_input, ap2_input, p_input, output, output2, output2_dir, dbkey, ref_column, galaxy_data_index_dir, heterochromatin_loc_file = sys.argv[1:14]
+individual_metadata = sys.argv[14:]
+
+chrom = 'all'
+add_logs = '0'
+
+loc_path = os.path.join(galaxy_data_index_dir, heterochromatin_loc_file)
+location_file = LocationFile(loc_path)
+heterochrom_path = location_file.get_values_if_exists(dbkey)
+if heterochrom_path is None:
+    heterochrom_path = '/dev/null'
+
+population_list = []
+
+p_total = Population()
+p_total.from_tag_list(individual_metadata)
+
+ap1 = Population(name='Ancestral population 1')
+ap1.from_population_file(ap1_input)
+population_list.append(ap1)
+if not p_total.is_superset(ap1):
+    print >> sys.stderr, 'There is an individual in ancestral population 1 that is not in the SNP table'
+    sys.exit(1)
+
+ap2 = Population(name='Ancestral population 2')
+ap2.from_population_file(ap2_input)
+population_list.append(ap2)
+if not p_total.is_superset(ap2):
+    print >> sys.stderr, 'There is an individual in ancestral population 2 that is not in the SNP table'
+    sys.exit(1)
+
+p = Population(name='Potentially admixed')
+p.from_population_file(p_input)
+population_list.append(p)
+if not p_total.is_superset(p):
+    print >> sys.stderr, 'There is an individual in the population that is not in the SNP table'
+    sys.exit(1)
+
+mkdir_p(output2_dir)
+
+################################################################################
+# Create tabular file
+################################################################################
+
+misc_file = os.path.join(output2_dir, 'misc.txt')
+
+prog = 'dpmix'
+args = [ prog ]
+args.append(input)
+args.append(ref_column)
+args.append(chrom)
+args.append(data_source)
+args.append(add_logs)
+args.append(switch_penalty)
+args.append(heterochrom_path)
+args.append(misc_file)
+
+columns = ap1.column_list()
+for column in columns:
+    args.append('{0}:1:{1}'.format(column, ap1.individual_with_column(column).name))
+
+columns = ap2.column_list()
+for column in columns:
+    args.append('{0}:2:{1}'.format(column, ap2.individual_with_column(column).name))
+
+columns = p.column_list()
+for column in columns:
+    args.append('{0}:0:{1}'.format(column, p.individual_with_column(column).name))
+
+run_program(None, args, stdout_file=output, space_to_tab=True)
+
+################################################################################
+# Create pdf file
+################################################################################
+
+pdf_file = os.path.join(output2_dir, 'dpmix.pdf')
+make_dpmix_plot(dbkey, output, pdf_file, galaxy_data_index_dir)
+
+################################################################################
+# Create html
+################################################################################
+
+info_page = gd_composite.InfoPage()
+info_page.set_title('dpmix Galaxy Composite Dataset')
+
+display_file = gd_composite.DisplayFile()
+display_value = gd_composite.DisplayValue()
+
+out_pdf = gd_composite.Parameter(name='dpmix.pdf', value='dpmix.pdf', display_type=display_file)
+out_misc = gd_composite.Parameter(name='misc.txt', value='misc.txt', display_type=display_file)
+
+info_page.add_output_parameter(out_pdf)
+info_page.add_output_parameter(out_misc)
+
+if data_source == '0':
+    data_source_value = 'sequence coverage'
+elif data_source == '1':
+    data_source_value = 'estimated genotype'
+
+in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value)
+in_switch_penalty = gd_composite.Parameter(description='Switch penalty', value=switch_penalty, display_type=display_value)
+
+info_page.add_input_parameter(in_data_source)
+info_page.add_input_parameter(in_switch_penalty)
+
+misc_populations =  gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList())
+
+info_page.add_misc(misc_populations)
+
+with open(output2, 'w') as ofh:
+    print >> ofh, info_page.render()
+
+sys.exit(0)
+
+