Mercurial > repos > miller-lab > genome_diversity
annotate coverage_distributions.py @ 36:51cd0307fb70
Phylip's extra ouputs are now stored in the job working directory
| author | Richard Burhans <burhans@bx.psu.edu> |
|---|---|
| date | Wed, 20 Nov 2013 16:32:01 -0500 |
| parents | 8997f2ca8c7a |
| children |
| rev | line source |
|---|---|
| 0 | 1 #!/usr/bin/env python |
| 2 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
3 import gd_util |
| 0 | 4 import os |
| 5 import sys | |
| 6 from Population import Population | |
| 7 import gd_composite | |
| 8 | |
| 9 ################################################################################ | |
| 10 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
11 if len(sys.argv) < 7: |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
12 gd_util.die('Usage') |
| 0 | 13 |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
14 input, data_source, output, extra_files_path, ind_arg = sys.argv[1:6] |
| 0 | 15 |
| 16 population_info = [] | |
| 17 p1_input = None | |
| 18 all_individuals = False | |
| 19 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
20 for arg in sys.argv[6:]: |
| 0 | 21 if arg == 'all_individuals': |
| 22 all_individuals = True | |
| 23 elif len(arg) > 12 and arg[:12] == 'individuals:': | |
| 24 p1_input = arg[12:] | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
25 elif len(arg) > 11 and arg[:11] == 'population:': |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
26 file, name = arg[11:].split(':', 1) |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
27 population_info.append((file, name)) |
| 0 | 28 |
| 29 p_total = Population() | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
30 p_total.from_wrapped_dict(ind_arg) |
| 0 | 31 |
| 32 ################################################################################ | |
| 33 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
34 gd_util.mkdir_p(extra_files_path) |
| 0 | 35 |
| 36 ################################################################################ | |
| 37 | |
| 38 prog = 'coverage' | |
| 39 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
40 args = [ prog ] |
| 0 | 41 args.append(input) |
| 42 args.append(data_source) | |
| 43 | |
| 44 user_coverage_file = os.path.join(extra_files_path, 'coverage.txt') | |
| 45 args.append(user_coverage_file) | |
| 46 | |
| 47 population_list = [] | |
| 48 | |
| 49 if all_individuals: | |
| 50 tags = p_total.tag_list() | |
| 51 elif p1_input is not None: | |
| 52 p1 = Population() | |
| 53 this_pop = Population() | |
| 54 this_pop.from_population_file(p1_input) | |
| 55 population_list.append(this_pop) | |
| 56 p1.from_population_file(p1_input) | |
| 57 if not p_total.is_superset(p1): | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
58 gd_util.die('There is an individual in the population that is not in the SNP table') |
| 0 | 59 tags = p1.tag_list() |
| 60 else: | |
| 61 tags = [] | |
| 62 for population_file, population_name in population_info: | |
| 63 population = Population() | |
| 64 this_pop = Population() | |
| 65 this_pop.from_population_file(population_file) | |
| 66 population_list.append(this_pop) | |
| 67 population.from_population_file(population_file) | |
| 68 if not p_total.is_superset(population): | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
69 gd_util.die('There is an individual in the {} population that is not in the SNP table'.format(population_name)) |
| 0 | 70 columns = population.column_list() |
| 71 for column in columns: | |
| 72 tags.append('{0}:{1}'.format(column, population_name)) | |
| 73 | |
| 74 for tag in tags: | |
| 75 args.append(tag) | |
| 76 | |
| 77 ## text output | |
| 78 coverage_file = 'coverage.txt' | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
79 with open(coverage_file, 'w') as fh: |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
80 gd_util.run_program(prog, args, stdout=fh) |
| 0 | 81 |
| 82 ## graphical output | |
| 83 coverage2_file = 'coverage2.txt' | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
84 with open(coverage_file) as fh, open(coverage2_file, 'w') as ofh: |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
85 for line in fh: |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
86 line = line.rstrip('\r\n') |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
87 elems = line.split('\t') |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
88 name = elems.pop(0) |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
89 values = [ elems[0] ] |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
90 for idx in range(1, len(elems)): |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
91 val = str(float(elems[idx]) - float(elems[idx-1])) |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
92 values.append(val) |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
93 print >> ofh, '{0}\t{1}'.format(name, '\t'.join(values)) |
| 0 | 94 |
| 95 ################################################################################ | |
| 96 | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
97 prog = 'Rscript' |
| 0 | 98 |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
99 args = [ prog ] |
| 0 | 100 |
| 101 _realpath = os.path.realpath(__file__) | |
| 102 _script_dir = os.path.dirname(_realpath) | |
| 103 r_script_file = os.path.join(_script_dir, 'coverage_plot.r') | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
104 args.append(r_script_file) |
| 0 | 105 |
| 106 pdf_file = os.path.join(extra_files_path, 'coverage.pdf') | |
|
27
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
107 args.append(pdf_file) |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
108 |
|
8997f2ca8c7a
Update to Miller Lab devshed revision bae0d3306d3b
Richard Burhans <burhans@bx.psu.edu>
parents:
0
diff
changeset
|
109 gd_util.run_program(prog, args) |
| 0 | 110 |
| 111 ################################################################################ | |
| 112 | |
| 113 info_page = gd_composite.InfoPage() | |
| 114 info_page.set_title('Coverage distributions Galaxy Composite Dataset') | |
| 115 | |
| 116 display_file = gd_composite.DisplayFile() | |
| 117 display_value = gd_composite.DisplayValue() | |
| 118 | |
| 119 out_pdf = gd_composite.Parameter(name='coverage.pdf', value='coverage.pdf', display_type=display_file) | |
| 120 out_txt = gd_composite.Parameter(name='coverage.txt', value='coverage.txt', display_type=display_file) | |
| 121 | |
| 122 info_page.add_output_parameter(out_pdf) | |
| 123 info_page.add_output_parameter(out_txt) | |
| 124 | |
| 125 if data_source == '0': | |
| 126 data_source_value = 'sequence coverage' | |
| 127 elif data_source == '1': | |
| 128 data_source_value = 'estimated genotype' | |
| 129 | |
| 130 in_data_source = gd_composite.Parameter(description='Data source', value=data_source_value, display_type=display_value) | |
| 131 | |
| 132 info_page.add_input_parameter(in_data_source) | |
| 133 | |
| 134 if population_list: | |
| 135 misc_populations = gd_composite.Parameter(name='Populations', value=population_list, display_type=gd_composite.DisplayPopulationList()) | |
| 136 info_page.add_misc(misc_populations) | |
| 137 else: | |
| 138 misc_individuals = gd_composite.Parameter(name='Individuals', value=tags, display_type=gd_composite.DisplayTagList()) | |
| 139 info_page.add_misc(misc_individuals) | |
| 140 | |
| 141 with open (output, 'w') as ofh: | |
| 142 print >> ofh, info_page.render() | |
| 143 | |
| 144 sys.exit(0) | |
| 145 |
