Mercurial > repos > brad-chapman > bam_to_bigwig
comparison bam_to_bigwig/bam_to_wiggle.py @ 0:d2c1af657010
Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
author | brad-chapman |
---|---|
date | Tue, 07 Jun 2011 16:25:46 -0400 |
parents | |
children | 0ff100a057ef |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d2c1af657010 |
---|---|
1 #!/usr/bin/env python | |
2 """Convert BAM files to BigWig file format in a specified region. | |
3 | |
4 Usage: | |
5 bam_to_wiggle.py <BAM file> [<YAML config>] | |
6 [--outfile=<output file name> | |
7 --chrom=<chrom> | |
8 --start=<start> | |
9 --end=<end>] | |
10 | |
11 chrom start and end are optional, in which case they default to everything. | |
12 | |
13 The config file is in YAML format and specifies the location of the wigToBigWig | |
14 program from UCSC: | |
15 | |
16 program: | |
17 ucsc_bigwig: wigToBigWig | |
18 | |
19 If not specified, these will be assumed to be present in the system path. | |
20 | |
21 The script requires: | |
22 pysam (http://code.google.com/p/pysam/) | |
23 wigToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/) | |
24 """ | |
25 import os | |
26 import sys | |
27 import subprocess | |
28 from optparse import OptionParser | |
29 from contextlib import contextmanager | |
30 | |
31 import yaml | |
32 import pysam | |
33 | |
34 def main(bam_file, config_file=None, chrom='all', start=0, end=None, | |
35 outfile=None): | |
36 if config_file: | |
37 with open(config_file) as in_handle: | |
38 config = yaml.load(in_handle) | |
39 else: | |
40 config = {"program": {"ucsc_bigwig" : "wigToBigWig"}} | |
41 if outfile is None: | |
42 outfile = "%s.bigwig" % os.path.splitext(bam_file)[0] | |
43 if start > 0: | |
44 start = int(start) - 1 | |
45 if end is not None: | |
46 end = int(end) | |
47 regions = [(chrom, start, end)] | |
48 if not os.path.exists(outfile): | |
49 wig_file = "%s.wig" % os.path.splitext(bam_file)[0] | |
50 with open(wig_file, "w") as out_handle: | |
51 chr_sizes, wig_valid = write_bam_track(bam_file, regions, config, out_handle) | |
52 try: | |
53 if wig_valid: | |
54 convert_to_bigwig(wig_file, chr_sizes, config, outfile) | |
55 finally: | |
56 os.remove(wig_file) | |
57 | |
58 @contextmanager | |
59 def indexed_bam(bam_file, config): | |
60 if not os.path.exists(bam_file + ".bai"): | |
61 pysam.index(bam_file) | |
62 sam_reader = pysam.Samfile(bam_file, "rb") | |
63 yield sam_reader | |
64 sam_reader.close() | |
65 | |
66 def write_bam_track(bam_file, regions, config, out_handle): | |
67 out_handle.write("track %s\n" % " ".join(["type=wiggle_0", | |
68 "name=%s" % os.path.splitext(os.path.split(bam_file)[-1])[0], | |
69 "visibility=full", | |
70 ])) | |
71 sizes = [] | |
72 is_valid = False | |
73 with indexed_bam(bam_file, config) as work_bam: | |
74 for ref_info in work_bam.header.get("SQ", []): | |
75 sizes.append((ref_info["SN"], ref_info["LN"])) | |
76 if len(regions) == 1 and regions[0][0] == "all": | |
77 regions = [] | |
78 for ref_info in work_bam.header.get("SQ", []): | |
79 regions.append((ref_info["SN"], 0, None)) | |
80 for chrom, start, end in regions: | |
81 if end is None: | |
82 for ref_info in work_bam.header.get("SQ", []): | |
83 if ref_info["SN"] == chrom: | |
84 end = int(ref_info["LN"]) | |
85 break | |
86 assert end is not None, "Could not find %s in header" % chrom | |
87 out_handle.write("variableStep chrom=%s\n" % chrom) | |
88 for col in work_bam.pileup(chrom, start, end): | |
89 out_handle.write("%s %s\n" % (col.pos+1, col.n)) | |
90 is_valid = True | |
91 return sizes, is_valid | |
92 | |
93 def convert_to_bigwig(wig_file, chr_sizes, config, bw_file=None): | |
94 if not bw_file: | |
95 bw_file = "%s.bigwig" % (os.path.splitext(wig_file)[0]) | |
96 size_file = "%s-sizes.txt" % (os.path.splitext(wig_file)[0]) | |
97 with open(size_file, "w") as out_handle: | |
98 for chrom, size in chr_sizes: | |
99 out_handle.write("%s\t%s\n" % (chrom, size)) | |
100 try: | |
101 cl = [config["program"]["ucsc_bigwig"], wig_file, size_file, bw_file] | |
102 subprocess.check_call(cl) | |
103 finally: | |
104 os.remove(size_file) | |
105 return bw_file | |
106 | |
107 if __name__ == "__main__": | |
108 parser = OptionParser() | |
109 parser.add_option("-o", "--outfile", dest="outfile") | |
110 parser.add_option("-c", "--chrom", dest="chrom") | |
111 parser.add_option("-s", "--start", dest="start") | |
112 parser.add_option("-e", "--end", dest="end") | |
113 (options, args) = parser.parse_args() | |
114 if len(args) not in [1, 2]: | |
115 print "Incorrect arguments" | |
116 print __doc__ | |
117 sys.exit() | |
118 kwargs = dict( | |
119 outfile=options.outfile, | |
120 chrom=options.chrom or 'all', | |
121 start=options.start or 0, | |
122 end=options.end) | |
123 main(*args, **kwargs) |