comparison bam_to_bigwig/bam_to_wiggle.py @ 0:d2c1af657010

Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
author brad-chapman
date Tue, 07 Jun 2011 16:25:46 -0400
parents
children 0ff100a057ef
comparison
equal deleted inserted replaced
-1:000000000000 0:d2c1af657010
1 #!/usr/bin/env python
2 """Convert BAM files to BigWig file format in a specified region.
3
4 Usage:
5 bam_to_wiggle.py <BAM file> [<YAML config>]
6 [--outfile=<output file name>
7 --chrom=<chrom>
8 --start=<start>
9 --end=<end>]
10
11 chrom start and end are optional, in which case they default to everything.
12
13 The config file is in YAML format and specifies the location of the wigToBigWig
14 program from UCSC:
15
16 program:
17 ucsc_bigwig: wigToBigWig
18
19 If not specified, these will be assumed to be present in the system path.
20
21 The script requires:
22 pysam (http://code.google.com/p/pysam/)
23 wigToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/)
24 """
25 import os
26 import sys
27 import subprocess
28 from optparse import OptionParser
29 from contextlib import contextmanager
30
31 import yaml
32 import pysam
33
34 def main(bam_file, config_file=None, chrom='all', start=0, end=None,
35 outfile=None):
36 if config_file:
37 with open(config_file) as in_handle:
38 config = yaml.load(in_handle)
39 else:
40 config = {"program": {"ucsc_bigwig" : "wigToBigWig"}}
41 if outfile is None:
42 outfile = "%s.bigwig" % os.path.splitext(bam_file)[0]
43 if start > 0:
44 start = int(start) - 1
45 if end is not None:
46 end = int(end)
47 regions = [(chrom, start, end)]
48 if not os.path.exists(outfile):
49 wig_file = "%s.wig" % os.path.splitext(bam_file)[0]
50 with open(wig_file, "w") as out_handle:
51 chr_sizes, wig_valid = write_bam_track(bam_file, regions, config, out_handle)
52 try:
53 if wig_valid:
54 convert_to_bigwig(wig_file, chr_sizes, config, outfile)
55 finally:
56 os.remove(wig_file)
57
58 @contextmanager
59 def indexed_bam(bam_file, config):
60 if not os.path.exists(bam_file + ".bai"):
61 pysam.index(bam_file)
62 sam_reader = pysam.Samfile(bam_file, "rb")
63 yield sam_reader
64 sam_reader.close()
65
66 def write_bam_track(bam_file, regions, config, out_handle):
67 out_handle.write("track %s\n" % " ".join(["type=wiggle_0",
68 "name=%s" % os.path.splitext(os.path.split(bam_file)[-1])[0],
69 "visibility=full",
70 ]))
71 sizes = []
72 is_valid = False
73 with indexed_bam(bam_file, config) as work_bam:
74 for ref_info in work_bam.header.get("SQ", []):
75 sizes.append((ref_info["SN"], ref_info["LN"]))
76 if len(regions) == 1 and regions[0][0] == "all":
77 regions = []
78 for ref_info in work_bam.header.get("SQ", []):
79 regions.append((ref_info["SN"], 0, None))
80 for chrom, start, end in regions:
81 if end is None:
82 for ref_info in work_bam.header.get("SQ", []):
83 if ref_info["SN"] == chrom:
84 end = int(ref_info["LN"])
85 break
86 assert end is not None, "Could not find %s in header" % chrom
87 out_handle.write("variableStep chrom=%s\n" % chrom)
88 for col in work_bam.pileup(chrom, start, end):
89 out_handle.write("%s %s\n" % (col.pos+1, col.n))
90 is_valid = True
91 return sizes, is_valid
92
93 def convert_to_bigwig(wig_file, chr_sizes, config, bw_file=None):
94 if not bw_file:
95 bw_file = "%s.bigwig" % (os.path.splitext(wig_file)[0])
96 size_file = "%s-sizes.txt" % (os.path.splitext(wig_file)[0])
97 with open(size_file, "w") as out_handle:
98 for chrom, size in chr_sizes:
99 out_handle.write("%s\t%s\n" % (chrom, size))
100 try:
101 cl = [config["program"]["ucsc_bigwig"], wig_file, size_file, bw_file]
102 subprocess.check_call(cl)
103 finally:
104 os.remove(size_file)
105 return bw_file
106
107 if __name__ == "__main__":
108 parser = OptionParser()
109 parser.add_option("-o", "--outfile", dest="outfile")
110 parser.add_option("-c", "--chrom", dest="chrom")
111 parser.add_option("-s", "--start", dest="start")
112 parser.add_option("-e", "--end", dest="end")
113 (options, args) = parser.parse_args()
114 if len(args) not in [1, 2]:
115 print "Incorrect arguments"
116 print __doc__
117 sys.exit()
118 kwargs = dict(
119 outfile=options.outfile,
120 chrom=options.chrom or 'all',
121 start=options.start or 0,
122 end=options.end)
123 main(*args, **kwargs)