Mercurial > repos > brad-chapman > bam_to_bigwig
diff bam_to_bigwig/bam_to_wiggle.py @ 0:d2c1af657010
Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
author | brad-chapman |
---|---|
date | Tue, 07 Jun 2011 16:25:46 -0400 |
parents | |
children | 0ff100a057ef |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bam_to_bigwig/bam_to_wiggle.py Tue Jun 07 16:25:46 2011 -0400 @@ -0,0 +1,123 @@ +#!/usr/bin/env python +"""Convert BAM files to BigWig file format in a specified region. + +Usage: + bam_to_wiggle.py <BAM file> [<YAML config>] + [--outfile=<output file name> + --chrom=<chrom> + --start=<start> + --end=<end>] + +chrom start and end are optional, in which case they default to everything. + +The config file is in YAML format and specifies the location of the wigToBigWig +program from UCSC: + +program: + ucsc_bigwig: wigToBigWig + +If not specified, these will be assumed to be present in the system path. + +The script requires: + pysam (http://code.google.com/p/pysam/) + wigToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/) +""" +import os +import sys +import subprocess +from optparse import OptionParser +from contextlib import contextmanager + +import yaml +import pysam + +def main(bam_file, config_file=None, chrom='all', start=0, end=None, + outfile=None): + if config_file: + with open(config_file) as in_handle: + config = yaml.load(in_handle) + else: + config = {"program": {"ucsc_bigwig" : "wigToBigWig"}} + if outfile is None: + outfile = "%s.bigwig" % os.path.splitext(bam_file)[0] + if start > 0: + start = int(start) - 1 + if end is not None: + end = int(end) + regions = [(chrom, start, end)] + if not os.path.exists(outfile): + wig_file = "%s.wig" % os.path.splitext(bam_file)[0] + with open(wig_file, "w") as out_handle: + chr_sizes, wig_valid = write_bam_track(bam_file, regions, config, out_handle) + try: + if wig_valid: + convert_to_bigwig(wig_file, chr_sizes, config, outfile) + finally: + os.remove(wig_file) + +@contextmanager +def indexed_bam(bam_file, config): + if not os.path.exists(bam_file + ".bai"): + pysam.index(bam_file) + sam_reader = pysam.Samfile(bam_file, "rb") + yield sam_reader + sam_reader.close() + +def write_bam_track(bam_file, regions, config, out_handle): + out_handle.write("track %s\n" % " ".join(["type=wiggle_0", + "name=%s" % os.path.splitext(os.path.split(bam_file)[-1])[0], + "visibility=full", + ])) + sizes = [] + is_valid = False + with indexed_bam(bam_file, config) as work_bam: + for ref_info in work_bam.header.get("SQ", []): + sizes.append((ref_info["SN"], ref_info["LN"])) + if len(regions) == 1 and regions[0][0] == "all": + regions = [] + for ref_info in work_bam.header.get("SQ", []): + regions.append((ref_info["SN"], 0, None)) + for chrom, start, end in regions: + if end is None: + for ref_info in work_bam.header.get("SQ", []): + if ref_info["SN"] == chrom: + end = int(ref_info["LN"]) + break + assert end is not None, "Could not find %s in header" % chrom + out_handle.write("variableStep chrom=%s\n" % chrom) + for col in work_bam.pileup(chrom, start, end): + out_handle.write("%s %s\n" % (col.pos+1, col.n)) + is_valid = True + return sizes, is_valid + +def convert_to_bigwig(wig_file, chr_sizes, config, bw_file=None): + if not bw_file: + bw_file = "%s.bigwig" % (os.path.splitext(wig_file)[0]) + size_file = "%s-sizes.txt" % (os.path.splitext(wig_file)[0]) + with open(size_file, "w") as out_handle: + for chrom, size in chr_sizes: + out_handle.write("%s\t%s\n" % (chrom, size)) + try: + cl = [config["program"]["ucsc_bigwig"], wig_file, size_file, bw_file] + subprocess.check_call(cl) + finally: + os.remove(size_file) + return bw_file + +if __name__ == "__main__": + parser = OptionParser() + parser.add_option("-o", "--outfile", dest="outfile") + parser.add_option("-c", "--chrom", dest="chrom") + parser.add_option("-s", "--start", dest="start") + parser.add_option("-e", "--end", dest="end") + (options, args) = parser.parse_args() + if len(args) not in [1, 2]: + print "Incorrect arguments" + print __doc__ + sys.exit() + kwargs = dict( + outfile=options.outfile, + chrom=options.chrom or 'all', + start=options.start or 0, + end=options.end) + main(*args, **kwargs)