view bam_to_bigwig/bam_to_wiggle.py @ 0:d2c1af657010

Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
author brad-chapman
date Tue, 07 Jun 2011 16:25:46 -0400
parents
children 0ff100a057ef
line wrap: on
line source

#!/usr/bin/env python
"""Convert BAM files to BigWig file format in a specified region.

Usage:
    bam_to_wiggle.py <BAM file> [<YAML config>]
    [--outfile=<output file name>
     --chrom=<chrom>
     --start=<start>
     --end=<end>]

chrom start and end are optional, in which case they default to everything.

The config file is in YAML format and specifies the location of the wigToBigWig
program from UCSC:

program:
  ucsc_bigwig: wigToBigWig

If not specified, these will be assumed to be present in the system path.

The script requires:
    pysam (http://code.google.com/p/pysam/)
    wigToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/)
"""
import os
import sys
import subprocess
from optparse import OptionParser
from contextlib import contextmanager

import yaml
import pysam

def main(bam_file, config_file=None, chrom='all', start=0, end=None,
         outfile=None):
    if config_file:
        with open(config_file) as in_handle:
            config = yaml.load(in_handle)
    else:
        config = {"program": {"ucsc_bigwig" : "wigToBigWig"}}
    if outfile is None:
        outfile = "%s.bigwig" % os.path.splitext(bam_file)[0]
    if start > 0:
        start = int(start) - 1
    if end is not None:
        end = int(end)
    regions = [(chrom, start, end)]
    if not os.path.exists(outfile):
        wig_file = "%s.wig" % os.path.splitext(bam_file)[0]
        with open(wig_file, "w") as out_handle:
            chr_sizes, wig_valid = write_bam_track(bam_file, regions, config, out_handle)
        try:
            if wig_valid:
                convert_to_bigwig(wig_file, chr_sizes, config, outfile)
        finally:
            os.remove(wig_file)

@contextmanager
def indexed_bam(bam_file, config):
    if not os.path.exists(bam_file + ".bai"):
        pysam.index(bam_file)
    sam_reader = pysam.Samfile(bam_file, "rb")
    yield sam_reader
    sam_reader.close()

def write_bam_track(bam_file, regions, config, out_handle):
    out_handle.write("track %s\n" % " ".join(["type=wiggle_0",
        "name=%s" % os.path.splitext(os.path.split(bam_file)[-1])[0],
        "visibility=full",
        ]))
    sizes = []
    is_valid = False
    with indexed_bam(bam_file, config) as work_bam:
        for ref_info in work_bam.header.get("SQ", []):
            sizes.append((ref_info["SN"], ref_info["LN"]))
        if len(regions) == 1 and regions[0][0] == "all":
            regions = []
            for ref_info in work_bam.header.get("SQ", []):
                regions.append((ref_info["SN"], 0, None))
        for chrom, start, end in regions:
            if end is None:
                for ref_info in work_bam.header.get("SQ", []):
                    if ref_info["SN"] == chrom:
                        end = int(ref_info["LN"])
                        break
            assert end is not None, "Could not find %s in header" % chrom
            out_handle.write("variableStep chrom=%s\n" % chrom)
            for col in work_bam.pileup(chrom, start, end):
                out_handle.write("%s %s\n" % (col.pos+1, col.n))
                is_valid = True
    return sizes, is_valid

def convert_to_bigwig(wig_file, chr_sizes, config, bw_file=None):
    if not bw_file:
        bw_file = "%s.bigwig" % (os.path.splitext(wig_file)[0])
    size_file = "%s-sizes.txt" % (os.path.splitext(wig_file)[0])
    with open(size_file, "w") as out_handle:
        for chrom, size in chr_sizes:
            out_handle.write("%s\t%s\n" % (chrom, size))
    try:
        cl = [config["program"]["ucsc_bigwig"], wig_file, size_file, bw_file]
        subprocess.check_call(cl)
    finally:
        os.remove(size_file)
    return bw_file

if __name__ == "__main__":
    parser = OptionParser()
    parser.add_option("-o", "--outfile", dest="outfile")
    parser.add_option("-c", "--chrom", dest="chrom")
    parser.add_option("-s", "--start", dest="start")
    parser.add_option("-e", "--end", dest="end")
    (options, args) = parser.parse_args()
    if len(args) not in [1, 2]:
        print "Incorrect arguments"
        print __doc__
        sys.exit()
    kwargs = dict(
        outfile=options.outfile,
        chrom=options.chrom or 'all',
        start=options.start or 0,
        end=options.end)
    main(*args, **kwargs)