Mercurial > repos > brad-chapman > bam_to_bigwig
comparison bam_to_bigwig/bam_to_wiggle.py @ 0:d2c1af657010
Migrated tool version 0.0.1 from old tool shed archive to new tool shed repository
| author | brad-chapman |
|---|---|
| date | Tue, 07 Jun 2011 16:25:46 -0400 |
| parents | |
| children | 0ff100a057ef |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d2c1af657010 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """Convert BAM files to BigWig file format in a specified region. | |
| 3 | |
| 4 Usage: | |
| 5 bam_to_wiggle.py <BAM file> [<YAML config>] | |
| 6 [--outfile=<output file name> | |
| 7 --chrom=<chrom> | |
| 8 --start=<start> | |
| 9 --end=<end>] | |
| 10 | |
| 11 chrom start and end are optional, in which case they default to everything. | |
| 12 | |
| 13 The config file is in YAML format and specifies the location of the wigToBigWig | |
| 14 program from UCSC: | |
| 15 | |
| 16 program: | |
| 17 ucsc_bigwig: wigToBigWig | |
| 18 | |
| 19 If not specified, these will be assumed to be present in the system path. | |
| 20 | |
| 21 The script requires: | |
| 22 pysam (http://code.google.com/p/pysam/) | |
| 23 wigToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/) | |
| 24 """ | |
| 25 import os | |
| 26 import sys | |
| 27 import subprocess | |
| 28 from optparse import OptionParser | |
| 29 from contextlib import contextmanager | |
| 30 | |
| 31 import yaml | |
| 32 import pysam | |
| 33 | |
| 34 def main(bam_file, config_file=None, chrom='all', start=0, end=None, | |
| 35 outfile=None): | |
| 36 if config_file: | |
| 37 with open(config_file) as in_handle: | |
| 38 config = yaml.load(in_handle) | |
| 39 else: | |
| 40 config = {"program": {"ucsc_bigwig" : "wigToBigWig"}} | |
| 41 if outfile is None: | |
| 42 outfile = "%s.bigwig" % os.path.splitext(bam_file)[0] | |
| 43 if start > 0: | |
| 44 start = int(start) - 1 | |
| 45 if end is not None: | |
| 46 end = int(end) | |
| 47 regions = [(chrom, start, end)] | |
| 48 if not os.path.exists(outfile): | |
| 49 wig_file = "%s.wig" % os.path.splitext(bam_file)[0] | |
| 50 with open(wig_file, "w") as out_handle: | |
| 51 chr_sizes, wig_valid = write_bam_track(bam_file, regions, config, out_handle) | |
| 52 try: | |
| 53 if wig_valid: | |
| 54 convert_to_bigwig(wig_file, chr_sizes, config, outfile) | |
| 55 finally: | |
| 56 os.remove(wig_file) | |
| 57 | |
| 58 @contextmanager | |
| 59 def indexed_bam(bam_file, config): | |
| 60 if not os.path.exists(bam_file + ".bai"): | |
| 61 pysam.index(bam_file) | |
| 62 sam_reader = pysam.Samfile(bam_file, "rb") | |
| 63 yield sam_reader | |
| 64 sam_reader.close() | |
| 65 | |
| 66 def write_bam_track(bam_file, regions, config, out_handle): | |
| 67 out_handle.write("track %s\n" % " ".join(["type=wiggle_0", | |
| 68 "name=%s" % os.path.splitext(os.path.split(bam_file)[-1])[0], | |
| 69 "visibility=full", | |
| 70 ])) | |
| 71 sizes = [] | |
| 72 is_valid = False | |
| 73 with indexed_bam(bam_file, config) as work_bam: | |
| 74 for ref_info in work_bam.header.get("SQ", []): | |
| 75 sizes.append((ref_info["SN"], ref_info["LN"])) | |
| 76 if len(regions) == 1 and regions[0][0] == "all": | |
| 77 regions = [] | |
| 78 for ref_info in work_bam.header.get("SQ", []): | |
| 79 regions.append((ref_info["SN"], 0, None)) | |
| 80 for chrom, start, end in regions: | |
| 81 if end is None: | |
| 82 for ref_info in work_bam.header.get("SQ", []): | |
| 83 if ref_info["SN"] == chrom: | |
| 84 end = int(ref_info["LN"]) | |
| 85 break | |
| 86 assert end is not None, "Could not find %s in header" % chrom | |
| 87 out_handle.write("variableStep chrom=%s\n" % chrom) | |
| 88 for col in work_bam.pileup(chrom, start, end): | |
| 89 out_handle.write("%s %s\n" % (col.pos+1, col.n)) | |
| 90 is_valid = True | |
| 91 return sizes, is_valid | |
| 92 | |
| 93 def convert_to_bigwig(wig_file, chr_sizes, config, bw_file=None): | |
| 94 if not bw_file: | |
| 95 bw_file = "%s.bigwig" % (os.path.splitext(wig_file)[0]) | |
| 96 size_file = "%s-sizes.txt" % (os.path.splitext(wig_file)[0]) | |
| 97 with open(size_file, "w") as out_handle: | |
| 98 for chrom, size in chr_sizes: | |
| 99 out_handle.write("%s\t%s\n" % (chrom, size)) | |
| 100 try: | |
| 101 cl = [config["program"]["ucsc_bigwig"], wig_file, size_file, bw_file] | |
| 102 subprocess.check_call(cl) | |
| 103 finally: | |
| 104 os.remove(size_file) | |
| 105 return bw_file | |
| 106 | |
| 107 if __name__ == "__main__": | |
| 108 parser = OptionParser() | |
| 109 parser.add_option("-o", "--outfile", dest="outfile") | |
| 110 parser.add_option("-c", "--chrom", dest="chrom") | |
| 111 parser.add_option("-s", "--start", dest="start") | |
| 112 parser.add_option("-e", "--end", dest="end") | |
| 113 (options, args) = parser.parse_args() | |
| 114 if len(args) not in [1, 2]: | |
| 115 print "Incorrect arguments" | |
| 116 print __doc__ | |
| 117 sys.exit() | |
| 118 kwargs = dict( | |
| 119 outfile=options.outfile, | |
| 120 chrom=options.chrom or 'all', | |
| 121 start=options.start or 0, | |
| 122 end=options.end) | |
| 123 main(*args, **kwargs) |
