comparison bam_to_bigwig/bam_to_bigwig.py @ 4:14e5258d1b39

v0.1.1: Add the forgotten --split argument in XML and moves to using the new bedtools. Thanks to David Leader
author Brad Chapman <chapmanb@50mail.com>
date Tue, 11 Feb 2014 15:55:22 -0500
parents e2edfa478eb4
children 52bcd04ee0d6
comparison
equal deleted inserted replaced
3:294e9dae5a9b 4:14e5258d1b39
2 #Original version copyright Brad Chapman with revisions from Peter Cock 2 #Original version copyright Brad Chapman with revisions from Peter Cock
3 #and ideas from Lance Parsons 3 #and ideas from Lance Parsons
4 """Convert BAM files to BigWig file format in a specified region. 4 """Convert BAM files to BigWig file format in a specified region.
5 5
6 Usage: 6 Usage:
7 bam_to_wiggle.py <BAM file> [<YAML config>] [--outfile=<output file name>] 7 bam_to_wiggle.py <BAM file> [--outfile=<output file name>] [--split]
8 8
9 The optional config file (not used by the Galaxy interface) is in YAML format 9 The --split argument is passed to bedtools genomecov
10 and specifies the location of the binary tools.
11
12 program:
13 bedtools_genomeCoverageBed: genomeCoverageBed
14 ucsc_bedGraphToBigWig: bedGraphToBigWig
15
16 If not specified, these will be assumed to be present in the system path.
17 10
18 The script requires: 11 The script requires:
19 pysam (http://code.google.com/p/pysam/) 12 pysam (http://code.google.com/p/pysam/)
20 genomeCoverageBed from BedTools (http://code.google.com/p/bedtools/) 13 bedtools genomecov (http://code.google.com/p/bedtools/)
21 bedGraphToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/) 14 bedGraphToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/)
22 If a configuration file is used, then PyYAML is also required (http://pyyaml.org/)
23 """ 15 """
24 import os 16 import os
25 import sys 17 import sys
26 import subprocess 18 import subprocess
27 import tempfile 19 import tempfile
28 from optparse import OptionParser 20 from optparse import OptionParser
29 from contextlib import contextmanager, closing 21 from contextlib import contextmanager, closing
30 22
31 import pysam 23 import pysam
32 24
33 def main(bam_file, config_file=None, outfile=None, split=False): 25 def main(bam_file, outfile=None, split=False):
34 if config_file: 26 config = {"program": {"ucsc_bedGraphToBigWig": ["bedGraphToBigWig"],
35 import yaml 27 "bedtools_genomeCoverageBed": ["bedtools", "genomecov"]}}
36 with open(config_file) as in_handle:
37 config = yaml.load(in_handle)
38 else:
39 config = {"program": {"ucsc_bedGraphToBigWig" : "bedGraphToBigWig",
40 "bedtools_genomeCoverageBed" : "genomeCoverageBed"}}
41 if outfile is None: 28 if outfile is None:
42 outfile = "%s.bigwig" % os.path.splitext(bam_file)[0] 29 outfile = "%s.bigwig" % os.path.splitext(bam_file)[0]
43 if os.path.abspath(bam_file) == os.path.abspath(outfile): 30 if os.path.abspath(bam_file) == os.path.abspath(outfile):
44 sys.stderr.write("Bad arguments, input and output files are the same.\n") 31 sys.stderr.write("Bad arguments, input and output files are the same.\n")
45 sys.exit(1) 32 sys.exit(1)
57 temp_file = temp_handle.name 44 temp_file = temp_handle.name
58 with closing(temp_handle): 45 with closing(temp_handle):
59 print "Calculating coverage..." 46 print "Calculating coverage..."
60 convert_to_graph(bam_file, split, config, temp_handle) 47 convert_to_graph(bam_file, split, config, temp_handle)
61 try: 48 try:
62 print "Converting %i MB graph file to bigwig..." % (os.path.getsize(temp_file) // (1024*1024)) 49 print "Converting %i MB graph file to bigwig..." % (os.path.getsize(temp_file) // (1024 * 1024))
63 #Can't pipe this as stdin due to converter design, 50 #Can't pipe this as stdin due to converter design,
64 #https://lists.soe.ucsc.edu/pipermail/genome/2011-March/025455.html 51 #https://lists.soe.ucsc.edu/pipermail/genome/2011-March/025455.html
65 convert_to_bigwig(temp_file, sizes, config, outfile) 52 convert_to_bigwig(temp_file, sizes, config, outfile)
66 finally: 53 finally:
67 if os.path.isfile(temp_file): 54 if os.path.isfile(temp_file):
80 with indexed_bam(bam_file, config) as work_bam: 67 with indexed_bam(bam_file, config) as work_bam:
81 sizes = zip(work_bam.references, work_bam.lengths) 68 sizes = zip(work_bam.references, work_bam.lengths)
82 return sizes 69 return sizes
83 70
84 def convert_to_graph(bam_file, split, config, out_handle): 71 def convert_to_graph(bam_file, split, config, out_handle):
85 cl = [config["program"]["bedtools_genomeCoverageBed"], "-ibam", bam_file, "-bg"] 72 cl = [config["program"]["bedtools_genomeCoverageBed"]] + ["-ibam", bam_file, "-bg"]
86 if split: 73 if split:
87 cl.append("-split") 74 cl.append("-split")
88 subprocess.check_call(cl, stdout=out_handle) 75 subprocess.check_call(cl, stdout=out_handle)
89 76
90 def convert_to_bigwig(bedgraph_file, chr_sizes, config, bw_file): 77 def convert_to_bigwig(bedgraph_file, chr_sizes, config, bw_file):
92 size_file = "%s-sizes.txt" % (os.path.splitext(bw_file)[0]) 79 size_file = "%s-sizes.txt" % (os.path.splitext(bw_file)[0])
93 with open(size_file, "w") as out_handle: 80 with open(size_file, "w") as out_handle:
94 for chrom, size in chr_sizes: 81 for chrom, size in chr_sizes:
95 out_handle.write("%s\t%s\n" % (chrom, size)) 82 out_handle.write("%s\t%s\n" % (chrom, size))
96 try: 83 try:
97 cl = [config["program"]["ucsc_bedGraphToBigWig"], bedgraph_file, size_file, bw_file] 84 cl = [config["program"]["ucsc_bedGraphToBigWig"]] + [bedgraph_file, size_file, bw_file]
98 subprocess.check_call(cl) 85 subprocess.check_call(cl)
99 finally: 86 finally:
100 os.remove(size_file) 87 os.remove(size_file)
101 return bw_file 88 return bw_file
102 89