Mercurial > repos > brad-chapman > bam_to_bigwig
comparison bam_to_bigwig/bam_to_bigwig.py @ 4:14e5258d1b39
v0.1.1: Add the forgotten --split argument in XML and moves to using the new bedtools. Thanks to David Leader
author | Brad Chapman <chapmanb@50mail.com> |
---|---|
date | Tue, 11 Feb 2014 15:55:22 -0500 |
parents | e2edfa478eb4 |
children | 52bcd04ee0d6 |
comparison
equal
deleted
inserted
replaced
3:294e9dae5a9b | 4:14e5258d1b39 |
---|---|
2 #Original version copyright Brad Chapman with revisions from Peter Cock | 2 #Original version copyright Brad Chapman with revisions from Peter Cock |
3 #and ideas from Lance Parsons | 3 #and ideas from Lance Parsons |
4 """Convert BAM files to BigWig file format in a specified region. | 4 """Convert BAM files to BigWig file format in a specified region. |
5 | 5 |
6 Usage: | 6 Usage: |
7 bam_to_wiggle.py <BAM file> [<YAML config>] [--outfile=<output file name>] | 7 bam_to_wiggle.py <BAM file> [--outfile=<output file name>] [--split] |
8 | 8 |
9 The optional config file (not used by the Galaxy interface) is in YAML format | 9 The --split argument is passed to bedtools genomecov |
10 and specifies the location of the binary tools. | |
11 | |
12 program: | |
13 bedtools_genomeCoverageBed: genomeCoverageBed | |
14 ucsc_bedGraphToBigWig: bedGraphToBigWig | |
15 | |
16 If not specified, these will be assumed to be present in the system path. | |
17 | 10 |
18 The script requires: | 11 The script requires: |
19 pysam (http://code.google.com/p/pysam/) | 12 pysam (http://code.google.com/p/pysam/) |
20 genomeCoverageBed from BedTools (http://code.google.com/p/bedtools/) | 13 bedtools genomecov (http://code.google.com/p/bedtools/) |
21 bedGraphToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/) | 14 bedGraphToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/) |
22 If a configuration file is used, then PyYAML is also required (http://pyyaml.org/) | |
23 """ | 15 """ |
24 import os | 16 import os |
25 import sys | 17 import sys |
26 import subprocess | 18 import subprocess |
27 import tempfile | 19 import tempfile |
28 from optparse import OptionParser | 20 from optparse import OptionParser |
29 from contextlib import contextmanager, closing | 21 from contextlib import contextmanager, closing |
30 | 22 |
31 import pysam | 23 import pysam |
32 | 24 |
33 def main(bam_file, config_file=None, outfile=None, split=False): | 25 def main(bam_file, outfile=None, split=False): |
34 if config_file: | 26 config = {"program": {"ucsc_bedGraphToBigWig": ["bedGraphToBigWig"], |
35 import yaml | 27 "bedtools_genomeCoverageBed": ["bedtools", "genomecov"]}} |
36 with open(config_file) as in_handle: | |
37 config = yaml.load(in_handle) | |
38 else: | |
39 config = {"program": {"ucsc_bedGraphToBigWig" : "bedGraphToBigWig", | |
40 "bedtools_genomeCoverageBed" : "genomeCoverageBed"}} | |
41 if outfile is None: | 28 if outfile is None: |
42 outfile = "%s.bigwig" % os.path.splitext(bam_file)[0] | 29 outfile = "%s.bigwig" % os.path.splitext(bam_file)[0] |
43 if os.path.abspath(bam_file) == os.path.abspath(outfile): | 30 if os.path.abspath(bam_file) == os.path.abspath(outfile): |
44 sys.stderr.write("Bad arguments, input and output files are the same.\n") | 31 sys.stderr.write("Bad arguments, input and output files are the same.\n") |
45 sys.exit(1) | 32 sys.exit(1) |
57 temp_file = temp_handle.name | 44 temp_file = temp_handle.name |
58 with closing(temp_handle): | 45 with closing(temp_handle): |
59 print "Calculating coverage..." | 46 print "Calculating coverage..." |
60 convert_to_graph(bam_file, split, config, temp_handle) | 47 convert_to_graph(bam_file, split, config, temp_handle) |
61 try: | 48 try: |
62 print "Converting %i MB graph file to bigwig..." % (os.path.getsize(temp_file) // (1024*1024)) | 49 print "Converting %i MB graph file to bigwig..." % (os.path.getsize(temp_file) // (1024 * 1024)) |
63 #Can't pipe this as stdin due to converter design, | 50 #Can't pipe this as stdin due to converter design, |
64 #https://lists.soe.ucsc.edu/pipermail/genome/2011-March/025455.html | 51 #https://lists.soe.ucsc.edu/pipermail/genome/2011-March/025455.html |
65 convert_to_bigwig(temp_file, sizes, config, outfile) | 52 convert_to_bigwig(temp_file, sizes, config, outfile) |
66 finally: | 53 finally: |
67 if os.path.isfile(temp_file): | 54 if os.path.isfile(temp_file): |
80 with indexed_bam(bam_file, config) as work_bam: | 67 with indexed_bam(bam_file, config) as work_bam: |
81 sizes = zip(work_bam.references, work_bam.lengths) | 68 sizes = zip(work_bam.references, work_bam.lengths) |
82 return sizes | 69 return sizes |
83 | 70 |
84 def convert_to_graph(bam_file, split, config, out_handle): | 71 def convert_to_graph(bam_file, split, config, out_handle): |
85 cl = [config["program"]["bedtools_genomeCoverageBed"], "-ibam", bam_file, "-bg"] | 72 cl = [config["program"]["bedtools_genomeCoverageBed"]] + ["-ibam", bam_file, "-bg"] |
86 if split: | 73 if split: |
87 cl.append("-split") | 74 cl.append("-split") |
88 subprocess.check_call(cl, stdout=out_handle) | 75 subprocess.check_call(cl, stdout=out_handle) |
89 | 76 |
90 def convert_to_bigwig(bedgraph_file, chr_sizes, config, bw_file): | 77 def convert_to_bigwig(bedgraph_file, chr_sizes, config, bw_file): |
92 size_file = "%s-sizes.txt" % (os.path.splitext(bw_file)[0]) | 79 size_file = "%s-sizes.txt" % (os.path.splitext(bw_file)[0]) |
93 with open(size_file, "w") as out_handle: | 80 with open(size_file, "w") as out_handle: |
94 for chrom, size in chr_sizes: | 81 for chrom, size in chr_sizes: |
95 out_handle.write("%s\t%s\n" % (chrom, size)) | 82 out_handle.write("%s\t%s\n" % (chrom, size)) |
96 try: | 83 try: |
97 cl = [config["program"]["ucsc_bedGraphToBigWig"], bedgraph_file, size_file, bw_file] | 84 cl = [config["program"]["ucsc_bedGraphToBigWig"]] + [bedgraph_file, size_file, bw_file] |
98 subprocess.check_call(cl) | 85 subprocess.check_call(cl) |
99 finally: | 86 finally: |
100 os.remove(size_file) | 87 os.remove(size_file) |
101 return bw_file | 88 return bw_file |
102 | 89 |