Mercurial > repos > triasteran > bam_to_bigwig
comparison BamToBigWig_planemo/bam_to_bigwig_py3.py @ 4:9e7a38ba91e8 draft
Uploaded
author | triasteran |
---|---|
date | Tue, 22 Feb 2022 16:17:50 +0000 |
parents | 012dc1867dc7 |
children |
comparison
equal
deleted
inserted
replaced
3:d6280eee8002 | 4:9e7a38ba91e8 |
---|---|
1 #!/usr/bin/env python | |
2 #Original version copyright Brad Chapman with revisions from Peter Cock | |
3 #and ideas from Lance Parsons | |
4 """Convert BAM files to BigWig file format in a specified region. | |
5 | |
6 Usage: | |
7 bam_to_wiggle.py <BAM file> [<YAML config>] [--outfile=<output file name>] | |
8 | |
9 The optional config file (not used by the Galaxy interface) is in YAML format | |
10 and specifies the location of the binary tools. | |
11 | |
12 program: | |
13 bedtools_genomeCoverageBed: genomeCoverageBed | |
14 ucsc_bedGraphToBigWig: bedGraphToBigWig | |
15 | |
16 If not specified, these will be assumed to be present in the system path. | |
17 | |
18 The script requires: | |
19 pysam (http://code.google.com/p/pysam/) | |
20 genomeCoverageBed from BedTools (http://code.google.com/p/bedtools/) | |
21 bedGraphToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/) | |
22 If a configuration file is used, then PyYAML is also required (http://pyyaml.org/) | |
23 """ | |
24 import os | |
25 import sys | |
26 import subprocess | |
27 import tempfile | |
28 from optparse import OptionParser | |
29 from contextlib import contextmanager, closing | |
30 | |
31 import pysam | |
32 | |
33 def main(bam_file, config_file=None, outfile=None, split=False): | |
34 if config_file: | |
35 import yaml | |
36 with open(config_file) as in_handle: | |
37 config = yaml.load(in_handle) | |
38 else: | |
39 config = {"program": {"ucsc_bedGraphToBigWig" : "bedGraphToBigWig", | |
40 "bedtools_genomeCoverageBed" : "genomeCoverageBed"}} | |
41 if outfile is None: | |
42 outfile = "%s.bigwig" % os.path.splitext(bam_file)[0] | |
43 if os.path.abspath(bam_file) == os.path.abspath(outfile): | |
44 sys.stderr.write("Bad arguments, input and output files are the same.\n") | |
45 sys.exit(1) | |
46 if os.path.exists(outfile) and os.path.getsize(outfile) > 0: | |
47 sys.stderr.write("Warning, output file already exists.\n") | |
48 | |
49 sizes = get_sizes(bam_file, config) | |
50 #print ("Have %i references" % len(sizes)) | |
51 if not sizes: | |
52 sys.stderr.write("Problem reading BAM header.\n") | |
53 sys.exit(1) | |
54 | |
55 #Use a temp file to avoid any possiblity of not having write permission | |
56 temp_handle = tempfile.NamedTemporaryFile(delete=False) | |
57 temp_file = temp_handle.name | |
58 with closing(temp_handle): | |
59 print ("Calculating coverage...") | |
60 convert_to_graph(bam_file, split, config, temp_handle) | |
61 try: | |
62 print ("Converting %i MB graph file to bigwig..." % (os.path.getsize(temp_file) // (1024*1024))) | |
63 #Can't pipe this as stdin due to converter design, | |
64 #https://lists.soe.ucsc.edu/pipermail/genome/2011-March/025455.html | |
65 convert_to_bigwig(temp_file, sizes, config, outfile) | |
66 finally: | |
67 if os.path.isfile(temp_file): | |
68 os.remove(temp_file) | |
69 print ("Done") | |
70 | |
71 @contextmanager | |
72 def indexed_bam(bam_file, config): | |
73 if not os.path.exists(bam_file + ".bai"): | |
74 pysam.index(bam_file) | |
75 sam_reader = pysam.Samfile(bam_file, "rb") | |
76 yield sam_reader | |
77 sam_reader.close() | |
78 | |
79 def get_sizes(bam_file, config): | |
80 with indexed_bam(bam_file, config) as work_bam: | |
81 sizes = zip(work_bam.references, work_bam.lengths) | |
82 return sizes | |
83 | |
84 def convert_to_graph(bam_file, split, config, out_handle): | |
85 cl = [config["program"]["bedtools_genomeCoverageBed"], "-ibam", bam_file, "-bg"] | |
86 if split: | |
87 cl.append("-split") | |
88 subprocess.check_call(cl, stdout=out_handle) | |
89 | |
90 def convert_to_bigwig(bedgraph_file, chr_sizes, config, bw_file): | |
91 #This will be fine under Galaxy, but could use temp folder? | |
92 size_file = "%s-sizes.txt" % (os.path.splitext(bw_file)[0]) | |
93 with open(size_file, "w") as out_handle: | |
94 for chrom, size in chr_sizes: | |
95 out_handle.write("%s\t%s\n" % (chrom, size)) | |
96 try: | |
97 cl = [config["program"]["ucsc_bedGraphToBigWig"], bedgraph_file, size_file, bw_file] | |
98 subprocess.check_call(cl) | |
99 finally: | |
100 os.remove(size_file) | |
101 return bw_file | |
102 | |
103 if __name__ == "__main__": | |
104 parser = OptionParser() | |
105 parser.add_option("-o", "--outfile", dest="outfile") | |
106 parser.add_option("-s", "--split", action="store_true", dest="split") | |
107 (options, args) = parser.parse_args() | |
108 if len(args) not in [1, 2]: | |
109 print ("Incorrect arguments") | |
110 print (__doc__) | |
111 sys.exit() | |
112 kwargs = dict( | |
113 outfile=options.outfile, | |
114 split=options.split) | |
115 main(*args, **kwargs) |