annotate BamToBigWig_planemo/bam_to_bigwig_py3.py @ 4:9e7a38ba91e8 draft

Uploaded
author triasteran
date Tue, 22 Feb 2022 16:17:50 +0000
parents 012dc1867dc7
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
1 #!/usr/bin/env python
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
2 #Original version copyright Brad Chapman with revisions from Peter Cock
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
3 #and ideas from Lance Parsons
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
4 """Convert BAM files to BigWig file format in a specified region.
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
5
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
6 Usage:
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
7 bam_to_wiggle.py <BAM file> [<YAML config>] [--outfile=<output file name>]
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
8
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
9 The optional config file (not used by the Galaxy interface) is in YAML format
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
10 and specifies the location of the binary tools.
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
11
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
12 program:
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
13 bedtools_genomeCoverageBed: genomeCoverageBed
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
14 ucsc_bedGraphToBigWig: bedGraphToBigWig
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
15
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
16 If not specified, these will be assumed to be present in the system path.
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
17
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
18 The script requires:
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
19 pysam (http://code.google.com/p/pysam/)
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
20 genomeCoverageBed from BedTools (http://code.google.com/p/bedtools/)
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
21 bedGraphToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/)
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
22 If a configuration file is used, then PyYAML is also required (http://pyyaml.org/)
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
23 """
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
24 import os
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
25 import sys
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
26 import subprocess
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
27 import tempfile
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
28 from optparse import OptionParser
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
29 from contextlib import contextmanager, closing
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
30
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
31 import pysam
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
32
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
33 def main(bam_file, config_file=None, outfile=None, split=False):
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
34 if config_file:
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
35 import yaml
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
36 with open(config_file) as in_handle:
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
37 config = yaml.load(in_handle)
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
38 else:
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
39 config = {"program": {"ucsc_bedGraphToBigWig" : "bedGraphToBigWig",
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
40 "bedtools_genomeCoverageBed" : "genomeCoverageBed"}}
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
41 if outfile is None:
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
42 outfile = "%s.bigwig" % os.path.splitext(bam_file)[0]
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
43 if os.path.abspath(bam_file) == os.path.abspath(outfile):
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
44 sys.stderr.write("Bad arguments, input and output files are the same.\n")
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
45 sys.exit(1)
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
46 if os.path.exists(outfile) and os.path.getsize(outfile) > 0:
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
47 sys.stderr.write("Warning, output file already exists.\n")
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
48
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
49 sizes = get_sizes(bam_file, config)
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
50 #print ("Have %i references" % len(sizes))
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
51 if not sizes:
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
52 sys.stderr.write("Problem reading BAM header.\n")
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
53 sys.exit(1)
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
54
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
55 #Use a temp file to avoid any possiblity of not having write permission
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
56 temp_handle = tempfile.NamedTemporaryFile(delete=False)
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
57 temp_file = temp_handle.name
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
58 with closing(temp_handle):
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
59 print ("Calculating coverage...")
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
60 convert_to_graph(bam_file, split, config, temp_handle)
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
61 try:
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
62 print ("Converting %i MB graph file to bigwig..." % (os.path.getsize(temp_file) // (1024*1024)))
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
63 #Can't pipe this as stdin due to converter design,
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
64 #https://lists.soe.ucsc.edu/pipermail/genome/2011-March/025455.html
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
65 convert_to_bigwig(temp_file, sizes, config, outfile)
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
66 finally:
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
67 if os.path.isfile(temp_file):
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
68 os.remove(temp_file)
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
69 print ("Done")
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
70
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
71 @contextmanager
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
72 def indexed_bam(bam_file, config):
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
73 if not os.path.exists(bam_file + ".bai"):
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
74 pysam.index(bam_file)
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
75 sam_reader = pysam.Samfile(bam_file, "rb")
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
76 yield sam_reader
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
77 sam_reader.close()
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
78
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
79 def get_sizes(bam_file, config):
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
80 with indexed_bam(bam_file, config) as work_bam:
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
81 sizes = zip(work_bam.references, work_bam.lengths)
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
82 return sizes
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
83
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
84 def convert_to_graph(bam_file, split, config, out_handle):
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
85 cl = [config["program"]["bedtools_genomeCoverageBed"], "-ibam", bam_file, "-bg"]
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
86 if split:
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
87 cl.append("-split")
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
88 subprocess.check_call(cl, stdout=out_handle)
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
89
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
90 def convert_to_bigwig(bedgraph_file, chr_sizes, config, bw_file):
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
91 #This will be fine under Galaxy, but could use temp folder?
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
92 size_file = "%s-sizes.txt" % (os.path.splitext(bw_file)[0])
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
93 with open(size_file, "w") as out_handle:
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
94 for chrom, size in chr_sizes:
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
95 out_handle.write("%s\t%s\n" % (chrom, size))
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
96 try:
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
97 cl = [config["program"]["ucsc_bedGraphToBigWig"], bedgraph_file, size_file, bw_file]
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
98 subprocess.check_call(cl)
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
99 finally:
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
100 os.remove(size_file)
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
101 return bw_file
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
102
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
103 if __name__ == "__main__":
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
104 parser = OptionParser()
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
105 parser.add_option("-o", "--outfile", dest="outfile")
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
106 parser.add_option("-s", "--split", action="store_true", dest="split")
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
107 (options, args) = parser.parse_args()
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
108 if len(args) not in [1, 2]:
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
109 print ("Incorrect arguments")
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
110 print (__doc__)
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
111 sys.exit()
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
112 kwargs = dict(
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
113 outfile=options.outfile,
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
114 split=options.split)
012dc1867dc7 Not_yet_tested_properly
triasteran
parents:
diff changeset
115 main(*args, **kwargs)