Mercurial > repos > triasteran > bam_to_bigwig
view BamToBigWig_planemo/bam_to_bigwig_py3.py @ 10:03546efc0470 draft default tip
Uploaded
author | triasteran |
---|---|
date | Tue, 22 Feb 2022 16:37:09 +0000 |
parents | 012dc1867dc7 |
children |
line wrap: on
line source
#!/usr/bin/env python #Original version copyright Brad Chapman with revisions from Peter Cock #and ideas from Lance Parsons """Convert BAM files to BigWig file format in a specified region. Usage: bam_to_wiggle.py <BAM file> [<YAML config>] [--outfile=<output file name>] The optional config file (not used by the Galaxy interface) is in YAML format and specifies the location of the binary tools. program: bedtools_genomeCoverageBed: genomeCoverageBed ucsc_bedGraphToBigWig: bedGraphToBigWig If not specified, these will be assumed to be present in the system path. The script requires: pysam (http://code.google.com/p/pysam/) genomeCoverageBed from BedTools (http://code.google.com/p/bedtools/) bedGraphToBigWig from UCSC (http://hgdownload.cse.ucsc.edu/admin/exe/) If a configuration file is used, then PyYAML is also required (http://pyyaml.org/) """ import os import sys import subprocess import tempfile from optparse import OptionParser from contextlib import contextmanager, closing import pysam def main(bam_file, config_file=None, outfile=None, split=False): if config_file: import yaml with open(config_file) as in_handle: config = yaml.load(in_handle) else: config = {"program": {"ucsc_bedGraphToBigWig" : "bedGraphToBigWig", "bedtools_genomeCoverageBed" : "genomeCoverageBed"}} if outfile is None: outfile = "%s.bigwig" % os.path.splitext(bam_file)[0] if os.path.abspath(bam_file) == os.path.abspath(outfile): sys.stderr.write("Bad arguments, input and output files are the same.\n") sys.exit(1) if os.path.exists(outfile) and os.path.getsize(outfile) > 0: sys.stderr.write("Warning, output file already exists.\n") sizes = get_sizes(bam_file, config) #print ("Have %i references" % len(sizes)) if not sizes: sys.stderr.write("Problem reading BAM header.\n") sys.exit(1) #Use a temp file to avoid any possiblity of not having write permission temp_handle = tempfile.NamedTemporaryFile(delete=False) temp_file = temp_handle.name with closing(temp_handle): print ("Calculating coverage...") convert_to_graph(bam_file, split, config, temp_handle) try: print ("Converting %i MB graph file to bigwig..." % (os.path.getsize(temp_file) // (1024*1024))) #Can't pipe this as stdin due to converter design, #https://lists.soe.ucsc.edu/pipermail/genome/2011-March/025455.html convert_to_bigwig(temp_file, sizes, config, outfile) finally: if os.path.isfile(temp_file): os.remove(temp_file) print ("Done") @contextmanager def indexed_bam(bam_file, config): if not os.path.exists(bam_file + ".bai"): pysam.index(bam_file) sam_reader = pysam.Samfile(bam_file, "rb") yield sam_reader sam_reader.close() def get_sizes(bam_file, config): with indexed_bam(bam_file, config) as work_bam: sizes = zip(work_bam.references, work_bam.lengths) return sizes def convert_to_graph(bam_file, split, config, out_handle): cl = [config["program"]["bedtools_genomeCoverageBed"], "-ibam", bam_file, "-bg"] if split: cl.append("-split") subprocess.check_call(cl, stdout=out_handle) def convert_to_bigwig(bedgraph_file, chr_sizes, config, bw_file): #This will be fine under Galaxy, but could use temp folder? size_file = "%s-sizes.txt" % (os.path.splitext(bw_file)[0]) with open(size_file, "w") as out_handle: for chrom, size in chr_sizes: out_handle.write("%s\t%s\n" % (chrom, size)) try: cl = [config["program"]["ucsc_bedGraphToBigWig"], bedgraph_file, size_file, bw_file] subprocess.check_call(cl) finally: os.remove(size_file) return bw_file if __name__ == "__main__": parser = OptionParser() parser.add_option("-o", "--outfile", dest="outfile") parser.add_option("-s", "--split", action="store_true", dest="split") (options, args) = parser.parse_args() if len(args) not in [1, 2]: print ("Incorrect arguments") print (__doc__) sys.exit() kwargs = dict( outfile=options.outfile, split=options.split) main(*args, **kwargs)