Mercurial > repos > peterjc > mira4_assembler
diff tools/mira4_0/mira4_make_bam.py @ 2:4eb32a3d67d1 draft
v0.0.8 - renamed folder, added note about mirabait
author | peterjc |
---|---|
date | Wed, 02 Sep 2015 07:46:29 -0400 |
parents | |
children | 1713289d9908 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/mira4_0/mira4_make_bam.py Wed Sep 02 07:46:29 2015 -0400 @@ -0,0 +1,92 @@ +#!/usr/bin/env python +"""Wrapper script using miraconvert & samtools to get BAM from MIRA. +""" +import os +import sys +import shutil +import subprocess +import tempfile + +def sys_exit(msg, err=1): + sys.stderr.write(msg+"\n") + sys.exit(err) + +def run(cmd, log_handle): + try: + child = subprocess.Popen(cmd, shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + except Exception, err: + sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err)) + #TODO - call clean up? + log_handle.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err)) + sys.exit(1) + #Use .communicate as can get deadlocks with .wait(), + stdout, stderr = child.communicate() + assert not stderr #Should be empty as sent to stdout + if len(stdout) > 10000: + #miraconvert can be very verbose (is holding stdout in RAM a problem?) + stdout = stdout.split("\n") + stdout = stdout[:10] + ["...", "<snip>", "..."] + stdout[-10:] + stdout = "\n".join(stdout) + log_handle.write(stdout) + return child.returncode + +def depad(fasta_file, sam_file, bam_file, log_handle): + log_handle.write("\n================= Converting MIRA assembly from SAM to BAM ===================\n") + #Also doing SAM to (uncompressed) BAM during depad + bam_stem = bam_file + ".tmp" # Have write permissions and want final file in this folder + cmd = 'samtools depad -S -u -T "%s" "%s" | samtools sort - "%s"' % (fasta_file, sam_file, bam_stem) + return_code = run(cmd, log_handle) + if return_code: + return "Error %i from command:\n%s" % (return_code, cmd) + if not os.path.isfile(bam_stem + ".bam"): + return "samtools depad or sort failed to produce BAM file" + + log_handle.write("\n====================== Indexing MIRA assembly BAM file =======================\n") + cmd = 'samtools index "%s.bam"' % bam_stem + return_code = run(cmd, log_handle) + if return_code: + return "Error %i from command:\n%s" % (return_code, cmd) + if not os.path.isfile(bam_stem + ".bam.bai"): + return "samtools indexing of BAM file failed to produce BAI file" + + shutil.move(bam_stem + ".bam", bam_file) + os.remove(bam_stem + ".bam.bai") #Let Galaxy handle that... + + +def make_bam(mira_convert, maf_file, fasta_file, bam_file, log_handle): + if not os.path.isfile(mira_convert): + return "Missing binary %r" % mira_convert + if not os.path.isfile(maf_file): + return "Missing input MIRA file: %r" % maf_file + if not os.path.isfile(fasta_file): + return "Missing padded FASTA file: %r" % fasta_file + + log_handle.write("\n====================== Converting MIRA assembly to SAM =======================\n") + tmp_dir = tempfile.mkdtemp() + sam_file = os.path.join(tmp_dir, "x.sam") + + # Note add nbb to the template name, possible MIRA 4.0 RC4 bug + cmd = '"%s" -f maf -t samnbb "%s" "%snbb"' % (mira_convert, maf_file, sam_file) + return_code = run(cmd, log_handle) + if return_code: + return "Error %i from command:\n%s" % (return_code, cmd) + if not os.path.isfile(sam_file): + return "Conversion from MIRA to SAM failed" + + #Also doing SAM to (uncompressed) BAM during depad + msg = depad(fasta_file, sam_file, bam_file, log_handle) + if msg: + return msg + + os.remove(sam_file) + os.rmdir(tmp_dir) + + return None #Good :) + +if __name__ == "__main__": + mira_convert, maf_file, fasta_file, bam_file = sys.argv[1:] + msg = make_bam(mira_convert, maf_file, fasta_file, bam_file, sys.stdout) + if msg: + sys_exit(msg)