2
|
1 #!/usr/bin/env python
|
|
2 """Wrapper script using miraconvert & samtools to get BAM from MIRA.
|
|
3 """
|
4
|
4
|
2
|
5 import os
|
|
6 import shutil
|
|
7 import subprocess
|
4
|
8 import sys
|
2
|
9 import tempfile
|
|
10
|
|
11
|
|
12 def run(cmd, log_handle):
|
|
13 try:
|
|
14 child = subprocess.Popen(cmd, shell=True,
|
4
|
15 universal_newlines=True,
|
2
|
16 stdout=subprocess.PIPE,
|
|
17 stderr=subprocess.STDOUT)
|
4
|
18 except Exception as err:
|
2
|
19 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
|
4
|
20 # TODO - call clean up?
|
2
|
21 log_handle.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
|
|
22 sys.exit(1)
|
4
|
23 # Use .communicate as can get deadlocks with .wait(),
|
2
|
24 stdout, stderr = child.communicate()
|
4
|
25 assert not stderr # Should be empty as sent to stdout
|
2
|
26 if len(stdout) > 10000:
|
4
|
27 # miraconvert can be very verbose (is holding stdout in RAM a problem?)
|
2
|
28 stdout = stdout.split("\n")
|
|
29 stdout = stdout[:10] + ["...", "<snip>", "..."] + stdout[-10:]
|
|
30 stdout = "\n".join(stdout)
|
|
31 log_handle.write(stdout)
|
|
32 return child.returncode
|
|
33
|
4
|
34
|
2
|
35 def depad(fasta_file, sam_file, bam_file, log_handle):
|
|
36 log_handle.write("\n================= Converting MIRA assembly from SAM to BAM ===================\n")
|
4
|
37 # Also doing SAM to (uncompressed) BAM during depad
|
|
38 bam_stem = bam_file + ".tmp" # Have write permissions and want final file in this folder
|
2
|
39 cmd = 'samtools depad -S -u -T "%s" "%s" | samtools sort - "%s"' % (fasta_file, sam_file, bam_stem)
|
|
40 return_code = run(cmd, log_handle)
|
|
41 if return_code:
|
|
42 return "Error %i from command:\n%s" % (return_code, cmd)
|
|
43 if not os.path.isfile(bam_stem + ".bam"):
|
|
44 return "samtools depad or sort failed to produce BAM file"
|
|
45
|
|
46 log_handle.write("\n====================== Indexing MIRA assembly BAM file =======================\n")
|
|
47 cmd = 'samtools index "%s.bam"' % bam_stem
|
|
48 return_code = run(cmd, log_handle)
|
|
49 if return_code:
|
|
50 return "Error %i from command:\n%s" % (return_code, cmd)
|
|
51 if not os.path.isfile(bam_stem + ".bam.bai"):
|
|
52 return "samtools indexing of BAM file failed to produce BAI file"
|
|
53
|
|
54 shutil.move(bam_stem + ".bam", bam_file)
|
4
|
55 os.remove(bam_stem + ".bam.bai") # Let Galaxy handle that...
|
2
|
56
|
|
57
|
|
58 def make_bam(mira_convert, maf_file, fasta_file, bam_file, log_handle):
|
|
59 if not os.path.isfile(maf_file):
|
|
60 return "Missing input MIRA file: %r" % maf_file
|
|
61 if not os.path.isfile(fasta_file):
|
|
62 return "Missing padded FASTA file: %r" % fasta_file
|
|
63
|
|
64 log_handle.write("\n====================== Converting MIRA assembly to SAM =======================\n")
|
|
65 tmp_dir = tempfile.mkdtemp()
|
|
66 sam_file = os.path.join(tmp_dir, "x.sam")
|
|
67
|
|
68 # Note add nbb to the template name, possible MIRA 4.0 RC4 bug
|
|
69 cmd = '"%s" -f maf -t samnbb "%s" "%snbb"' % (mira_convert, maf_file, sam_file)
|
|
70 return_code = run(cmd, log_handle)
|
|
71 if return_code:
|
|
72 return "Error %i from command:\n%s" % (return_code, cmd)
|
|
73 if not os.path.isfile(sam_file):
|
|
74 return "Conversion from MIRA to SAM failed"
|
|
75
|
4
|
76 # Also doing SAM to (uncompressed) BAM during depad
|
2
|
77 msg = depad(fasta_file, sam_file, bam_file, log_handle)
|
|
78 if msg:
|
|
79 return msg
|
|
80
|
|
81 os.remove(sam_file)
|
|
82 os.rmdir(tmp_dir)
|
|
83
|
4
|
84 return None # Good :)
|
|
85
|
2
|
86
|
|
87 if __name__ == "__main__":
|
|
88 mira_convert, maf_file, fasta_file, bam_file = sys.argv[1:]
|
|
89 msg = make_bam(mira_convert, maf_file, fasta_file, bam_file, sys.stdout)
|
|
90 if msg:
|
4
|
91 sys.exit(msg)
|