annotate tools/mira4_0/mira4_make_bam.py @ 4:1713289d9908 draft default tip

v0.0.11 tweak for use with bioconda dependencies
author peterjc
date Thu, 10 Aug 2017 11:09:10 -0400
parents 4eb32a3d67d1
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
1 #!/usr/bin/env python
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
2 """Wrapper script using miraconvert & samtools to get BAM from MIRA.
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
3 """
4
1713289d9908 v0.0.11 tweak for use with bioconda dependencies
peterjc
parents: 2
diff changeset
4
2
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
5 import os
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
6 import shutil
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
7 import subprocess
4
1713289d9908 v0.0.11 tweak for use with bioconda dependencies
peterjc
parents: 2
diff changeset
8 import sys
2
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
9 import tempfile
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
10
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
11
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
12 def run(cmd, log_handle):
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
13 try:
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
14 child = subprocess.Popen(cmd, shell=True,
4
1713289d9908 v0.0.11 tweak for use with bioconda dependencies
peterjc
parents: 2
diff changeset
15 universal_newlines=True,
2
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
16 stdout=subprocess.PIPE,
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
17 stderr=subprocess.STDOUT)
4
1713289d9908 v0.0.11 tweak for use with bioconda dependencies
peterjc
parents: 2
diff changeset
18 except Exception as err:
2
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
19 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
4
1713289d9908 v0.0.11 tweak for use with bioconda dependencies
peterjc
parents: 2
diff changeset
20 # TODO - call clean up?
2
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
21 log_handle.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err))
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
22 sys.exit(1)
4
1713289d9908 v0.0.11 tweak for use with bioconda dependencies
peterjc
parents: 2
diff changeset
23 # Use .communicate as can get deadlocks with .wait(),
2
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
24 stdout, stderr = child.communicate()
4
1713289d9908 v0.0.11 tweak for use with bioconda dependencies
peterjc
parents: 2
diff changeset
25 assert not stderr # Should be empty as sent to stdout
2
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
26 if len(stdout) > 10000:
4
1713289d9908 v0.0.11 tweak for use with bioconda dependencies
peterjc
parents: 2
diff changeset
27 # miraconvert can be very verbose (is holding stdout in RAM a problem?)
2
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
28 stdout = stdout.split("\n")
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
29 stdout = stdout[:10] + ["...", "<snip>", "..."] + stdout[-10:]
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
30 stdout = "\n".join(stdout)
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
31 log_handle.write(stdout)
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
32 return child.returncode
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
33
4
1713289d9908 v0.0.11 tweak for use with bioconda dependencies
peterjc
parents: 2
diff changeset
34
2
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
35 def depad(fasta_file, sam_file, bam_file, log_handle):
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
36 log_handle.write("\n================= Converting MIRA assembly from SAM to BAM ===================\n")
4
1713289d9908 v0.0.11 tweak for use with bioconda dependencies
peterjc
parents: 2
diff changeset
37 # Also doing SAM to (uncompressed) BAM during depad
1713289d9908 v0.0.11 tweak for use with bioconda dependencies
peterjc
parents: 2
diff changeset
38 bam_stem = bam_file + ".tmp" # Have write permissions and want final file in this folder
2
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
39 cmd = 'samtools depad -S -u -T "%s" "%s" | samtools sort - "%s"' % (fasta_file, sam_file, bam_stem)
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
40 return_code = run(cmd, log_handle)
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
41 if return_code:
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
42 return "Error %i from command:\n%s" % (return_code, cmd)
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
43 if not os.path.isfile(bam_stem + ".bam"):
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
44 return "samtools depad or sort failed to produce BAM file"
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
45
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
46 log_handle.write("\n====================== Indexing MIRA assembly BAM file =======================\n")
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
47 cmd = 'samtools index "%s.bam"' % bam_stem
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
48 return_code = run(cmd, log_handle)
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
49 if return_code:
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
50 return "Error %i from command:\n%s" % (return_code, cmd)
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
51 if not os.path.isfile(bam_stem + ".bam.bai"):
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
52 return "samtools indexing of BAM file failed to produce BAI file"
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
53
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
54 shutil.move(bam_stem + ".bam", bam_file)
4
1713289d9908 v0.0.11 tweak for use with bioconda dependencies
peterjc
parents: 2
diff changeset
55 os.remove(bam_stem + ".bam.bai") # Let Galaxy handle that...
2
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
56
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
57
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
58 def make_bam(mira_convert, maf_file, fasta_file, bam_file, log_handle):
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
59 if not os.path.isfile(maf_file):
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
60 return "Missing input MIRA file: %r" % maf_file
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
61 if not os.path.isfile(fasta_file):
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
62 return "Missing padded FASTA file: %r" % fasta_file
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
63
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
64 log_handle.write("\n====================== Converting MIRA assembly to SAM =======================\n")
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
65 tmp_dir = tempfile.mkdtemp()
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
66 sam_file = os.path.join(tmp_dir, "x.sam")
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
67
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
68 # Note add nbb to the template name, possible MIRA 4.0 RC4 bug
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
69 cmd = '"%s" -f maf -t samnbb "%s" "%snbb"' % (mira_convert, maf_file, sam_file)
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
70 return_code = run(cmd, log_handle)
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
71 if return_code:
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
72 return "Error %i from command:\n%s" % (return_code, cmd)
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
73 if not os.path.isfile(sam_file):
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
74 return "Conversion from MIRA to SAM failed"
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
75
4
1713289d9908 v0.0.11 tweak for use with bioconda dependencies
peterjc
parents: 2
diff changeset
76 # Also doing SAM to (uncompressed) BAM during depad
2
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
77 msg = depad(fasta_file, sam_file, bam_file, log_handle)
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
78 if msg:
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
79 return msg
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
80
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
81 os.remove(sam_file)
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
82 os.rmdir(tmp_dir)
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
83
4
1713289d9908 v0.0.11 tweak for use with bioconda dependencies
peterjc
parents: 2
diff changeset
84 return None # Good :)
1713289d9908 v0.0.11 tweak for use with bioconda dependencies
peterjc
parents: 2
diff changeset
85
2
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
86
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
87 if __name__ == "__main__":
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
88 mira_convert, maf_file, fasta_file, bam_file = sys.argv[1:]
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
89 msg = make_bam(mira_convert, maf_file, fasta_file, bam_file, sys.stdout)
4eb32a3d67d1 v0.0.8 - renamed folder, added note about mirabait
peterjc
parents:
diff changeset
90 if msg:
4
1713289d9908 v0.0.11 tweak for use with bioconda dependencies
peterjc
parents: 2
diff changeset
91 sys.exit(msg)