annotate phe/variant/MPileupVariantCaller.py @ 11:cd59be4a7fe3 draft default tip

Uploaded
author ulfschaefer
date Mon, 21 Dec 2015 11:12:19 -0500
parents c2f8e7580133
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
1 '''
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
2 Created on 22 Sep 2015
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
3
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
4 @author: alex
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
5 '''
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
6 from collections import OrderedDict
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
7 import logging
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
8 import os
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
9 import subprocess
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
10 import tempfile
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
11
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
12 from phe.variant import VariantCaller
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
13
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
14
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
15 class MPileupVariantCaller(VariantCaller):
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
16 """Implemetation of the Broad institute's variant caller."""
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
17
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
18 name = "mpileup"
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
19 """Plain text name of the variant caller."""
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
20
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
21 _default_options = "-m -f GQ"
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
22 """Default options for the variant caller."""
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
23
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
24 def __init__(self, cmd_options=None):
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
25 """Constructor"""
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
26 if cmd_options is None:
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
27 cmd_options = self._default_options
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
28
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
29 super(MPileupVariantCaller, self).__init__(cmd_options=cmd_options)
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
30
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
31 self.last_command = None
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
32
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
33 def get_info(self, plain=False):
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
34 d = {"name": self.name, "version": self.get_version(), "command": self.last_command}
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
35
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
36 if plain:
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
37 result = "mpileup(%(version)s): %(command)s" % d
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
38 else:
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
39 result = OrderedDict(d)
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
40
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
41 return result
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
42
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
43 def get_version(self):
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
44
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
45 p = subprocess.Popen(["samtools", "--version"], stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
46 (output, _) = p.communicate()
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
47
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
48 # first line is the version of the samtools
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
49 version = output.split("\n")[0].split(" ")[1]
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
50
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
51 return version
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
52
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
53 def make_vcf(self, *args, **kwargs):
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
54 ref = kwargs.get("ref")
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
55 bam = kwargs.get("bam")
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
56
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
57 make_aux = kwargs.get("make_aux", False)
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
58
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
59 if kwargs.get("vcf_file") is None:
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
60 kwargs["vcf_file"] = "variants.vcf"
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
61
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
62 opts = {"ref": os.path.abspath(ref),
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
63 "bam": os.path.abspath(bam),
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
64 "all_variants_file": os.path.abspath(kwargs.get("vcf_file")),
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
65 "extra_cmd_options": self.cmd_options}
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
66
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
67 if make_aux:
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
68 if not self.create_aux_files(ref):
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
69 logging.warn("Auxiliary files were not created.")
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
70 return False
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
71
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
72 with tempfile.NamedTemporaryFile(suffix=".pileup") as tmp:
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
73 opts["pileup_file"] = tmp.name
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
74 cmd = "samtools mpileup -t DP,DV,DP4,DPR,SP -Auf %(ref)s %(bam)s | bcftools call %(extra_cmd_options)s > %(all_variants_file)s" % opts
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
75 print cmd
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
76 self.last_command = cmd
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
77 if os.system(cmd) != 0:
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
78 logging.warn("Pileup creation was not successful.")
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
79 return False
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
80
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
81 return True
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
82
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
83 def create_aux_files(self, ref):
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
84 """Index reference with faidx from samtools.
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
85
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
86 Parameters:
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
87 -----------
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
88 ref: str
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
89 Path to the reference file.
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
90
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
91 Returns:
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
92 --------
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
93 bool:
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
94 True if auxiliary files were created, False otherwise.
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
95 """
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
96
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
97 success = os.system("samtools faidx %s" % ref)
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
98
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
99 if success != 0:
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
100 logging.warn("Fasta index could not be created.")
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
101 return False
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
102
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
103 return True