annotate phe/variant_filters/DP4Filter.py @ 0:834a312c0114 draft

Uploaded
author ulfschaefer
date Thu, 10 Dec 2015 09:22:39 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
1 '''Filter VCFs on DP4 ratio.
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
2
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
3 Created on 24 Sep 2015
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
4
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
5 @author: alex
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
6 '''
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
7
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
8 import argparse
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
9 import logging
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
10
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
11 from phe.variant_filters import PHEFilterBase
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
12
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
13
834a312c0114 Uploaded
ulfschaefer
parents:
diff changeset
class DP4Filter(PHEFilterBase):
    '''Filter sites by DP4 ratio.'''
    # NOTE: the class docstring above is emitted at runtime by short_desc(),
    # so keep it a single descriptive line.
    #
    # The filter looks at the 4-element "DP4" INFO field of a SNP record and
    # computes sum(last two entries) / sum(all four entries). Records whose
    # ratio is below the threshold (or cannot be computed) are reported.
    # NOTE(review): DP4 is conventionally (ref-fwd, ref-rev, alt-fwd, alt-rev)
    # in samtools/bcftools output, which would make this the fraction of
    # reads supporting the alternate allele -- confirm against the caller.

    # Filter identifier.
    name = "DP4"
    # Threshold used when none is supplied via the arguments.
    _default_threshold = 0.9
    # Key under which the threshold is looked up in dict/Namespace arguments.
    parameter = "dp4_ratio"

    @classmethod
    def customize_parser(cls, parser):
        """Register the ``--dp4-ratio`` command line option on *parser*.

        The option name is derived from ``parameter`` by swapping underscores
        for dashes; argparse stores the parsed value back under the
        underscore form (``dp4_ratio``).
        """
        arg_name = cls.parameter.replace("_", "-")
        parser.add_argument("--%s" % arg_name, type=float, default=cls._default_threshold,
                            help="Filter sites below minimum dp4 ratio (default: %s)" % cls._default_threshold)

    def __init__(self, args):
        """DP4 ratio constructor.

        *args* is either an ``argparse.Namespace`` or a ``dict``. In both
        cases the threshold is read from the entry named by ``parameter``.
        Any other type leaves the threshold at ``_default_threshold``.
        """
        # This needs to happen first, because threshold is initialised here.
        super(DP4Filter, self).__init__(args)

        # Change the threshold to custom dp4 ratio value.
        self.threshold = self._default_threshold
        if isinstance(args, argparse.Namespace):
            # Read the attribute that customize_parser() registers
            # ("dp4_ratio"); fall back to the default if it is absent.
            self.threshold = getattr(args, self.parameter, self._default_threshold)
        elif isinstance(args, dict):
            try:
                self.threshold = float(args.get(self.parameter))
            except (TypeError, ValueError):
                # TypeError: key missing (None); ValueError: non-numeric text.
                logging.error("Could not retrieve threshold from %s", args.get(self.parameter))
                self.threshold = None

    def __call__(self, record):
        """Filter a :py:class:`vcf.model._Record`.

        Returns ``None`` when the record is not a SNP or when its ratio is at
        or above the threshold. Otherwise returns the ratio, or ``False`` when
        the ratio is zero or could not be computed.
        NOTE(review): presumably any non-None return marks the record as
        filtered -- confirm against PHEFilterBase.
        """

        if not record.is_snp:
            return None

        # Bound up front so the error log below can always reference it.
        record_dp = None
        try:
            record_dp = record.INFO.get("DP4")

            # FIXME: when record length is > 2, what do you do?
            # Explicit check instead of assert, so it survives python -O.
            if record_dp is None or len(record_dp) != 4:
                raise ValueError("DP4 data should have 4 datum POS: %i" % record.POS)

            depth = sum(record_dp)

            # A depth of zero raises ZeroDivisionError, handled below.
            ratio = float(sum(record_dp[2:])) / depth
        except Exception:
            # Deliberately broad: any malformed record is logged and treated
            # as "ratio unknown" rather than aborting the run.
            logging.error("Could not calculate DP4 ratio from %s POS: %s", record_dp, record.POS)
            ratio = None

        if ratio is None or ratio < self.threshold:
            # FIXME: When ratio is None, i.e. error, what do you do?
            # None and 0.0 both collapse to False here.
            return ratio or False
        else:
            return None

    def short_desc(self):
        """Return the class docstring followed by the active threshold.

        Returns an empty string when the class has no docstring.
        """
        short_desc = self.__doc__ or ''

        if short_desc:
            short_desc = "%s (DP4 ratio > %s )" % (short_desc, self.threshold)

        return short_desc