annotate phe/variant_filters/ADFilter.py @ 11:cd59be4a7fe3 draft default tip

Uploaded
author ulfschaefer
date Mon, 21 Dec 2015 11:12:19 -0500
parents c2f8e7580133
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
1 '''Filter VCFs on AD ratio.
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
2
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
3 Created on 24 Sep 2015
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
4
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
5 @author: alex
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
6 '''
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
7
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
8 import argparse
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
9 import logging
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
10
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
11 from phe.variant_filters import PHEFilterBase
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
12
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
13
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
class ADFilter(PHEFilterBase):
    '''Filter sites by AD ratio.'''

    # NOTE: the class docstring above is read at runtime by short_desc();
    # changing it changes the text that short_desc() returns.

    # Identifier of this filter.
    name = "ADRatio"
    # Threshold applied when the caller does not supply one.
    _default_threshold = 0.9
    # Key under which the threshold is looked up in the constructor arguments;
    # also used (with "_" replaced by "-") as the command line option name.
    parameter = "ad_ratio"

    @classmethod
    def customize_parser(cls, parser):
        """Register this filter's command line option on *parser*.

        Adds a float option named after ``parameter`` (``--ad-ratio``) whose
        default is ``_default_threshold``.
        """
        arg_name = cls.parameter.replace("_", "-")
        parser.add_argument("--%s" % arg_name, type=float, default=cls._default_threshold,
                            help="Filter sites below minimum ad ratio (default: %s)" % cls._default_threshold)

    def __init__(self, args):
        """AD Ratio constructor.

        *args* may be an ``argparse.Namespace`` (threshold read from the
        attribute named by ``parameter``) or a ``dict`` (threshold read from
        the key named by ``parameter`` and converted to ``float``).  For any
        other type the default threshold is kept.

        Raises ``Exception`` when *args* is a dict whose value is missing or
        cannot be converted to ``float``.
        """
        # This needs to happen first, because threshold is initialised here.
        super(ADFilter, self).__init__(args)

        # Change the threshold to the custom AD ratio value.
        self.threshold = self._default_threshold
        if isinstance(args, argparse.Namespace):
            # Look the attribute up via ``parameter`` so that this branch
            # agrees with customize_parser() and with the dict branch below.
            self.threshold = getattr(args, self.parameter)
        elif isinstance(args, dict):
            try:
                self.threshold = float(args.get(self.parameter))
            except (TypeError, ValueError):
                logging.error("Could not retrieve threshold from %s", args.get(self.parameter))
                logging.error("This parameter requires to be a float!")
                raise Exception("Could not create AD filter from parameters: %s" % args)

    def __call__(self, record):
        """Filter a :py:class:`vcf.model._Record`.

        Returns ``None`` when the AD ratio of the first sample is at or above
        the threshold.  Otherwise returns the ratio itself, or ``False`` when
        the ratio is ``0`` or could not be calculated.  When
        ``_check_record`` returns anything other than ``True``, that value is
        returned unchanged.
        """

        good_record = self._check_record(record)

        if good_record is not True:
            return good_record

        if len(record.samples) > 1:
            logging.warning("More than 1 sample detected. Only first is considered.")

        try:
            record_ad = record.samples[0].data.AD

            # FIXME: when record length is > 2, what do you do?
            # Explicit check instead of ``assert`` so that it is still
            # enforced when Python runs with optimisations (-O) enabled.
            if len(record_ad) != 2:
                raise ValueError("AD data is incomplete POS: %i" % record.POS)

            depth = sum(record_ad)

            ratio = float(record_ad[1]) / depth
        except Exception:
            # Deliberately broad: missing/malformed AD data or a zero depth
            # must not abort the run; the site is reported as failing instead.
            logging.warning("Could not calculate AD ratio from %s POS: %s", record, record.POS)
            ratio = None

        if ratio is None or ratio < self.threshold:
            # FIXME: When ratio is None, i.e. error, what do you do?
            # ``ratio or False`` maps both None and 0.0 to False, so the
            # result is never None on this path.
            return ratio or False

        return None

    def short_desc(self):
        """Return the class docstring followed by the active threshold.

        Returns an empty string when the class has no docstring.
        """
        short_desc = self.__doc__ or ''

        if short_desc:
            short_desc = "%s (AD ratio > %s )" % (short_desc, self.threshold)

        return short_desc