annotate phe/variant_filters/MQ0Filter.py @ 10:c2f8e7580133 draft

Uploaded
author ulfschaefer
date Mon, 21 Dec 2015 10:50:17 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
1 '''Filter VCF sites on the MQ0 (mapping quality zero reads) to DP ratio.
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
2 Created on 24 Sep 2015
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
3
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
4 @author: alex
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
5 '''
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
6
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
7 import argparse
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
8 import logging
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
9
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
10 from phe.variant_filters import PHEFilterBase
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
11
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
12
c2f8e7580133 Uploaded
ulfschaefer
parents:
diff changeset
class MQ0Filter(PHEFilterBase):
    '''Filter sites by MQ0 (Total Mapping Quality Zero Reads) to DP ratio.'''

    # NOTE: the class docstring above is read at runtime by short_desc() via
    # self.__doc__, so changing it changes the emitted description text.

    # Identifier of this filter.
    name = "MinMQ0"
    # Ratio used when no explicit threshold is supplied.
    _default_threshold = 0.05
    # Key under which the threshold is looked up in the parsed arguments
    # (dict key / argparse destination).
    parameter = "mq0_ratio"

    @classmethod
    def customize_parser(cls, parser):
        """Register this filter's command line option on *parser*.

        The option name is derived from ``parameter`` with underscores
        replaced by dashes (``--mq0-ratio``), so argparse stores the value
        under the ``mq0_ratio`` destination.

        :param parser: parser object exposing ``add_argument``.
        """
        arg_name = cls.parameter.replace("_", "-")
        parser.add_argument("--%s" % arg_name, type=float, default=cls._default_threshold,
                            help="Filter sites above given MQ0 to DP ratio (default: %s)" % cls._default_threshold)

    def __init__(self, args):
        """Min Mapping Quality Zero constructor.

        :param args: either an :py:class:`argparse.Namespace` or a ``dict``
            carrying the threshold under the ``mq0_ratio`` name. Any other
            type leaves the default threshold in place.
        :raises Exception: if *args* is a dict whose ``mq0_ratio`` entry is
            missing or cannot be converted to a float.
        """
        # This needs to happen first, because threshold is initialised here.
        super(MQ0Filter, self).__init__(args)

        # Change the threshold to the custom MQ0 ratio value.
        self.threshold = self._default_threshold
        if isinstance(args, argparse.Namespace):
            # Read the destination created by customize_parser(); fall back to
            # the default when the namespace came from a parser without it.
            self.threshold = getattr(args, self.parameter, self._default_threshold)
        elif isinstance(args, dict):
            try:
                self.threshold = float(args.get(self.parameter))
            except (TypeError, ValueError):
                logging.error("Could not retrieve threshold from %s", args.get(self.parameter))
                logging.error("This parameter requires to be a float!")
                raise Exception("Could not create MQ0 filter from parameters: %s" % args)

    def __call__(self, record):
        """Filter a :py:class:`vcf.model._Record`.

        :param record: record whose ``INFO`` mapping provides ``MQ0`` and
            ``DP``.
        :returns: ``None`` when the MQ0/DP ratio does not exceed the
            threshold; the ratio (or ``False`` when it is falsy) when it
            does; ``False`` when the ratio cannot be computed because MQ0 or
            DP is missing, or DP is zero or not numeric. If the base class
            ``_check_record`` returns anything other than ``True``, that
            value is returned unchanged.

        NOTE(review): presumably, following the PyVCF filter convention, any
        non-``None`` return value marks the site as failing - confirm against
        the caller.
        """
        good_record = self._check_record(record)

        if good_record is not True:
            return good_record

        record_mq = record.INFO.get("MQ0")

        if record_mq is None:
            # FIXME: when record_mq is None, i.e. error/missing, what do you do?
            # For now the site is reported as failing.
            return False

        if record_mq:
            # We consider DP from INFO not samples because MQ0 is also from INFO.
            try:
                record_mq /= float(record.INFO.get("DP"))
            except (TypeError, ValueError, ZeroDivisionError):
                # DP missing, non-numeric or zero: the ratio is undefined, so
                # treat the site the same way as one with a missing MQ0.
                return False

        if record_mq > self.threshold:
            return record_mq or False

        return None

    def short_desc(self):
        """Return the class docstring suffixed with the active threshold.

        :returns: ``"<class docstring> (MQ0 > <threshold>)"``, or an empty
            string when the class has no docstring.
        """
        short_desc = self.__doc__ or ''

        if short_desc:
            short_desc = "%s (MQ0 > %s)" % (short_desc, self.threshold)

        return short_desc