annotate cpt_promote_qualifiers/promote_qualifier.py @ 0:6f4c46313117 draft default tip

Uploaded
author cpt
date Fri, 17 Jun 2022 13:05:11 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
6f4c46313117 Uploaded
cpt
parents:
diff changeset
1 #!/usr/bin/env python
6f4c46313117 Uploaded
cpt
parents:
diff changeset
2 import argparse
6f4c46313117 Uploaded
cpt
parents:
diff changeset
3 import sys
6f4c46313117 Uploaded
cpt
parents:
diff changeset
4 import logging
6f4c46313117 Uploaded
cpt
parents:
diff changeset
5 from CPT_GFFParser import gffParse, gffWrite
6f4c46313117 Uploaded
cpt
parents:
diff changeset
6 from gff3 import feature_lambda, feature_test_type
6f4c46313117 Uploaded
cpt
parents:
diff changeset
7
6f4c46313117 Uploaded
cpt
parents:
diff changeset
# Configure the root logger so INFO-and-above messages are emitted.
logging.basicConfig(level=logging.INFO)
# Logger named after this module.
log = logging.getLogger(__name__)
6f4c46313117 Uploaded
cpt
parents:
diff changeset
10
6f4c46313117 Uploaded
cpt
parents:
diff changeset
11
6f4c46313117 Uploaded
cpt
parents:
diff changeset
def _first_child_on_strand(parent_feature, child):
    """Return the ``child``-type sub-feature that comes first on the parent's strand.

    Candidates are whatever ``feature_lambda`` yields for
    ``parent_feature.sub_features`` filtered by ``feature_test_type`` with
    ``{"type": child}``.

    For a positive parent strand the candidate with the smallest
    ``location.start`` is chosen; otherwise the one with the largest
    ``location.end``. On ties, the candidate yielded first wins.

    Returns ``None`` when there is no candidate.
    """
    candidates = list(
        feature_lambda(
            parent_feature.sub_features,
            feature_test_type,
            {"type": child},
            subfeatures=False,
        )
    )
    if not candidates:
        return None

    strand = parent_feature.strand
    # A strand of None (unstranded) cannot be compared with 0 on Python 3.
    # Treat it like any other non-positive strand instead of raising TypeError.
    forward = strand is not None and strand > 0
    if forward:
        return min(candidates, key=lambda feat: feat.location.start)
    return max(candidates, key=lambda feat: feat.location.end)


def promote_qualifier(qualifier, parent, child, gff3):
    """Copy ``qualifier`` from the first child feature onto each parent feature.

    For every record produced by ``gffParse(gff3)``, each feature of type
    ``parent`` is visited. The first ``child``-type sub-feature along the
    parent's strand is located and its ``qualifier`` value is assigned to the
    parent's qualifiers, replacing any existing value. Each record is then
    written to standard output with ``gffWrite``.

    Parents without a matching child, and children lacking the qualifier, are
    reported with a warning and left unchanged. Features that have no ``ID``
    qualifier are reported as ``<no ID>`` rather than aborting the run.

    Args:
        qualifier: Qualifier key to promote.
        parent: Feature type of the features that receive the qualifier.
        child: Feature type of the sub-features that supply the qualifier.
        gff3: GFF3 input handed to ``gffParse`` (the command-line entry point
            passes an open text file handle).
    """
    for record in gffParse(gff3):
        for parent_feature in feature_lambda(
            record.features, feature_test_type, {"type": parent}, subfeatures=True
        ):
            # ID is optional in GFF3, so never index it directly for logging.
            parent_id = parent_feature.qualifiers.get("ID", "<no ID>")

            first_child = _first_child_on_strand(parent_feature, child)
            if first_child is None:
                logging.warning(
                    "Child type %s not found under parent %s", child, parent_id
                )
                continue

            child_id = first_child.qualifiers.get("ID", "<no ID>")
            # Only the qualifier lookup is guarded, so a missing ID can no
            # longer be mistaken for a missing qualifier.
            try:
                value = first_child.qualifiers[qualifier]
            except KeyError:
                logging.warning(
                    "Qualifier %s not found in child feature %s", qualifier, child_id
                )
                continue

            parent_feature.qualifiers[qualifier] = value
            logging.info(
                "Promoted %s=%s in child %s to parent %s",
                qualifier,
                value,
                child_id,
                parent_id,
            )
        # Emit the (possibly modified) record once all its parents are handled.
        gffWrite([record], sys.stdout)
6f4c46313117 Uploaded
cpt
parents:
diff changeset
53
6f4c46313117 Uploaded
cpt
parents:
diff changeset
54
6f4c46313117 Uploaded
cpt
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: parse the four positional arguments and run
    # the promotion, writing the resulting GFF3 to standard output.
    parser = argparse.ArgumentParser(
        description="Promote a child feature's qualifier to the parent feature's qualifier",
        epilog="",
    )
    parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 File")
    parser.add_argument(
        "parent",
        type=str,
        help="Feature type of the target parent feature (ex: gene, mrna, exon)",
    )
    parser.add_argument(
        "child",
        type=str,
        help="Feature type of the target child feature (ex: mrna, exon, CDS)",
    )
    parser.add_argument(
        "qualifier", help="Specific qualifier to promote (ex: Name, product, notes)"
    )
    args = parser.parse_args()
    # The argument names match promote_qualifier's parameter names exactly,
    # so the parsed namespace can be expanded directly into keyword arguments.
    promote_qualifier(**vars(args))