diff cpt_promote_qualifiers/promote_qualifier.py @ 0:6f4c46313117 draft default tip

Uploaded
author cpt
date Fri, 17 Jun 2022 13:05:11 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/cpt_promote_qualifiers/promote_qualifier.py	Fri Jun 17 13:05:11 2022 +0000
@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+import argparse
+import sys
+import logging
+from CPT_GFFParser import gffParse, gffWrite
+from gff3 import feature_lambda, feature_test_type
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger(__name__)
+
+
+def promote_qualifier(qualifier, parent, child, gff3):
+    for record in gffParse(gff3):
+        for parent_feature in feature_lambda(
+            record.features, feature_test_type, {"type": parent}, subfeatures=True
+        ):
+            # for each feature of the parent type, get the first subfeature of the child type
+            try:
+                first_child = sorted(
+                    list(
+                        feature_lambda(
+                            parent_feature.sub_features,
+                            feature_test_type,
+                            {"type": child},
+                            subfeatures=False,
+                        )
+                    ),
+                    key=lambda x: x.location.start
+                    if parent_feature.strand > 0
+                    else x.location.end,
+                    reverse=False if parent_feature.strand > 0 else True,
+                )[0]
+            except IndexError:
+                logging.warning("Child type %s not found under parent %s" % (child, parent_feature.qualifiers["ID"]))
+                continue
+            try:
+                parent_feature.qualifiers[qualifier] = first_child.qualifiers[qualifier]
+                logging.info(
+                    "Promoted %s=%s in child %s to parent %s"
+                    % (
+                        qualifier,
+                        first_child.qualifiers[qualifier],
+                        first_child.qualifiers["ID"],
+                        parent_feature.qualifiers["ID"],
+                    )
+                )
+            except KeyError:
+                logging.warning(
+                    "Qualifier %s not found in child feature %s"
+                    % (qualifier, first_child.qualifiers["ID"])
+                )
+        gffWrite([record], sys.stdout)
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Promote a child feature's qualifer to the parent feature's qualifier",
+        epilog="",
+    )
+    parser.add_argument("gff3", type=argparse.FileType("r"), help="GFF3 File")
+    parser.add_argument(
+        "parent",
+        type=str,
+        help="Feature type of the target parent feature (ex: gene, mrna, exon",
+    )
+    parser.add_argument(
+        "child",
+        type=str,
+        help="Feature type of the target child feature (ex: mrna, exon, CDS",
+    )
+    parser.add_argument(
+        "qualifier", help="Sepcific qualifier to promote (ex: Name, product, notes"
+    )
+    args = parser.parse_args()
+    promote_qualifier(**vars(args))