diff jbrowse2/gff3_rebase.py @ 6:88b9b105c09b draft

Uploaded
author fubar
date Fri, 05 Jan 2024 01:58:02 +0000
parents cd5d63cd0eb5
children
line wrap: on
line diff
--- a/jbrowse2/gff3_rebase.py	Thu Jan 04 02:18:18 2024 +0000
+++ b/jbrowse2/gff3_rebase.py	Fri Jan 05 01:58:02 2024 +0000
@@ -63,8 +63,10 @@
             else:
                 yield feature
 
-        if hasattr(feature, 'sub_features'):
-            for x in feature_lambda(feature.sub_features, test, test_kwargs, subfeatures=subfeatures):
+        if hasattr(feature, "sub_features"):
+            for x in feature_lambda(
+                feature.sub_features, test, test_kwargs, subfeatures=subfeatures
+            ):
                 yield x
 
 
@@ -74,8 +76,8 @@
     For every feature, check that at least one value in
     feature.quailfiers(kwargs['qualifier']) is in kwargs['attribute_list']
     """
-    for attribute_value in feature.qualifiers.get(kwargs['qualifier'], []):
-        if attribute_value in kwargs['attribute_list']:
+    for attribute_value in feature.qualifiers.get(kwargs["qualifier"], []):
+        if attribute_value in kwargs["attribute_list"]:
             return True
     return False
 
@@ -90,12 +92,12 @@
             # If it's an interpro specific gff3 file
             if interpro:
                 # Then we ignore polypeptide features as they're useless
-                if feature.type == 'polypeptide':
+                if feature.type == "polypeptide":
                     continue
                 # If there's an underscore, we strip up to that underscore?
                 # I do not know the rationale for this, removing.
                 # if '_' in parent_feature_id:
-                    # parent_feature_id = parent_feature_id[parent_feature_id.index('_') + 1:]
+                # parent_feature_id = parent_feature_id[parent_feature_id.index('_') + 1:]
 
             try:
                 child_features[parent_feature_id].append(feature)
@@ -134,28 +136,29 @@
 
     feature.location = FeatureLocation(ns, ne, strand=st)
 
-    if hasattr(feature, 'sub_features'):
+    if hasattr(feature, "sub_features"):
         for subfeature in feature.sub_features:
             __update_feature_location(subfeature, parent, protein2dna)
 
 
-def rebase(parent, child, interpro=False, protein2dna=False, map_by='ID'):
+def rebase(parent, child, interpro=False, protein2dna=False, map_by="ID"):
     # get all of the features we will be re-mapping in a dictionary, keyed by parent feature ID
     child_features = __get_features(child, interpro=interpro)
 
     for rec in GFF.parse(parent):
         replacement_features = []
         for feature in feature_lambda(
-                rec.features,
-                # Filter features in the parent genome by those that are
-                # "interesting", i.e. have results in child_features array.
-                # Probably an unnecessary optimisation.
-                feature_test_qual_value,
-                {
-                    'qualifier': map_by,
-                    'attribute_list': child_features.keys(),
-                },
-                subfeatures=False):
+            rec.features,
+            # Filter features in the parent genome by those that are
+            # "interesting", i.e. have results in child_features array.
+            # Probably an unnecessary optimisation.
+            feature_test_qual_value,
+            {
+                "qualifier": map_by,
+                "attribute_list": child_features.keys(),
+            },
+            subfeatures=False,
+        ):
 
             # Features which will be re-mapped
             to_remap = child_features[feature.id]
@@ -166,7 +169,7 @@
                 __update_feature_location(x, feature, protein2dna)
 
                 if interpro:
-                    for y in ('status', 'Target'):
+                    for y in ("status", "Target"):
                         try:
                             del x.qualifiers[y]
                         except Exception:
@@ -181,14 +184,26 @@
         GFF.write([rec], sys.stdout)
 
 
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='rebase gff3 features against parent locations', epilog="")
-    parser.add_argument('parent', type=argparse.FileType('r'), help='Parent GFF3 annotations')
-    parser.add_argument('child', type=argparse.FileType('r'), help='Child GFF3 annotations to rebase against parent')
-    parser.add_argument('--interpro', action='store_true',
-                        help='Interpro specific modifications')
-    parser.add_argument('--protein2dna', action='store_true',
-                        help='Map protein translated results to original DNA data')
-    parser.add_argument('--map_by', help='Map by key', default='ID')
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="rebase gff3 features against parent locations", epilog=""
+    )
+    parser.add_argument(
+        "parent", type=argparse.FileType("r"), help="Parent GFF3 annotations"
+    )
+    parser.add_argument(
+        "child",
+        type=argparse.FileType("r"),
+        help="Child GFF3 annotations to rebase against parent",
+    )
+    parser.add_argument(
+        "--interpro", action="store_true", help="Interpro specific modifications"
+    )
+    parser.add_argument(
+        "--protein2dna",
+        action="store_true",
+        help="Map protein translated results to original DNA data",
+    )
+    parser.add_argument("--map_by", help="Map by key", default="ID")
     args = parser.parse_args()
     rebase(**vars(args))