diff gff3_rebase.py @ 98:b1260bca5fdc draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 44d8fc559ecf5463a8f753561976fa26686c96f6
author bgruening
date Wed, 05 Jun 2024 10:00:07 +0000
parents 39b717d934a8
children
line wrap: on
line diff
--- a/gff3_rebase.py	Sat Jun 01 05:37:13 2024 +0000
+++ b/gff3_rebase.py	Wed Jun 05 10:00:07 2024 +0000
@@ -63,13 +63,8 @@
             else:
                 yield feature
 
-        if hasattr(feature, "sub_features"):
-            for x in feature_lambda(
-                feature.sub_features,
-                test,
-                test_kwargs,
-                subfeatures=subfeatures,
-            ):
+        if hasattr(feature, 'sub_features'):
+            for x in feature_lambda(feature.sub_features, test, test_kwargs, subfeatures=subfeatures):
                 yield x
 
 
@@ -79,8 +74,8 @@
     For every feature, check that at least one value in
     feature.qualifiers(kwargs['qualifier']) is in kwargs['attribute_list']
     """
-    for attribute_value in feature.qualifiers.get(kwargs["qualifier"], []):
-        if attribute_value in kwargs["attribute_list"]:
+    for attribute_value in feature.qualifiers.get(kwargs['qualifier'], []):
+        if attribute_value in kwargs['attribute_list']:
             return True
     return False
 
@@ -95,12 +90,12 @@
             # If it's an interpro specific gff3 file
             if interpro:
                 # Then we ignore polypeptide features as they're useless
-                if feature.type == "polypeptide":
+                if feature.type == 'polypeptide':
                     continue
                 # If there's an underscore, we strip up to that underscore?
                 # I do not know the rationale for this, removing.
                 # if '_' in parent_feature_id:
-                # parent_feature_id = parent_feature_id[parent_feature_id.index('_') + 1:]
+                    # parent_feature_id = parent_feature_id[parent_feature_id.index('_') + 1:]
 
             try:
                 child_features[parent_feature_id].append(feature)
@@ -117,7 +112,7 @@
         start *= 3
         end *= 3
 
-    if parent.location.strand != None and parent.location.strand >= 0:
+    if parent.location.strand >= 0:
         ns = parent.location.start + start
         ne = parent.location.start + end
         st = +1
@@ -136,33 +131,31 @@
         ns %= 3
     if ne < 0:
         ne %= 3
-    if ns > ne:
-        ne, ns = ns, ne  # dunno why but sometimes happens
+
     feature.location = FeatureLocation(ns, ne, strand=st)
 
-    if hasattr(feature, "sub_features"):
+    if hasattr(feature, 'sub_features'):
         for subfeature in feature.sub_features:
             __update_feature_location(subfeature, parent, protein2dna)
 
 
-def rebase(parent, child, interpro=False, protein2dna=False, map_by="ID"):
+def rebase(parent, child, interpro=False, protein2dna=False, map_by='ID'):
     # get all of the features we will be re-mapping in a dictionary, keyed by parent feature ID
     child_features = __get_features(child, interpro=interpro)
 
     for rec in GFF.parse(parent):
         replacement_features = []
         for feature in feature_lambda(
-            rec.features,
-            # Filter features in the parent genome by those that are
-            # "interesting", i.e. have results in child_features array.
-            # Probably an unnecessary optimisation.
-            feature_test_qual_value,
-            {
-                "qualifier": map_by,
-                "attribute_list": child_features.keys(),
-            },
-            subfeatures=False,
-        ):
+                rec.features,
+                # Filter features in the parent genome by those that are
+                # "interesting", i.e. have results in child_features array.
+                # Probably an unnecessary optimisation.
+                feature_test_qual_value,
+                {
+                    'qualifier': map_by,
+                    'attribute_list': child_features.keys(),
+                },
+                subfeatures=False):
 
             # Features which will be re-mapped
             to_remap = child_features[feature.id]
@@ -173,7 +166,7 @@
                 __update_feature_location(x, feature, protein2dna)
 
                 if interpro:
-                    for y in ("status", "Target"):
+                    for y in ('status', 'Target'):
                         try:
                             del x.qualifiers[y]
                         except Exception:
@@ -188,28 +181,14 @@
         GFF.write([rec], sys.stdout)
 
 
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="rebase gff3 features against parent locations", epilog=""
-    )
-    parser.add_argument(
-        "parent", type=argparse.FileType("r"), help="Parent GFF3 annotations"
-    )
-    parser.add_argument(
-        "child",
-        type=argparse.FileType("r"),
-        help="Child GFF3 annotations to rebase against parent",
-    )
-    parser.add_argument(
-        "--interpro",
-        action="store_true",
-        help="Interpro specific modifications",
-    )
-    parser.add_argument(
-        "--protein2dna",
-        action="store_true",
-        help="Map protein translated results to original DNA data",
-    )
-    parser.add_argument("--map_by", help="Map by key", default="ID")
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description='rebase gff3 features against parent locations', epilog="")
+    parser.add_argument('parent', type=argparse.FileType('r'), help='Parent GFF3 annotations')
+    parser.add_argument('child', type=argparse.FileType('r'), help='Child GFF3 annotations to rebase against parent')
+    parser.add_argument('--interpro', action='store_true',
+                        help='Interpro specific modifications')
+    parser.add_argument('--protein2dna', action='store_true',
+                        help='Map protein translated results to original DNA data')
+    parser.add_argument('--map_by', help='Map by key', default='ID')
     args = parser.parse_args()
     rebase(**vars(args))