Mercurial > repos > fubar > jbrowse2dev
diff jbrowse2/gff3_rebase.py @ 6:88b9b105c09b draft
Uploaded
author | fubar |
---|---|
date | Fri, 05 Jan 2024 01:58:02 +0000 |
parents | cd5d63cd0eb5 |
children |
line wrap: on
line diff
--- a/jbrowse2/gff3_rebase.py Thu Jan 04 02:18:18 2024 +0000 +++ b/jbrowse2/gff3_rebase.py Fri Jan 05 01:58:02 2024 +0000 @@ -63,8 +63,10 @@ else: yield feature - if hasattr(feature, 'sub_features'): - for x in feature_lambda(feature.sub_features, test, test_kwargs, subfeatures=subfeatures): + if hasattr(feature, "sub_features"): + for x in feature_lambda( + feature.sub_features, test, test_kwargs, subfeatures=subfeatures + ): yield x @@ -74,8 +76,8 @@ For every feature, check that at least one value in feature.quailfiers(kwargs['qualifier']) is in kwargs['attribute_list'] """ - for attribute_value in feature.qualifiers.get(kwargs['qualifier'], []): - if attribute_value in kwargs['attribute_list']: + for attribute_value in feature.qualifiers.get(kwargs["qualifier"], []): + if attribute_value in kwargs["attribute_list"]: return True return False @@ -90,12 +92,12 @@ # If it's an interpro specific gff3 file if interpro: # Then we ignore polypeptide features as they're useless - if feature.type == 'polypeptide': + if feature.type == "polypeptide": continue # If there's an underscore, we strip up to that underscore? # I do not know the rationale for this, removing. # if '_' in parent_feature_id: - # parent_feature_id = parent_feature_id[parent_feature_id.index('_') + 1:] + # parent_feature_id = parent_feature_id[parent_feature_id.index('_') + 1:] try: child_features[parent_feature_id].append(feature) @@ -134,28 +136,29 @@ feature.location = FeatureLocation(ns, ne, strand=st) - if hasattr(feature, 'sub_features'): + if hasattr(feature, "sub_features"): for subfeature in feature.sub_features: __update_feature_location(subfeature, parent, protein2dna) -def rebase(parent, child, interpro=False, protein2dna=False, map_by='ID'): +def rebase(parent, child, interpro=False, protein2dna=False, map_by="ID"): # get all of the features we will be re-mapping in a dictionary, keyed by parent feature ID child_features = __get_features(child, interpro=interpro) for rec in GFF.parse(parent): replacement_features = [] for feature in feature_lambda( - rec.features, - # Filter features in the parent genome by those that are - # "interesting", i.e. have results in child_features array. - # Probably an unnecessary optimisation. - feature_test_qual_value, - { - 'qualifier': map_by, - 'attribute_list': child_features.keys(), - }, - subfeatures=False): + rec.features, + # Filter features in the parent genome by those that are + # "interesting", i.e. have results in child_features array. + # Probably an unnecessary optimisation. + feature_test_qual_value, + { + "qualifier": map_by, + "attribute_list": child_features.keys(), + }, + subfeatures=False, + ): # Features which will be re-mapped to_remap = child_features[feature.id] @@ -166,7 +169,7 @@ __update_feature_location(x, feature, protein2dna) if interpro: - for y in ('status', 'Target'): + for y in ("status", "Target"): try: del x.qualifiers[y] except Exception: @@ -181,14 +184,26 @@ GFF.write([rec], sys.stdout) -if __name__ == '__main__': - parser = argparse.ArgumentParser(description='rebase gff3 features against parent locations', epilog="") - parser.add_argument('parent', type=argparse.FileType('r'), help='Parent GFF3 annotations') - parser.add_argument('child', type=argparse.FileType('r'), help='Child GFF3 annotations to rebase against parent') - parser.add_argument('--interpro', action='store_true', - help='Interpro specific modifications') - parser.add_argument('--protein2dna', action='store_true', - help='Map protein translated results to original DNA data') - parser.add_argument('--map_by', help='Map by key', default='ID') +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="rebase gff3 features against parent locations", epilog="" + ) + parser.add_argument( + "parent", type=argparse.FileType("r"), help="Parent GFF3 annotations" + ) + parser.add_argument( + "child", + type=argparse.FileType("r"), + help="Child GFF3 annotations to rebase against parent", + ) + parser.add_argument( + "--interpro", action="store_true", help="Interpro specific modifications" + ) + parser.add_argument( + "--protein2dna", + action="store_true", + help="Map protein translated results to original DNA data", + ) + parser.add_argument("--map_by", help="Map by key", default="ID") args = parser.parse_args() rebase(**vars(args))