comparison jbrowse2/gff3_rebase.py @ 6:88b9b105c09b draft

Uploaded
author fubar
date Fri, 05 Jan 2024 01:58:02 +0000
parents cd5d63cd0eb5
children
comparison
equal deleted inserted replaced
5:42ca8804cd93 6:88b9b105c09b
61 feature_copy.sub_features = [] 61 feature_copy.sub_features = []
62 yield feature_copy 62 yield feature_copy
63 else: 63 else:
64 yield feature 64 yield feature
65 65
66 if hasattr(feature, 'sub_features'): 66 if hasattr(feature, "sub_features"):
67 for x in feature_lambda(feature.sub_features, test, test_kwargs, subfeatures=subfeatures): 67 for x in feature_lambda(
68 feature.sub_features, test, test_kwargs, subfeatures=subfeatures
69 ):
68 yield x 70 yield x
69 71
70 72
71 def feature_test_qual_value(feature, **kwargs): 73 def feature_test_qual_value(feature, **kwargs):
72 """Test qualifier values. 74 """Test qualifier values.
73 75
74 For every feature, check that at least one value in 76 For every feature, check that at least one value in
75 feature.quailfiers(kwargs['qualifier']) is in kwargs['attribute_list'] 77 feature.quailfiers(kwargs['qualifier']) is in kwargs['attribute_list']
76 """ 78 """
77 for attribute_value in feature.qualifiers.get(kwargs['qualifier'], []): 79 for attribute_value in feature.qualifiers.get(kwargs["qualifier"], []):
78 if attribute_value in kwargs['attribute_list']: 80 if attribute_value in kwargs["attribute_list"]:
79 return True 81 return True
80 return False 82 return False
81 83
82 84
83 def __get_features(child, interpro=False): 85 def __get_features(child, interpro=False):
88 # Get the record id as parent_feature_id (since this is how it will be during remapping) 90 # Get the record id as parent_feature_id (since this is how it will be during remapping)
89 parent_feature_id = rec.id 91 parent_feature_id = rec.id
90 # If it's an interpro specific gff3 file 92 # If it's an interpro specific gff3 file
91 if interpro: 93 if interpro:
92 # Then we ignore polypeptide features as they're useless 94 # Then we ignore polypeptide features as they're useless
93 if feature.type == 'polypeptide': 95 if feature.type == "polypeptide":
94 continue 96 continue
95 # If there's an underscore, we strip up to that underscore? 97 # If there's an underscore, we strip up to that underscore?
96 # I do not know the rationale for this, removing. 98 # I do not know the rationale for this, removing.
97 # if '_' in parent_feature_id: 99 # if '_' in parent_feature_id:
98 # parent_feature_id = parent_feature_id[parent_feature_id.index('_') + 1:] 100 # parent_feature_id = parent_feature_id[parent_feature_id.index('_') + 1:]
99 101
100 try: 102 try:
101 child_features[parent_feature_id].append(feature) 103 child_features[parent_feature_id].append(feature)
102 except KeyError: 104 except KeyError:
103 child_features[parent_feature_id] = [feature] 105 child_features[parent_feature_id] = [feature]
132 if ne < 0: 134 if ne < 0:
133 ne %= 3 135 ne %= 3
134 136
135 feature.location = FeatureLocation(ns, ne, strand=st) 137 feature.location = FeatureLocation(ns, ne, strand=st)
136 138
137 if hasattr(feature, 'sub_features'): 139 if hasattr(feature, "sub_features"):
138 for subfeature in feature.sub_features: 140 for subfeature in feature.sub_features:
139 __update_feature_location(subfeature, parent, protein2dna) 141 __update_feature_location(subfeature, parent, protein2dna)
140 142
141 143
142 def rebase(parent, child, interpro=False, protein2dna=False, map_by='ID'): 144 def rebase(parent, child, interpro=False, protein2dna=False, map_by="ID"):
143 # get all of the features we will be re-mapping in a dictionary, keyed by parent feature ID 145 # get all of the features we will be re-mapping in a dictionary, keyed by parent feature ID
144 child_features = __get_features(child, interpro=interpro) 146 child_features = __get_features(child, interpro=interpro)
145 147
146 for rec in GFF.parse(parent): 148 for rec in GFF.parse(parent):
147 replacement_features = [] 149 replacement_features = []
148 for feature in feature_lambda( 150 for feature in feature_lambda(
149 rec.features, 151 rec.features,
150 # Filter features in the parent genome by those that are 152 # Filter features in the parent genome by those that are
151 # "interesting", i.e. have results in child_features array. 153 # "interesting", i.e. have results in child_features array.
152 # Probably an unnecessary optimisation. 154 # Probably an unnecessary optimisation.
153 feature_test_qual_value, 155 feature_test_qual_value,
154 { 156 {
155 'qualifier': map_by, 157 "qualifier": map_by,
156 'attribute_list': child_features.keys(), 158 "attribute_list": child_features.keys(),
157 }, 159 },
158 subfeatures=False): 160 subfeatures=False,
161 ):
159 162
160 # Features which will be re-mapped 163 # Features which will be re-mapped
161 to_remap = child_features[feature.id] 164 to_remap = child_features[feature.id]
162 # TODO: update starts 165 # TODO: update starts
163 fixed_features = [] 166 fixed_features = []
164 for x in to_remap: 167 for x in to_remap:
165 # Then update the location of the actual feature 168 # Then update the location of the actual feature
166 __update_feature_location(x, feature, protein2dna) 169 __update_feature_location(x, feature, protein2dna)
167 170
168 if interpro: 171 if interpro:
169 for y in ('status', 'Target'): 172 for y in ("status", "Target"):
170 try: 173 try:
171 del x.qualifiers[y] 174 del x.qualifiers[y]
172 except Exception: 175 except Exception:
173 pass 176 pass
174 177
179 rec.features = replacement_features 182 rec.features = replacement_features
180 rec.annotations = {} 183 rec.annotations = {}
181 GFF.write([rec], sys.stdout) 184 GFF.write([rec], sys.stdout)
182 185
183 186
184 if __name__ == '__main__': 187 if __name__ == "__main__":
185 parser = argparse.ArgumentParser(description='rebase gff3 features against parent locations', epilog="") 188 parser = argparse.ArgumentParser(
186 parser.add_argument('parent', type=argparse.FileType('r'), help='Parent GFF3 annotations') 189 description="rebase gff3 features against parent locations", epilog=""
187 parser.add_argument('child', type=argparse.FileType('r'), help='Child GFF3 annotations to rebase against parent') 190 )
188 parser.add_argument('--interpro', action='store_true', 191 parser.add_argument(
189 help='Interpro specific modifications') 192 "parent", type=argparse.FileType("r"), help="Parent GFF3 annotations"
190 parser.add_argument('--protein2dna', action='store_true', 193 )
191 help='Map protein translated results to original DNA data') 194 parser.add_argument(
192 parser.add_argument('--map_by', help='Map by key', default='ID') 195 "child",
196 type=argparse.FileType("r"),
197 help="Child GFF3 annotations to rebase against parent",
198 )
199 parser.add_argument(
200 "--interpro", action="store_true", help="Interpro specific modifications"
201 )
202 parser.add_argument(
203 "--protein2dna",
204 action="store_true",
205 help="Map protein translated results to original DNA data",
206 )
207 parser.add_argument("--map_by", help="Map by key", default="ID")
193 args = parser.parse_args() 208 args = parser.parse_args()
194 rebase(**vars(args)) 209 rebase(**vars(args))