Mercurial > repos > fubar > jbrowse2dev
comparison jbrowse2/gff3_rebase.py @ 6:88b9b105c09b draft
Uploaded
author | fubar |
---|---|
date | Fri, 05 Jan 2024 01:58:02 +0000 |
parents | cd5d63cd0eb5 |
children |
comparison
equal
deleted
inserted
replaced
5:42ca8804cd93 | 6:88b9b105c09b |
---|---|
61 feature_copy.sub_features = [] | 61 feature_copy.sub_features = [] |
62 yield feature_copy | 62 yield feature_copy |
63 else: | 63 else: |
64 yield feature | 64 yield feature |
65 | 65 |
66 if hasattr(feature, 'sub_features'): | 66 if hasattr(feature, "sub_features"): |
67 for x in feature_lambda(feature.sub_features, test, test_kwargs, subfeatures=subfeatures): | 67 for x in feature_lambda( |
68 feature.sub_features, test, test_kwargs, subfeatures=subfeatures | |
69 ): | |
68 yield x | 70 yield x |
69 | 71 |
70 | 72 |
71 def feature_test_qual_value(feature, **kwargs): | 73 def feature_test_qual_value(feature, **kwargs): |
72 """Test qualifier values. | 74 """Test qualifier values. |
73 | 75 |
74 For every feature, check that at least one value in | 76 For every feature, check that at least one value in |
75 feature.quailfiers(kwargs['qualifier']) is in kwargs['attribute_list'] | 77 feature.quailfiers(kwargs['qualifier']) is in kwargs['attribute_list'] |
76 """ | 78 """ |
77 for attribute_value in feature.qualifiers.get(kwargs['qualifier'], []): | 79 for attribute_value in feature.qualifiers.get(kwargs["qualifier"], []): |
78 if attribute_value in kwargs['attribute_list']: | 80 if attribute_value in kwargs["attribute_list"]: |
79 return True | 81 return True |
80 return False | 82 return False |
81 | 83 |
82 | 84 |
83 def __get_features(child, interpro=False): | 85 def __get_features(child, interpro=False): |
88 # Get the record id as parent_feature_id (since this is how it will be during remapping) | 90 # Get the record id as parent_feature_id (since this is how it will be during remapping) |
89 parent_feature_id = rec.id | 91 parent_feature_id = rec.id |
90 # If it's an interpro specific gff3 file | 92 # If it's an interpro specific gff3 file |
91 if interpro: | 93 if interpro: |
92 # Then we ignore polypeptide features as they're useless | 94 # Then we ignore polypeptide features as they're useless |
93 if feature.type == 'polypeptide': | 95 if feature.type == "polypeptide": |
94 continue | 96 continue |
95 # If there's an underscore, we strip up to that underscore? | 97 # If there's an underscore, we strip up to that underscore? |
96 # I do not know the rationale for this, removing. | 98 # I do not know the rationale for this, removing. |
97 # if '_' in parent_feature_id: | 99 # if '_' in parent_feature_id: |
98 # parent_feature_id = parent_feature_id[parent_feature_id.index('_') + 1:] | 100 # parent_feature_id = parent_feature_id[parent_feature_id.index('_') + 1:] |
99 | 101 |
100 try: | 102 try: |
101 child_features[parent_feature_id].append(feature) | 103 child_features[parent_feature_id].append(feature) |
102 except KeyError: | 104 except KeyError: |
103 child_features[parent_feature_id] = [feature] | 105 child_features[parent_feature_id] = [feature] |
132 if ne < 0: | 134 if ne < 0: |
133 ne %= 3 | 135 ne %= 3 |
134 | 136 |
135 feature.location = FeatureLocation(ns, ne, strand=st) | 137 feature.location = FeatureLocation(ns, ne, strand=st) |
136 | 138 |
137 if hasattr(feature, 'sub_features'): | 139 if hasattr(feature, "sub_features"): |
138 for subfeature in feature.sub_features: | 140 for subfeature in feature.sub_features: |
139 __update_feature_location(subfeature, parent, protein2dna) | 141 __update_feature_location(subfeature, parent, protein2dna) |
140 | 142 |
141 | 143 |
142 def rebase(parent, child, interpro=False, protein2dna=False, map_by='ID'): | 144 def rebase(parent, child, interpro=False, protein2dna=False, map_by="ID"): |
143 # get all of the features we will be re-mapping in a dictionary, keyed by parent feature ID | 145 # get all of the features we will be re-mapping in a dictionary, keyed by parent feature ID |
144 child_features = __get_features(child, interpro=interpro) | 146 child_features = __get_features(child, interpro=interpro) |
145 | 147 |
146 for rec in GFF.parse(parent): | 148 for rec in GFF.parse(parent): |
147 replacement_features = [] | 149 replacement_features = [] |
148 for feature in feature_lambda( | 150 for feature in feature_lambda( |
149 rec.features, | 151 rec.features, |
150 # Filter features in the parent genome by those that are | 152 # Filter features in the parent genome by those that are |
151 # "interesting", i.e. have results in child_features array. | 153 # "interesting", i.e. have results in child_features array. |
152 # Probably an unnecessary optimisation. | 154 # Probably an unnecessary optimisation. |
153 feature_test_qual_value, | 155 feature_test_qual_value, |
154 { | 156 { |
155 'qualifier': map_by, | 157 "qualifier": map_by, |
156 'attribute_list': child_features.keys(), | 158 "attribute_list": child_features.keys(), |
157 }, | 159 }, |
158 subfeatures=False): | 160 subfeatures=False, |
161 ): | |
159 | 162 |
160 # Features which will be re-mapped | 163 # Features which will be re-mapped |
161 to_remap = child_features[feature.id] | 164 to_remap = child_features[feature.id] |
162 # TODO: update starts | 165 # TODO: update starts |
163 fixed_features = [] | 166 fixed_features = [] |
164 for x in to_remap: | 167 for x in to_remap: |
165 # Then update the location of the actual feature | 168 # Then update the location of the actual feature |
166 __update_feature_location(x, feature, protein2dna) | 169 __update_feature_location(x, feature, protein2dna) |
167 | 170 |
168 if interpro: | 171 if interpro: |
169 for y in ('status', 'Target'): | 172 for y in ("status", "Target"): |
170 try: | 173 try: |
171 del x.qualifiers[y] | 174 del x.qualifiers[y] |
172 except Exception: | 175 except Exception: |
173 pass | 176 pass |
174 | 177 |
179 rec.features = replacement_features | 182 rec.features = replacement_features |
180 rec.annotations = {} | 183 rec.annotations = {} |
181 GFF.write([rec], sys.stdout) | 184 GFF.write([rec], sys.stdout) |
182 | 185 |
183 | 186 |
184 if __name__ == '__main__': | 187 if __name__ == "__main__": |
185 parser = argparse.ArgumentParser(description='rebase gff3 features against parent locations', epilog="") | 188 parser = argparse.ArgumentParser( |
186 parser.add_argument('parent', type=argparse.FileType('r'), help='Parent GFF3 annotations') | 189 description="rebase gff3 features against parent locations", epilog="" |
187 parser.add_argument('child', type=argparse.FileType('r'), help='Child GFF3 annotations to rebase against parent') | 190 ) |
188 parser.add_argument('--interpro', action='store_true', | 191 parser.add_argument( |
189 help='Interpro specific modifications') | 192 "parent", type=argparse.FileType("r"), help="Parent GFF3 annotations" |
190 parser.add_argument('--protein2dna', action='store_true', | 193 ) |
191 help='Map protein translated results to original DNA data') | 194 parser.add_argument( |
192 parser.add_argument('--map_by', help='Map by key', default='ID') | 195 "child", |
196 type=argparse.FileType("r"), | |
197 help="Child GFF3 annotations to rebase against parent", | |
198 ) | |
199 parser.add_argument( | |
200 "--interpro", action="store_true", help="Interpro specific modifications" | |
201 ) | |
202 parser.add_argument( | |
203 "--protein2dna", | |
204 action="store_true", | |
205 help="Map protein translated results to original DNA data", | |
206 ) | |
207 parser.add_argument("--map_by", help="Map by key", default="ID") | |
193 args = parser.parse_args() | 208 args = parser.parse_args() |
194 rebase(**vars(args)) | 209 rebase(**vars(args)) |