Mercurial > repos > fubar > jbrowse2
comparison gff3_rebase.py @ 98:b1260bca5fdc draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 44d8fc559ecf5463a8f753561976fa26686c96f6
author | bgruening |
---|---|
date | Wed, 05 Jun 2024 10:00:07 +0000 |
parents | 39b717d934a8 |
children |
comparison
equal
deleted
inserted
replaced
97:74074746ccd8 | 98:b1260bca5fdc |
---|---|
61 feature_copy.sub_features = [] | 61 feature_copy.sub_features = [] |
62 yield feature_copy | 62 yield feature_copy |
63 else: | 63 else: |
64 yield feature | 64 yield feature |
65 | 65 |
66 if hasattr(feature, "sub_features"): | 66 if hasattr(feature, 'sub_features'): |
67 for x in feature_lambda( | 67 for x in feature_lambda(feature.sub_features, test, test_kwargs, subfeatures=subfeatures): |
68 feature.sub_features, | |
69 test, | |
70 test_kwargs, | |
71 subfeatures=subfeatures, | |
72 ): | |
73 yield x | 68 yield x |
74 | 69 |
75 | 70 |
76 def feature_test_qual_value(feature, **kwargs): | 71 def feature_test_qual_value(feature, **kwargs): |
77 """Test qualifier values. | 72 """Test qualifier values. |
78 | 73 |
79 For every feature, check that at least one value in | 74 For every feature, check that at least one value in |
80 feature.quailfiers(kwargs['qualifier']) is in kwargs['attribute_list'] | 75 feature.quailfiers(kwargs['qualifier']) is in kwargs['attribute_list'] |
81 """ | 76 """ |
82 for attribute_value in feature.qualifiers.get(kwargs["qualifier"], []): | 77 for attribute_value in feature.qualifiers.get(kwargs['qualifier'], []): |
83 if attribute_value in kwargs["attribute_list"]: | 78 if attribute_value in kwargs['attribute_list']: |
84 return True | 79 return True |
85 return False | 80 return False |
86 | 81 |
87 | 82 |
88 def __get_features(child, interpro=False): | 83 def __get_features(child, interpro=False): |
93 # Get the record id as parent_feature_id (since this is how it will be during remapping) | 88 # Get the record id as parent_feature_id (since this is how it will be during remapping) |
94 parent_feature_id = rec.id | 89 parent_feature_id = rec.id |
95 # If it's an interpro specific gff3 file | 90 # If it's an interpro specific gff3 file |
96 if interpro: | 91 if interpro: |
97 # Then we ignore polypeptide features as they're useless | 92 # Then we ignore polypeptide features as they're useless |
98 if feature.type == "polypeptide": | 93 if feature.type == 'polypeptide': |
99 continue | 94 continue |
100 # If there's an underscore, we strip up to that underscore? | 95 # If there's an underscore, we strip up to that underscore? |
101 # I do not know the rationale for this, removing. | 96 # I do not know the rationale for this, removing. |
102 # if '_' in parent_feature_id: | 97 # if '_' in parent_feature_id: |
103 # parent_feature_id = parent_feature_id[parent_feature_id.index('_') + 1:] | 98 # parent_feature_id = parent_feature_id[parent_feature_id.index('_') + 1:] |
104 | 99 |
105 try: | 100 try: |
106 child_features[parent_feature_id].append(feature) | 101 child_features[parent_feature_id].append(feature) |
107 except KeyError: | 102 except KeyError: |
108 child_features[parent_feature_id] = [feature] | 103 child_features[parent_feature_id] = [feature] |
115 end = feature.location.end | 110 end = feature.location.end |
116 if protein2dna: | 111 if protein2dna: |
117 start *= 3 | 112 start *= 3 |
118 end *= 3 | 113 end *= 3 |
119 | 114 |
120 if parent.location.strand != None and parent.location.strand >= 0: | 115 if parent.location.strand >= 0: |
121 ns = parent.location.start + start | 116 ns = parent.location.start + start |
122 ne = parent.location.start + end | 117 ne = parent.location.start + end |
123 st = +1 | 118 st = +1 |
124 else: | 119 else: |
125 ns = parent.location.end - end | 120 ns = parent.location.end - end |
134 # frame that it should be in. | 129 # frame that it should be in. |
135 if ns < 0: | 130 if ns < 0: |
136 ns %= 3 | 131 ns %= 3 |
137 if ne < 0: | 132 if ne < 0: |
138 ne %= 3 | 133 ne %= 3 |
139 if ns > ne: | 134 |
140 ne, ns = ns, ne # dunno why but sometimes happens | |
141 feature.location = FeatureLocation(ns, ne, strand=st) | 135 feature.location = FeatureLocation(ns, ne, strand=st) |
142 | 136 |
143 if hasattr(feature, "sub_features"): | 137 if hasattr(feature, 'sub_features'): |
144 for subfeature in feature.sub_features: | 138 for subfeature in feature.sub_features: |
145 __update_feature_location(subfeature, parent, protein2dna) | 139 __update_feature_location(subfeature, parent, protein2dna) |
146 | 140 |
147 | 141 |
148 def rebase(parent, child, interpro=False, protein2dna=False, map_by="ID"): | 142 def rebase(parent, child, interpro=False, protein2dna=False, map_by='ID'): |
149 # get all of the features we will be re-mapping in a dictionary, keyed by parent feature ID | 143 # get all of the features we will be re-mapping in a dictionary, keyed by parent feature ID |
150 child_features = __get_features(child, interpro=interpro) | 144 child_features = __get_features(child, interpro=interpro) |
151 | 145 |
152 for rec in GFF.parse(parent): | 146 for rec in GFF.parse(parent): |
153 replacement_features = [] | 147 replacement_features = [] |
154 for feature in feature_lambda( | 148 for feature in feature_lambda( |
155 rec.features, | 149 rec.features, |
156 # Filter features in the parent genome by those that are | 150 # Filter features in the parent genome by those that are |
157 # "interesting", i.e. have results in child_features array. | 151 # "interesting", i.e. have results in child_features array. |
158 # Probably an unnecessary optimisation. | 152 # Probably an unnecessary optimisation. |
159 feature_test_qual_value, | 153 feature_test_qual_value, |
160 { | 154 { |
161 "qualifier": map_by, | 155 'qualifier': map_by, |
162 "attribute_list": child_features.keys(), | 156 'attribute_list': child_features.keys(), |
163 }, | 157 }, |
164 subfeatures=False, | 158 subfeatures=False): |
165 ): | |
166 | 159 |
167 # Features which will be re-mapped | 160 # Features which will be re-mapped |
168 to_remap = child_features[feature.id] | 161 to_remap = child_features[feature.id] |
169 # TODO: update starts | 162 # TODO: update starts |
170 fixed_features = [] | 163 fixed_features = [] |
171 for x in to_remap: | 164 for x in to_remap: |
172 # Then update the location of the actual feature | 165 # Then update the location of the actual feature |
173 __update_feature_location(x, feature, protein2dna) | 166 __update_feature_location(x, feature, protein2dna) |
174 | 167 |
175 if interpro: | 168 if interpro: |
176 for y in ("status", "Target"): | 169 for y in ('status', 'Target'): |
177 try: | 170 try: |
178 del x.qualifiers[y] | 171 del x.qualifiers[y] |
179 except Exception: | 172 except Exception: |
180 pass | 173 pass |
181 | 174 |
186 rec.features = replacement_features | 179 rec.features = replacement_features |
187 rec.annotations = {} | 180 rec.annotations = {} |
188 GFF.write([rec], sys.stdout) | 181 GFF.write([rec], sys.stdout) |
189 | 182 |
190 | 183 |
191 if __name__ == "__main__": | 184 if __name__ == '__main__': |
192 parser = argparse.ArgumentParser( | 185 parser = argparse.ArgumentParser(description='rebase gff3 features against parent locations', epilog="") |
193 description="rebase gff3 features against parent locations", epilog="" | 186 parser.add_argument('parent', type=argparse.FileType('r'), help='Parent GFF3 annotations') |
194 ) | 187 parser.add_argument('child', type=argparse.FileType('r'), help='Child GFF3 annotations to rebase against parent') |
195 parser.add_argument( | 188 parser.add_argument('--interpro', action='store_true', |
196 "parent", type=argparse.FileType("r"), help="Parent GFF3 annotations" | 189 help='Interpro specific modifications') |
197 ) | 190 parser.add_argument('--protein2dna', action='store_true', |
198 parser.add_argument( | 191 help='Map protein translated results to original DNA data') |
199 "child", | 192 parser.add_argument('--map_by', help='Map by key', default='ID') |
200 type=argparse.FileType("r"), | |
201 help="Child GFF3 annotations to rebase against parent", | |
202 ) | |
203 parser.add_argument( | |
204 "--interpro", | |
205 action="store_true", | |
206 help="Interpro specific modifications", | |
207 ) | |
208 parser.add_argument( | |
209 "--protein2dna", | |
210 action="store_true", | |
211 help="Map protein translated results to original DNA data", | |
212 ) | |
213 parser.add_argument("--map_by", help="Map by key", default="ID") | |
214 args = parser.parse_args() | 193 args = parser.parse_args() |
215 rebase(**vars(args)) | 194 rebase(**vars(args)) |