Mercurial > repos > fubar > jbrowse2
comparison gff3_rebase.py @ 98:b1260bca5fdc draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/jbrowse2 commit 44d8fc559ecf5463a8f753561976fa26686c96f6
| author | bgruening |
|---|---|
| date | Wed, 05 Jun 2024 10:00:07 +0000 |
| parents | 39b717d934a8 |
| children |
comparison
equal
deleted
inserted
replaced
| 97:74074746ccd8 | 98:b1260bca5fdc |
|---|---|
| 61 feature_copy.sub_features = [] | 61 feature_copy.sub_features = [] |
| 62 yield feature_copy | 62 yield feature_copy |
| 63 else: | 63 else: |
| 64 yield feature | 64 yield feature |
| 65 | 65 |
| 66 if hasattr(feature, "sub_features"): | 66 if hasattr(feature, 'sub_features'): |
| 67 for x in feature_lambda( | 67 for x in feature_lambda(feature.sub_features, test, test_kwargs, subfeatures=subfeatures): |
| 68 feature.sub_features, | |
| 69 test, | |
| 70 test_kwargs, | |
| 71 subfeatures=subfeatures, | |
| 72 ): | |
| 73 yield x | 68 yield x |
| 74 | 69 |
| 75 | 70 |
| 76 def feature_test_qual_value(feature, **kwargs): | 71 def feature_test_qual_value(feature, **kwargs): |
| 77 """Test qualifier values. | 72 """Test qualifier values. |
| 78 | 73 |
| 79 For every feature, check that at least one value in | 74 For every feature, check that at least one value in |
| 80 feature.quailfiers(kwargs['qualifier']) is in kwargs['attribute_list'] | 75 feature.quailfiers(kwargs['qualifier']) is in kwargs['attribute_list'] |
| 81 """ | 76 """ |
| 82 for attribute_value in feature.qualifiers.get(kwargs["qualifier"], []): | 77 for attribute_value in feature.qualifiers.get(kwargs['qualifier'], []): |
| 83 if attribute_value in kwargs["attribute_list"]: | 78 if attribute_value in kwargs['attribute_list']: |
| 84 return True | 79 return True |
| 85 return False | 80 return False |
| 86 | 81 |
| 87 | 82 |
| 88 def __get_features(child, interpro=False): | 83 def __get_features(child, interpro=False): |
| 93 # Get the record id as parent_feature_id (since this is how it will be during remapping) | 88 # Get the record id as parent_feature_id (since this is how it will be during remapping) |
| 94 parent_feature_id = rec.id | 89 parent_feature_id = rec.id |
| 95 # If it's an interpro specific gff3 file | 90 # If it's an interpro specific gff3 file |
| 96 if interpro: | 91 if interpro: |
| 97 # Then we ignore polypeptide features as they're useless | 92 # Then we ignore polypeptide features as they're useless |
| 98 if feature.type == "polypeptide": | 93 if feature.type == 'polypeptide': |
| 99 continue | 94 continue |
| 100 # If there's an underscore, we strip up to that underscore? | 95 # If there's an underscore, we strip up to that underscore? |
| 101 # I do not know the rationale for this, removing. | 96 # I do not know the rationale for this, removing. |
| 102 # if '_' in parent_feature_id: | 97 # if '_' in parent_feature_id: |
| 103 # parent_feature_id = parent_feature_id[parent_feature_id.index('_') + 1:] | 98 # parent_feature_id = parent_feature_id[parent_feature_id.index('_') + 1:] |
| 104 | 99 |
| 105 try: | 100 try: |
| 106 child_features[parent_feature_id].append(feature) | 101 child_features[parent_feature_id].append(feature) |
| 107 except KeyError: | 102 except KeyError: |
| 108 child_features[parent_feature_id] = [feature] | 103 child_features[parent_feature_id] = [feature] |
| 115 end = feature.location.end | 110 end = feature.location.end |
| 116 if protein2dna: | 111 if protein2dna: |
| 117 start *= 3 | 112 start *= 3 |
| 118 end *= 3 | 113 end *= 3 |
| 119 | 114 |
| 120 if parent.location.strand != None and parent.location.strand >= 0: | 115 if parent.location.strand >= 0: |
| 121 ns = parent.location.start + start | 116 ns = parent.location.start + start |
| 122 ne = parent.location.start + end | 117 ne = parent.location.start + end |
| 123 st = +1 | 118 st = +1 |
| 124 else: | 119 else: |
| 125 ns = parent.location.end - end | 120 ns = parent.location.end - end |
| 134 # frame that it should be in. | 129 # frame that it should be in. |
| 135 if ns < 0: | 130 if ns < 0: |
| 136 ns %= 3 | 131 ns %= 3 |
| 137 if ne < 0: | 132 if ne < 0: |
| 138 ne %= 3 | 133 ne %= 3 |
| 139 if ns > ne: | 134 |
| 140 ne, ns = ns, ne # dunno why but sometimes happens | |
| 141 feature.location = FeatureLocation(ns, ne, strand=st) | 135 feature.location = FeatureLocation(ns, ne, strand=st) |
| 142 | 136 |
| 143 if hasattr(feature, "sub_features"): | 137 if hasattr(feature, 'sub_features'): |
| 144 for subfeature in feature.sub_features: | 138 for subfeature in feature.sub_features: |
| 145 __update_feature_location(subfeature, parent, protein2dna) | 139 __update_feature_location(subfeature, parent, protein2dna) |
| 146 | 140 |
| 147 | 141 |
| 148 def rebase(parent, child, interpro=False, protein2dna=False, map_by="ID"): | 142 def rebase(parent, child, interpro=False, protein2dna=False, map_by='ID'): |
| 149 # get all of the features we will be re-mapping in a dictionary, keyed by parent feature ID | 143 # get all of the features we will be re-mapping in a dictionary, keyed by parent feature ID |
| 150 child_features = __get_features(child, interpro=interpro) | 144 child_features = __get_features(child, interpro=interpro) |
| 151 | 145 |
| 152 for rec in GFF.parse(parent): | 146 for rec in GFF.parse(parent): |
| 153 replacement_features = [] | 147 replacement_features = [] |
| 154 for feature in feature_lambda( | 148 for feature in feature_lambda( |
| 155 rec.features, | 149 rec.features, |
| 156 # Filter features in the parent genome by those that are | 150 # Filter features in the parent genome by those that are |
| 157 # "interesting", i.e. have results in child_features array. | 151 # "interesting", i.e. have results in child_features array. |
| 158 # Probably an unnecessary optimisation. | 152 # Probably an unnecessary optimisation. |
| 159 feature_test_qual_value, | 153 feature_test_qual_value, |
| 160 { | 154 { |
| 161 "qualifier": map_by, | 155 'qualifier': map_by, |
| 162 "attribute_list": child_features.keys(), | 156 'attribute_list': child_features.keys(), |
| 163 }, | 157 }, |
| 164 subfeatures=False, | 158 subfeatures=False): |
| 165 ): | |
| 166 | 159 |
| 167 # Features which will be re-mapped | 160 # Features which will be re-mapped |
| 168 to_remap = child_features[feature.id] | 161 to_remap = child_features[feature.id] |
| 169 # TODO: update starts | 162 # TODO: update starts |
| 170 fixed_features = [] | 163 fixed_features = [] |
| 171 for x in to_remap: | 164 for x in to_remap: |
| 172 # Then update the location of the actual feature | 165 # Then update the location of the actual feature |
| 173 __update_feature_location(x, feature, protein2dna) | 166 __update_feature_location(x, feature, protein2dna) |
| 174 | 167 |
| 175 if interpro: | 168 if interpro: |
| 176 for y in ("status", "Target"): | 169 for y in ('status', 'Target'): |
| 177 try: | 170 try: |
| 178 del x.qualifiers[y] | 171 del x.qualifiers[y] |
| 179 except Exception: | 172 except Exception: |
| 180 pass | 173 pass |
| 181 | 174 |
| 186 rec.features = replacement_features | 179 rec.features = replacement_features |
| 187 rec.annotations = {} | 180 rec.annotations = {} |
| 188 GFF.write([rec], sys.stdout) | 181 GFF.write([rec], sys.stdout) |
| 189 | 182 |
| 190 | 183 |
| 191 if __name__ == "__main__": | 184 if __name__ == '__main__': |
| 192 parser = argparse.ArgumentParser( | 185 parser = argparse.ArgumentParser(description='rebase gff3 features against parent locations', epilog="") |
| 193 description="rebase gff3 features against parent locations", epilog="" | 186 parser.add_argument('parent', type=argparse.FileType('r'), help='Parent GFF3 annotations') |
| 194 ) | 187 parser.add_argument('child', type=argparse.FileType('r'), help='Child GFF3 annotations to rebase against parent') |
| 195 parser.add_argument( | 188 parser.add_argument('--interpro', action='store_true', |
| 196 "parent", type=argparse.FileType("r"), help="Parent GFF3 annotations" | 189 help='Interpro specific modifications') |
| 197 ) | 190 parser.add_argument('--protein2dna', action='store_true', |
| 198 parser.add_argument( | 191 help='Map protein translated results to original DNA data') |
| 199 "child", | 192 parser.add_argument('--map_by', help='Map by key', default='ID') |
| 200 type=argparse.FileType("r"), | |
| 201 help="Child GFF3 annotations to rebase against parent", | |
| 202 ) | |
| 203 parser.add_argument( | |
| 204 "--interpro", | |
| 205 action="store_true", | |
| 206 help="Interpro specific modifications", | |
| 207 ) | |
| 208 parser.add_argument( | |
| 209 "--protein2dna", | |
| 210 action="store_true", | |
| 211 help="Map protein translated results to original DNA data", | |
| 212 ) | |
| 213 parser.add_argument("--map_by", help="Map by key", default="ID") | |
| 214 args = parser.parse_args() | 193 args = parser.parse_args() |
| 215 rebase(**vars(args)) | 194 rebase(**vars(args)) |
