Mercurial > repos > earlhaminst > gstf_preparation
comparison gstf_preparation.py @ 14:598e9172b8e7 draft
"planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/gstf_preparation commit ea67c766934266e690d05e3f9ebb4cca12b8e3e7"
author | earlhaminst |
---|---|
date | Fri, 18 Dec 2020 08:57:43 +0000 |
parents | 51a7a2a82902 |
children | 9c62ad7dd113 |
comparison
equal
deleted
inserted
replaced
13:51a7a2a82902 | 14:598e9172b8e7 |
---|---|
213 found_cds = False | 213 found_cds = False |
214 derived_translation_start = None | 214 derived_translation_start = None |
215 derived_translation_end = None | 215 derived_translation_end = None |
216 if transcript_id in cds_parent_dict: | 216 if transcript_id in cds_parent_dict: |
217 cds_list = cds_parent_dict[transcript_id] | 217 cds_list = cds_parent_dict[transcript_id] |
218 cds_ids = {_['id'] for _ in cds_list} | 218 unique_cds_ids = {cds['id'] for cds in cds_list} |
219 if len(cds_ids) > 1: | 219 if len(unique_cds_ids) > 1: |
220 raise Exception("Transcript %s has multiple CDSs: this is not supported by Ensembl JSON format" % transcript_id) | 220 msg = """Found multiple CDS IDs (%s) for transcript '%s'. |
221 cds_id = cds_ids.pop() | 221 This is not supported by the Ensembl JSON format. If a CDS is split across |
| | 222 multiple discontinuous genomic locations, the GFF3 standard requires that all |
| | 223 corresponding lines use the same ID attribute.""" |
| | 224 raise Exception(msg % (unique_cds_ids, transcript_id)) |
| | 225 cds_id = unique_cds_ids.pop() |
222 translation['id'] = cds_id | 226 translation['id'] = cds_id |
223 cds_list.sort(key=lambda _: _['start']) | 227 cds_list.sort(key=lambda _: _['start']) |
224 translation['CDS'] = cds_list | 228 translation['CDS'] = cds_list |
225 translation['start'] = cds_list[0]['start'] | 229 translation['start'] = cds_list[0]['start'] |
226 translation['end'] = cds_list[-1]['end'] | 230 translation['end'] = cds_list[-1]['end'] |