Mercurial > repos > vipints > fml_gff3togtf
comparison gtf_to_gff.py @ 10:c42c69aa81f8
fixed manually the upload of version 2.1.0 - deleted accidentally added files to the repo
| author | vipints <vipin@cbio.mskcc.org> |
|---|---|
| date | Thu, 23 Apr 2015 18:01:45 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 9:7d67331368f3 | 10:c42c69aa81f8 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ | |
| 3 Convert Gene Transfer Format [GTF] to Generic Feature Format Version 3 [GFF3]. | |
| 4 | |
| 5 Usage: python gtf_to_gff.py in.gtf > out.gff3 | |
| 6 | |
| 7 Requirement: | |
| 8 GFFParser.py: https://github.com/vipints/GFFtools-GX/blob/master/GFFParser.py | |
| 9 helper.py: https://github.com/vipints/GFFtools-GX/blob/master/helper.py | |
| 10 | |
| 11 Copyright (C) | |
| 12 2009-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany. | |
| 13 2012-2015 Memorial Sloan Kettering Cancer Center New York City, USA. | |
| 14 """ | |
| 15 | |
| 16 import re | |
| 17 import sys | |
| 18 import helper | |
| 19 import GFFParser | |
| 20 | |
def GFFWriter(gtf_content):
    """
    Print the parsed gene models to standard output as GFF3 records.

    A '##gff-version 3' header is written first. Each gene then yields one
    gene row, followed per transcript by a transcript row, its 5' UTR rows,
    CDS rows, 3' UTR rows and finally its exon rows.

    @args gtf_content: Parsed object from gtf file
    @type gtf_content: numpy array
    """

    emit = sys.stdout.write

    # Tab-separated record templates; only CDS rows carry a numeric phase column.
    gene_row = '%s\t%s\tgene\t%d\t%d\t.\t%s\t.\tID=%s;Name=%s\n'
    transcript_row = '%s\t%s\t%s\t%d\t%d\t.\t%s\t.\tID=%s;Parent=%s\n'
    cds_row = '%s\t%s\tCDS\t%d\t%d\t.\t%s\t%d\tParent=%s\n'
    child_row = '%s\t%s\t%s\t%d\t%d\t.\t%s\t.\tParent=%s\n'

    emit('##gff-version 3\n')
    for gene in gtf_content:
        seqid = gene['chr']
        orientation = gene['strand']
        gene_start = gene['start']
        gene_stop = gene['stop']
        origin = gene['source']
        gene_id = gene['name']
        # Fall back to the gene identifier when no display name is recorded.
        gene_label = gene['gene_info']['Name'] or gene_id

        emit(gene_row % (seqid, origin, gene_start, gene_stop, orientation, gene_id, gene_label))

        for pos, transcript in enumerate(gene['transcripts']):
            exons = gene['exons'][pos]

            # Transcript span: start of the first exon to the end of the last one.
            tx_start = exons[0][0]
            tx_stop = exons[-1][-1]
            tx_type = gene['transcript_type'][pos]

            # UTRs are only derived when both exon and CDS coordinates are present.
            if exons.any() and gene['cds_exons'][pos].any():
                utr5, utr3 = helper.buildUTR(gene['cds_exons'][pos], exons, orientation)
            else:
                utr5, utr3 = [], []

            tx_id = transcript[0]
            emit(transcript_row % (seqid, origin, tx_type, tx_start, tx_stop, orientation, tx_id, gene_id))

            for seg in utr5:
                emit(child_row % (seqid, origin, 'five_prime_UTR', seg[0], seg[1], orientation, tx_id))

            for seg in gene['cds_exons'][pos]:
                emit(cds_row % (seqid, origin, seg[0], seg[1], orientation, seg[2], tx_id))

            for seg in utr3:
                emit(child_row % (seqid, origin, 'three_prime_UTR', seg[0], seg[1], orientation, tx_id))

            for seg in exons:
                emit(child_row % (seqid, origin, 'exon', seg[0], seg[1], orientation, tx_id))
| 63 | |
| 64 | |
def __main__():
    """
    Command-line entry point: convert the GTF file named by the first
    command-line argument to GFF3 on standard output.

    When no input file is given, the module usage text is printed and the
    process exits with status -1.
    """

    try:
        gtf_fname = sys.argv[1]
    except IndexError:
        # Only a missing argument is handled here; a bare except would also
        # swallow KeyboardInterrupt/SystemExit. The parenthesised single-argument
        # print behaves the same on Python 2 and Python 3.
        print(__doc__)
        sys.exit(-1)

    gtf_file_content = GFFParser.Parse(gtf_fname)

    GFFWriter(gtf_file_content)
| 76 | |
# Run the converter only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    __main__()
