Mercurial > repos > bgruening > trna_prediction
view aragorn_out_to_gff3.py @ 2:358f58401cd6 draft default tip
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/rna_tools/trna_prediction commit cfb19d75629f02e0dea4475c16c016ed5510eb44
author | bgruening |
---|---|
date | Wed, 26 Jul 2017 10:14:05 -0400 |
parents | d788d1abe238 |
children |
line wrap: on
line source
#!/usr/bin/env python import sys full_gene_model = False if '--full' in sys.argv: full_gene_model = True genome_id = None stdin_data = [] KEY_ORDER = ('parent', 'source', 'type', 'start', 'end', 'score', 'strand', '8', 'quals') # Table of amino acids aa_table = { 'Ala' : 'A', 'Arg' : 'R', 'Asn' : 'N', 'Asp' : 'D', 'Cys' : 'C', 'Gln' : 'Q', 'Glu' : 'E', 'Gly' : 'G', 'His' : 'H', 'Ile' : 'I', 'Leu' : 'L', 'Lys' : 'K', 'Met' : 'M', 'Phe' : 'F', 'Pro' : 'P', 'Ser' : 'S', 'Thr' : 'T', 'Trp' : 'W', 'Tyr' : 'Y', 'Val' : 'V', 'Pyl' : 'O', 'seC' : 'U', '???' : 'X' } def output_line(gff3): print '\t'.join(str(gff3[x]) for x in KEY_ORDER) print '##gff-version 3' for line in sys.stdin: if line.startswith('>'): genome_id = line[1:].strip() if ' ' in genome_id: genome_id = genome_id[0:genome_id.index(' ')] else: data = line.split() if len(data) == 5: # Parse data strand = '-' if data[2].startswith('c') else '+' start, end = data[2][data[2].index('[') + 1:-1].split(',') gff3 = { 'parent': genome_id, 'source': 'aragorn', 'start': int(start), 'end': int(end), 'strand': strand, 'score': '.', '8': '.', } aa_long = data[1][5:] aa_short = aa_table[aa_long] anticodon = data[4][1:data[4].index(")")].upper().replace("T", "U") name = 'trn{}-{}'.format(aa_short, anticodon) if not full_gene_model: gff3.update({ 'type': 'tRNA', 'quals': 'ID=tRNA{0}.{1};Name={name};product={2}'.format(genome_id, *data, name = name), }) output_line(gff3) else: gff3.update({ 'type': 'gene', 'quals': 'ID=gene{0}.{1};Name={name};product={2}'.format(genome_id, *data, name = name), }) output_line(gff3) gff3.update({ 'type': 'tRNA', 'quals': 'ID=tRNA{0}.{1};Parent=gene{0}.{1};Name={name};product={2}'.format(genome_id, *data, name = name), }) output_line(gff3) # If no introns if ')i(' not in data[4]: gff3['type'] = 'exon' gff3['quals'] = 'Parent=tRNA{0}.{1}'.format(genome_id, *data) output_line(gff3) else: intron_location = data[4][data[4].rindex('(') + 1:-1].split(',') intron_start, intron_length = map(int, intron_location) if strand == '+': original_end = gff3['end'] else: original_end = gff3['start'] # EXON gff3.update({ 'type': 'exon', 'quals': 'Parent=tRNA{0}.{1}'.format(genome_id, *data), }) if strand == '+': gff3['end'] = gff3['start'] + intron_start - 2 else: gff3['start'] = gff3['end'] - intron_start + 2 output_line(gff3) # INTRON gff3.update({ 'type': 'intron', 'quals': 'Parent=tRNA{0}.{1}'.format(genome_id, *data), }) if strand == '+': gff3['start'] = gff3['end'] + 1 gff3['end'] = gff3['start'] + intron_length + 2 else: gff3['end'] = gff3['start'] - 1 gff3['start'] = gff3['end'] - intron_length + 1 output_line(gff3) # EXON gff3.update({ 'type': 'exon', 'quals': 'Parent=tRNA{0}.{1}'.format(genome_id, *data), }) if strand == '+': gff3['start'] = gff3['end'] + 1 gff3['end'] = original_end else: gff3['end'] = gff3['start'] - 1 gff3['start'] = original_end output_line(gff3)