Mercurial > repos > bjoern-gruening > glimmer3
diff glimmer3/gbk_to_orf.py @ 0:e11e303c39a4 default tip
Uploaded
author | bjoern-gruening |
---|---|
date | Wed, 11 Jan 2012 09:34:45 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/glimmer3/gbk_to_orf.py Wed Jan 11 09:34:45 2012 -0500 @@ -0,0 +1,61 @@ +#!/usr/bin/env python + +################################################################### +## +## gbk2orf.py by Errol Strain (estrain@gmail.com) +## +## Read a GenBank file and export fasta formatted amino acid and +## CDS files +## +################################################################### + +import sys +from optparse import OptionParser +from Bio import SeqIO +from Bio.Seq import Seq +from Bio.SeqRecord import SeqRecord + + +## Command line usage +usage = "usage: %prog -g input.gbk -a aa.fasta -n nuc.fasta" +p = OptionParser(usage) +p.add_option("-t","--translate", dest="transtabl",type="int",default=11, + help="Translation table used to translate coding regions (default=11)") +p.add_option("-g","--genbank", dest="gb_file",help="GenBank input file") +p.add_option("-a","--amino_acid", dest="aa_file",help="Fasta amino acid output") +p.add_option("-n","--nucleotide", dest="orf_file",help="Fasta nucleotide output") +(opts, args) = p.parse_args() +## Do I need this next line? +if not opts and not args : p.error("Use --help to see usage") +if len(sys.argv)==1 : p.error("Use --help to see usage") + +## Lists to hold SeqRecords +aalist = [] +nuclist = [] + +## If the CDS does not have a locus tag the name will be assigned using the +## order in which it was found +feat_count=0 + +## Iterate through genbank records in input file +for gb_record in SeqIO.parse(open(opts.gb_file,"r"), "genbank") : + for (index, feature) in enumerate(gb_record.features) : + if feature.type=="CDS" : + feat_count = feat_count + 1 + gene = feature.extract(gb_record.seq) + if "locus_tag" in feature.qualifiers : + value = feature.qualifiers["locus_tag"][0] + else : + value = "Index_" + str(feat_count) + nuclist.append(SeqRecord(Seq(str(gene)),id=value,name=value)) + pro=Seq(str(gene.translate(table=opts.transtabl,to_stop=True))) + aalist.append(SeqRecord(pro,id=value,name=value)) + +## Write out lists in fasta format +aa_handle = open(opts.aa_file,"w") +SeqIO.write(aalist,aa_handle,"fasta") +aa_handle.close() +orf_handle = open(opts.orf_file,"w") +SeqIO.write(nuclist,orf_handle,"fasta") +orf_handle.close() +