annotate glimmer3/gbk_to_orf.py @ 0:e11e303c39a4 default tip

Uploaded
author bjoern-gruening
date Wed, 11 Jan 2012 09:34:45 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
1 #!/usr/bin/env python
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
2
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
3 ###################################################################
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
4 ##
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
5 ## gbk2orf.py by Errol Strain (estrain@gmail.com)
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
6 ##
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
7 ## Read a GenBank file and export fasta formatted amino acid and
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
8 ## CDS files
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
9 ##
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
10 ###################################################################
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
11
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
12 import sys
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
13 from optparse import OptionParser
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
14 from Bio import SeqIO
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
15 from Bio.Seq import Seq
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
16 from Bio.SeqRecord import SeqRecord
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
17
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
18
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
19 ## Command line usage
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
20 usage = "usage: %prog -g input.gbk -a aa.fasta -n nuc.fasta"
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
21 p = OptionParser(usage)
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
22 p.add_option("-t","--translate", dest="transtabl",type="int",default=11,
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
23 help="Translation table used to translate coding regions (default=11)")
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
24 p.add_option("-g","--genbank", dest="gb_file",help="GenBank input file")
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
25 p.add_option("-a","--amino_acid", dest="aa_file",help="Fasta amino acid output")
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
26 p.add_option("-n","--nucleotide", dest="orf_file",help="Fasta nucleotide output")
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
27 (opts, args) = p.parse_args()
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
28 ## Do I need this next line?
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
29 if not opts and not args : p.error("Use --help to see usage")
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
30 if len(sys.argv)==1 : p.error("Use --help to see usage")
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
31
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
32 ## Lists to hold SeqRecords
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
33 aalist = []
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
34 nuclist = []
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
35
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
36 ## If the CDS does not have a locus tag the name will be assigned using the
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
37 ## order in which it was found
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
38 feat_count=0
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
39
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
40 ## Iterate through genbank records in input file
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
41 for gb_record in SeqIO.parse(open(opts.gb_file,"r"), "genbank") :
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
42 for (index, feature) in enumerate(gb_record.features) :
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
43 if feature.type=="CDS" :
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
44 feat_count = feat_count + 1
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
45 gene = feature.extract(gb_record.seq)
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
46 if "locus_tag" in feature.qualifiers :
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
47 value = feature.qualifiers["locus_tag"][0]
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
48 else :
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
49 value = "Index_" + str(feat_count)
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
50 nuclist.append(SeqRecord(Seq(str(gene)),id=value,name=value))
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
51 pro=Seq(str(gene.translate(table=opts.transtabl,to_stop=True)))
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
52 aalist.append(SeqRecord(pro,id=value,name=value))
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
53
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
54 ## Write out lists in fasta format
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
55 aa_handle = open(opts.aa_file,"w")
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
56 SeqIO.write(aalist,aa_handle,"fasta")
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
57 aa_handle.close()
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
58 orf_handle = open(opts.orf_file,"w")
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
59 SeqIO.write(nuclist,orf_handle,"fasta")
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
60 orf_handle.close()
e11e303c39a4 Uploaded
bjoern-gruening
parents:
diff changeset
61