Mercurial > repos > bjoern-gruening > glimmer3
comparison glimmer3/gbk_to_orf.py @ 0:e11e303c39a4 default tip
Uploaded
author | bjoern-gruening |
---|---|
date | Wed, 11 Jan 2012 09:34:45 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e11e303c39a4 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 ################################################################### | |
4 ## | |
5 ## gbk2orf.py by Errol Strain (estrain@gmail.com) | |
6 ## | |
7 ## Read a GenBank file and export fasta formatted amino acid and | |
8 ## CDS files | |
9 ## | |
10 ################################################################### | |
11 | |
12 import sys | |
13 from optparse import OptionParser | |
14 from Bio import SeqIO | |
15 from Bio.Seq import Seq | |
16 from Bio.SeqRecord import SeqRecord | |
17 | |
18 | |
19 ## Command line usage | |
20 usage = "usage: %prog -g input.gbk -a aa.fasta -n nuc.fasta" | |
21 p = OptionParser(usage) | |
22 p.add_option("-t","--translate", dest="transtabl",type="int",default=11, | |
23 help="Translation table used to translate coding regions (default=11)") | |
24 p.add_option("-g","--genbank", dest="gb_file",help="GenBank input file") | |
25 p.add_option("-a","--amino_acid", dest="aa_file",help="Fasta amino acid output") | |
26 p.add_option("-n","--nucleotide", dest="orf_file",help="Fasta nucleotide output") | |
27 (opts, args) = p.parse_args() | |
28 ## Do I need this next line? | |
29 if not opts and not args : p.error("Use --help to see usage") | |
30 if len(sys.argv)==1 : p.error("Use --help to see usage") | |
31 | |
32 ## Lists to hold SeqRecords | |
33 aalist = [] | |
34 nuclist = [] | |
35 | |
36 ## If the CDS does not have a locus tag the name will be assigned using the | |
37 ## order in which it was found | |
38 feat_count=0 | |
39 | |
40 ## Iterate through genbank records in input file | |
41 for gb_record in SeqIO.parse(open(opts.gb_file,"r"), "genbank") : | |
42 for (index, feature) in enumerate(gb_record.features) : | |
43 if feature.type=="CDS" : | |
44 feat_count = feat_count + 1 | |
45 gene = feature.extract(gb_record.seq) | |
46 if "locus_tag" in feature.qualifiers : | |
47 value = feature.qualifiers["locus_tag"][0] | |
48 else : | |
49 value = "Index_" + str(feat_count) | |
50 nuclist.append(SeqRecord(Seq(str(gene)),id=value,name=value)) | |
51 pro=Seq(str(gene.translate(table=opts.transtabl,to_stop=True))) | |
52 aalist.append(SeqRecord(pro,id=value,name=value)) | |
53 | |
54 ## Write out lists in fasta format | |
55 aa_handle = open(opts.aa_file,"w") | |
56 SeqIO.write(aalist,aa_handle,"fasta") | |
57 aa_handle.close() | |
58 orf_handle = open(opts.orf_file,"w") | |
59 SeqIO.write(nuclist,orf_handle,"fasta") | |
60 orf_handle.close() | |
61 |