Mercurial > repos > bgruening > glimmer_gbk_to_orf
comparison glimmer_gbk_to_orf.py @ 0:6351903666da draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
author | bgruening |
---|---|
date | Tue, 28 Nov 2017 10:07:28 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:6351903666da |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 ################################################################### | |
4 # | |
5 # gbk2orf.py by Errol Strain (estrain@gmail.com) | |
6 # | |
7 # Read a GenBank file and export fasta formatted amino acid and | |
8 # CDS files | |
9 # | |
10 ################################################################### | |
11 | |
12 import sys | |
13 from optparse import OptionParser | |
14 | |
15 from Bio import SeqIO | |
16 from Bio.Seq import Seq | |
17 from Bio.SeqRecord import SeqRecord | |
18 | |
19 | |
20 # Command line usage | |
21 usage = "usage: %prog -g input.gbk -a aa.fasta -n nuc.fasta" | |
22 p = OptionParser(usage) | |
23 p.add_option("-t", "--translate", dest="transtabl", type="int", default=11, | |
24 help="Translation table used to translate coding regions (default=11)") | |
25 p.add_option("-g", "--genbank", dest="gb_file", help="GenBank input file") | |
26 p.add_option("-a", "--amino_acid", dest="aa_file", help="Fasta amino acid output") | |
27 p.add_option("-n", "--nucleotide", dest="orf_file", help="Fasta nucleotide output") | |
28 (opts, args) = p.parse_args() | |
29 # Do I need this next line? | |
30 if not opts and not args: | |
31 p.error("Use --help to see usage") | |
32 if len(sys.argv) == 1: | |
33 p.error("Use --help to see usage") | |
34 | |
35 # Lists to hold SeqRecords | |
36 aalist = [] | |
37 nuclist = [] | |
38 | |
39 # If the CDS does not have a locus tag the name will be assigned using the | |
40 # order in which it was found | |
41 feat_count = 0 | |
42 | |
43 # Iterate through genbank records in input file | |
44 for gb_record in SeqIO.parse(open(opts.gb_file, "r"), "genbank"): | |
45 for (index, feature) in enumerate(gb_record.features): | |
46 if feature.type == "CDS": | |
47 feat_count = feat_count + 1 | |
48 gene = feature.extract(gb_record.seq) | |
49 if "locus_tag" in feature.qualifiers: | |
50 value = feature.qualifiers["locus_tag"][0] | |
51 else: | |
52 value = "Index_" + str(feat_count) | |
53 nuclist.append(SeqRecord(Seq(str(gene)), id=value, name=value)) | |
54 pro = Seq(str(gene.translate(table=opts.transtabl, to_stop=True))) | |
55 aalist.append(SeqRecord(pro, id=value, name=value)) | |
56 | |
57 # Write out lists in fasta format | |
58 aa_handle = open(opts.aa_file, "w") | |
59 SeqIO.write(aalist, aa_handle, "fasta") | |
60 aa_handle.close() | |
61 orf_handle = open(opts.orf_file, "w") | |
62 SeqIO.write(nuclist, orf_handle, "fasta") | |
63 orf_handle.close() |