diff glimmer_gbk_to_orf.py @ 0:9c195b26a5ac draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/glimmer commit 37388949e348d221170659bbee547bf4ac67ef1a
author bgruening
date Tue, 28 Nov 2017 10:08:06 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/glimmer_gbk_to_orf.py	Tue Nov 28 10:08:06 2017 -0500
@@ -0,0 +1,63 @@
+#!/usr/bin/env python
+
+###################################################################
+#
+# gbk2orf.py by Errol Strain (estrain@gmail.com)
+#
+# Read a GenBank file and export fasta formatted amino acid and
+# CDS files
+#
+###################################################################
+
+import sys
+from optparse import OptionParser
+
+from Bio import SeqIO
+from Bio.Seq import Seq
+from Bio.SeqRecord import SeqRecord
+
+
+# Command line usage
+usage = "usage: %prog -g input.gbk -a aa.fasta -n nuc.fasta"
+p = OptionParser(usage)
+p.add_option("-t", "--translate", dest="transtabl", type="int", default=11,
+             help="Translation table used to translate coding regions (default=11)")
+p.add_option("-g", "--genbank", dest="gb_file", help="GenBank input file")
+p.add_option("-a", "--amino_acid", dest="aa_file", help="Fasta amino acid output")
+p.add_option("-n", "--nucleotide", dest="orf_file", help="Fasta nucleotide output")
+(opts, args) = p.parse_args()
+# Do I need this next line?
+if not opts and not args:
+    p.error("Use --help to see usage")
+if len(sys.argv) == 1:
+    p.error("Use --help to see usage")
+
+# Lists to hold SeqRecords
+aalist = []
+nuclist = []
+
+# If the CDS does not have a locus tag the name will be assigned using the
+# order in which it was found
+feat_count = 0
+
+# Iterate through genbank records in input file
+for gb_record in SeqIO.parse(open(opts.gb_file, "r"), "genbank"):
+    for (index, feature) in enumerate(gb_record.features):
+        if feature.type == "CDS":
+            feat_count = feat_count + 1
+            gene = feature.extract(gb_record.seq)
+            if "locus_tag" in feature.qualifiers:
+                value = feature.qualifiers["locus_tag"][0]
+            else:
+                value = "Index_" + str(feat_count)
+            nuclist.append(SeqRecord(Seq(str(gene)), id=value, name=value))
+            pro = Seq(str(gene.translate(table=opts.transtabl, to_stop=True)))
+            aalist.append(SeqRecord(pro, id=value, name=value))
+
+# Write out lists in fasta format
+aa_handle = open(opts.aa_file, "w")
+SeqIO.write(aalist, aa_handle, "fasta")
+aa_handle.close()
+orf_handle = open(opts.orf_file, "w")
+SeqIO.write(nuclist, orf_handle, "fasta")
+orf_handle.close()