comparison gbk_to_five_col.py @ 1:1bdd481d5c25 draft

planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author cpt
date Mon, 05 Jun 2023 02:42:57 +0000
parents
children
comparison
equal deleted inserted replaced
0:66143811fe8a 1:1bdd481d5c25
1 #!/usr/bin/env python
2 import BIO_FIX_TOPO # NOQA
3 import argparse
4 import logging
5 from Bio import SeqIO
6
# Module-wide logger: the root logger, with logging configured at INFO level.
log = logging.getLogger()
logging.basicConfig(level=logging.INFO)
9
10
11 # Read in Genbank file and parse features
12 # Output features into Five Column format
13
14 """
15 >Feature SeqID
16 Line 1
17 Column 1: Start location (first nucleotide) of a feature
18 Column 2: Stop location (last nucleotide) of a feature
19 Column 3: Feature name (for example, 'CDS' or 'mRNA' or 'rRNA' or 'gene' or 'exon')
20 Line 2
21 Column 4: Qualifier name (for example, 'product' or 'number' or 'gene' or 'note')
22 Column 5: Qualifier value
23
24 Repeat for each feature in a seq
25 Repeat Line 2 for each qualifier in a feature
26 """
27
28
def gbk_to_5col(genbank):
    """Print the features of a GenBank file in BankIt five column format.

    For every record, a ``>Feature <record id>`` header line is printed.
    Each feature other than ``source`` is then written as:

    * one tab-separated ``start, end`` line per location part, where the
      first part's line also carries the feature type as a third column;
    * one line per qualifier value, with three leading tabs followed by the
      qualifier name, a tab, and the value.

    Args:
        genbank: Handle (or path) passed straight to ``SeqIO.parse`` with
            the ``"genbank"`` format.

    Returns:
        None. All output goes to standard output via ``print``.
    """
    for record in SeqIO.parse(genbank, "genbank"):
        print(">Feature %s" % record.id)
        for feature in record.features:
            # The source feature describes the whole record and is not emitted.
            if feature.type == "source":
                continue

            for index, part in enumerate(feature.location.parts):
                # Shift the start by one so the printed range is 1-based and
                # inclusive (Biopython positions are 0-based, end-exclusive).
                low = int(part.start) + 1
                high = int(part.end)

                # Only an explicit reverse strand (-1) swaps the columns.
                # Unstranded parts (strand None, e.g. protein records) and
                # strand 0 are written in forward order; comparing None with
                # ``> 0`` would raise TypeError on Python 3.
                if part.strand == -1:
                    start, end = high, low
                else:
                    start, end = low, high

                if index == 0:
                    # Only the first interval of a feature names its type.
                    print("%d\t%d\t%s" % (start, end, feature.type))
                else:
                    print("%d\t%d" % (start, end))

            for qualifier, values in feature.qualifiers.items():
                for value in values:
                    print("\t\t\t%s\t%s" % (qualifier, value))
52
53
if __name__ == "__main__":
    # Command-line entry point: takes one positional GenBank file, opened for
    # reading by argparse, and hands it to the converter.
    cli = argparse.ArgumentParser(
        description="Convert a Genbank file into five column format"
    )
    cli.add_argument("genbank", type=argparse.FileType("r"), help="Genbank file")

    options = cli.parse_args()
    gbk_to_5col(genbank=options.genbank)