annotate gff_to_gbk.py @ 8:d4f9b7beb52f

cleaning the repository - GUI make it hard
author vipints <vipin@cbio.mskcc.org>
date Thu, 23 Apr 2015 17:51:14 -0400
parents 6e589f267c14
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5
6e589f267c14 Uploaded
devteam
parents:
diff changeset
1 #!/usr/bin/env python
6e589f267c14 Uploaded
devteam
parents:
diff changeset
2 """
6e589f267c14 Uploaded
devteam
parents:
diff changeset
3 Convert data from GFF and associated genome sequence in fasta file into GenBank.
6e589f267c14 Uploaded
devteam
parents:
diff changeset
4
6e589f267c14 Uploaded
devteam
parents:
diff changeset
5 Usage:
6e589f267c14 Uploaded
devteam
parents:
diff changeset
6 python gff_to_gbk.py in.gff in.fasta out.gbk
6e589f267c14 Uploaded
devteam
parents:
diff changeset
7
6e589f267c14 Uploaded
devteam
parents:
diff changeset
8 Requirements:
6e589f267c14 Uploaded
devteam
parents:
diff changeset
9 BioPython:- http://biopython.org/
6e589f267c14 Uploaded
devteam
parents:
diff changeset
10 helper.py : https://github.com/vipints/GFFtools-GX/blob/master/helper.py
6e589f267c14 Uploaded
devteam
parents:
diff changeset
11
6e589f267c14 Uploaded
devteam
parents:
diff changeset
12 Copyright (C)
6e589f267c14 Uploaded
devteam
parents:
diff changeset
13 2010-2012 Friedrich Miescher Laboratory of the Max Planck Society, Tubingen, Germany.
6e589f267c14 Uploaded
devteam
parents:
diff changeset
14 2012-2014 Memorial Sloan Kettering Cancer Center New York City, USA.
6e589f267c14 Uploaded
devteam
parents:
diff changeset
15 """
6e589f267c14 Uploaded
devteam
parents:
diff changeset
16
6e589f267c14 Uploaded
devteam
parents:
diff changeset
17 import sys
6e589f267c14 Uploaded
devteam
parents:
diff changeset
18 import helper
6e589f267c14 Uploaded
devteam
parents:
diff changeset
19 import gffparser_bcbio
6e589f267c14 Uploaded
devteam
parents:
diff changeset
20
6e589f267c14 Uploaded
devteam
parents:
diff changeset
21 from Bio import SeqIO
6e589f267c14 Uploaded
devteam
parents:
diff changeset
22 from Bio.Alphabet import generic_dna
6e589f267c14 Uploaded
devteam
parents:
diff changeset
23
6e589f267c14 Uploaded
devteam
parents:
diff changeset
def __main__():
    """
    Convert a GFF annotation and its FASTA genome sequence into GenBank.

    Reads three positional command-line arguments from ``sys.argv``:

    1. path of the input GFF file
    2. path of the input FASTA file
    3. path of the GenBank file to write

    If fewer than three arguments are given, the module usage text is
    printed and the process exits with status -1.  The process also exits
    with status -1 when the output file cannot be opened for writing.
    """

    try:
        gff_fname = sys.argv[1]
        fasta_fname = sys.argv[2]
        gb_fname = sys.argv[3]
    except IndexError:
        # Only a missing positional argument is a usage error.  A bare
        # ``except`` here would also intercept KeyboardInterrupt/SystemExit.
        print(__doc__)
        sys.exit(-1)

    # helper.open_file is a project helper; the only thing relied on here is
    # that the returned handle is iterable by SeqIO and has close().
    # NOTE(review): presumably it handles compressed input - confirm in helper.py
    fasta_fh = helper.open_file(fasta_fname)
    try:
        # Load every FASTA record into a dict keyed by record id.
        fasta_rec = SeqIO.to_dict(SeqIO.parse(fasta_fh, "fasta", generic_dna))
    finally:
        # Release the handle even when the FASTA content fails to parse.
        fasta_fh.close()

    # Combine the GFF features with the sequence records loaded above.
    gff_rec = gffparser_bcbio.parse(gff_fname, fasta_rec)

    try:
        gb_fh = open(gb_fname, "w")
    except (IOError, OSError):
        # Restrict the handler to real I/O failures so that unrelated errors
        # are not reported as "file not ready for writing".
        print('file not ready for writing %s' % gb_fname)
        sys.exit(-1)

    try:
        SeqIO.write(gff_rec, gb_fh, "genbank")
    finally:
        # Always close the output so buffered data is flushed and the
        # descriptor is not leaked if writing raises.
        gb_fh.close()
6e589f267c14 Uploaded
devteam
parents:
diff changeset
52
6e589f267c14 Uploaded
devteam
parents:
diff changeset
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    __main__()