annotate tools/data_source/genbank.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 from Bio import GenBank
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 import sys, os, textwrap
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 assert sys.version_info[:2] >= ( 2, 4 )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
def make_fasta(rec):
    """Create a FASTA header line and wrapped sequence body from a record.

    rec must provide ``annotations`` (a mapping), ``id`` and ``seq``.
    The 'gi', 'organism' and 'date' annotations are optional; a missing
    one is rendered as an empty string.

    Returns a ``(head, body)`` tuple. ``head`` is the '>' description line
    including its trailing newline; ``body`` is the sequence broken into
    lines of at most 80 characters, without a trailing newline.
    """
    annotations = rec.annotations
    gi = annotations.get('gi', '')
    org = annotations.get('organism', '')
    date = annotations.get('date', '')
    head = '>gi:%s, id:%s, org:%s, date:%s\n' % (gi, rec.id, org, date)

    # Older Biopython exposed the raw sequence string as ``Seq.data``; newer
    # releases dropped that attribute, so fall back to ``str(seq)``.
    sequence = getattr(rec.seq, 'data', None)
    if not isinstance(sequence, str):
        sequence = str(rec.seq)

    # Fixed-width slicing yields the same lines as textwrap.wrap() for a
    # whitespace-free sequence, without re-copying the remaining text for
    # every output line.
    width = 80
    body = '\n'.join(
        [sequence[start:start + width]
         for start in range(0, len(sequence), width)]
    )
    return head, body
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
if __name__ == '__main__':
    # Command line: <mode> <text> <output_file>
    #   mode        -- forwarded as the first argument of
    #                  GenBank.NCBIDictionary (presumably the NCBI database
    #                  name -- TODO confirm against the tool wrapper)
    #   text        -- whitespace-separated numeric ids, or a free-text query
    #   output_file -- path that receives the FASTA records
    if len(sys.argv) < 4:
        sys.exit('Usage: %s <mode> <text> <output_file>' % sys.argv[0])
    mode, text, output_file = sys.argv[1:4]

    # Single-argument call form prints identically on Python 2 and 3.
    print('Searching for %s <br>' % text)

    # If every token parses as an integer, treat the input as a list of ids;
    # otherwise run it as a search query limited to 10 hits. An explicit loop
    # is used because map() is lazy on Python 3 and would never raise here.
    gi_list = text.split()
    try:
        for token in gi_list:
            int(token)
    except ValueError:
        gi_list = GenBank.search_for(text, max_ids=10)

    fp = open(output_file, 'wt')
    try:
        record_parser = GenBank.FeatureParser()
        ncbi_dict = GenBank.NCBIDictionary(mode, 'genbank',
                                           parser=record_parser)
        for gid in gi_list:
            res = ncbi_dict[gid]
            head, body = make_fasta(res)
            fp.write(head + body + '\n')
            # Echo each header to stdout as progress output.
            print(head)
    finally:
        # Close the output file even when a fetch or parse fails mid-loop.
        fp.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42