annotate gbk2fa.py @ 25:5c7b70713fb5 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit a4c17efb8ec4b3a315766f5b5602effa275fade3
author iuc
date Wed, 03 Aug 2022 16:33:45 +0000
parents cfcf33df7fc0
children ca2b512e8d7c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
5b4ac70948d2 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff changeset
1 import argparse
5b4ac70948d2 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff changeset
2 import bz2
5b4ac70948d2 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff changeset
3 import gzip
5b4ac70948d2 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff changeset
4
5b4ac70948d2 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff changeset
5 from Bio import SeqIO
5b4ac70948d2 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff changeset
6
5b4ac70948d2 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff changeset
7
25
5c7b70713fb5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit a4c17efb8ec4b3a315766f5b5602effa275fade3
iuc
parents: 24
diff changeset
def get_opener(gbk_filename):
    """Return the callable suited to opening *gbk_filename*.

    The compression format is detected from the file content, not from
    its name: one byte is read through ``bz2`` and, failing that,
    through ``gzip``. The first decompressor that reads without raising
    ``OSError`` wins.

    Args:
        gbk_filename: Path of the (possibly compressed) input file.

    Returns:
        ``bz2.open``, ``gzip.open`` or the builtin ``open``. The builtin
        is also returned when the file cannot be opened at all, so the
        caller's own ``open`` call is what reports that error.
    """
    for opener in (bz2.open, gzip.open):
        try:
            # Probe inside a context manager so the handle used for
            # detection is closed again instead of being leaked.
            with opener(gbk_filename) as probe:
                probe.read(1)
        except OSError:
            # Not this format (or unreadable): try the next candidate.
            continue
        return opener
    return open
10
5b4ac70948d2 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff changeset
19
5b4ac70948d2 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff changeset
20
25
5c7b70713fb5 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit a4c17efb8ec4b3a315766f5b5602effa275fade3
iuc
parents: 24
diff changeset
# Command-line interface: two positional paths (GenBank in, FASTA out)
# and one optional flag controlling how record identifiers are written.
parser = argparse.ArgumentParser()
parser.add_argument(
    "genbank_file",
    help="GenBank input file. Can be compressed with gzip or bzip2"
)
parser.add_argument(
    "fasta_file", help="FASTA output dataset"
)
parser.add_argument(
    "--remove_version", action="store_true",
    help="Remove version number from NCBI formatted accession numbers. "
         "For example, this would convert 'B000657.2' to 'B000657'"
)
args = parser.parse_args()


# Choose the opener from the file content, then read the input in text
# mode ('rt') so the same code path serves plain and compressed files.
gbk_open = get_opener(args.genbank_file)
with gbk_open(args.genbank_file, 'rt') as input_handle, \
        open(args.fasta_file, 'w') as output_handle:
    for seq_record in SeqIO.parse(input_handle, 'genbank'):
        if args.remove_version:
            # Keep only the part of the identifier before the first '.'.
            seq_id = seq_record.id.split('.')[0]
        else:
            seq_id = seq_record.id
        # Progress message goes to stdout; the record goes to the file.
        print('Writing FASTA record: {}'.format(seq_id))
        print('>' + seq_id, file=output_handle)
        print(seq_record.seq, file=output_handle)