Mercurial > repos > iuc > snpeff
comparison gbk2fa.py @ 25:5c7b70713fb5 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit a4c17efb8ec4b3a315766f5b5602effa275fade3
author | iuc |
---|---|
date | Wed, 03 Aug 2022 16:33:45 +0000 |
parents | cfcf33df7fc0 |
children | ca2b512e8d7c |
Comparison legend: equal | deleted | inserted | replaced
24:cfcf33df7fc0 | 25:5c7b70713fb5 |
---|---|
1 import argparse | 1 import argparse |
2 import bz2 | 2 import bz2 |
3 import contextlib | |
4 import gzip | 3 import gzip |
5 import sys | |
6 | 4 |
7 import magic | |
8 from Bio import SeqIO | 5 from Bio import SeqIO |
9 | 6 |
7 | |
8 def get_opener(gbk_filename): | |
9 try: | |
10 bz2.open(gbk_filename).read(1) | |
11 return bz2.open | |
12 except OSError: | |
13 pass | |
14 try: | |
15 gzip.open(gbk_filename).read(1) | |
16 return gzip.open | |
17 except OSError: | |
18 return open | |
19 | |
20 | |
10 parser = argparse.ArgumentParser() | 21 parser = argparse.ArgumentParser() |
11 parser.add_argument("genbank_file", help="GenBank input file. Can be compressed with gzip or bzip2") | 22 parser.add_argument( |
12 parser.add_argument("fasta_file", help="FASTA output datset") | 23 "genbank_file", |
13 parser.add_argument("--remove_version", dest="remove_version", action="store_true", help="Remove version number from NCBI form formatted accession numbers. For example, this would convert 'B000657.2' to 'B000657'") | 24 help="GenBank input file. Can be compressed with gzip or bzip2" |
25 ) | |
26 parser.add_argument( | |
27 "fasta_file", help="FASTA output datset" | |
28 ) | |
29 parser.add_argument( | |
30 "--remove_version", action="store_true", | |
31 help="Remove version number from NCBI form formatted accession numbers. " | |
32 "For example, this would convert 'B000657.2' to 'B000657'" | |
33 ) | |
14 args = parser.parse_args() | 34 args = parser.parse_args() |
15 | 35 |
16 gbk_filename = args.genbank_file | |
17 fa_filename = args.fasta_file | |
18 | 36 |
19 | 37 gbk_open = get_opener(args.genbank_file) |
20 @contextlib.contextmanager | 38 with gbk_open(args.genbank_file, 'rt') as input_handle, \ |
21 def get_file_handle(gbk_filename): | 39 open(args.fasta_file, 'w') as output_handle: |
22 f_type = magic.from_file(args.genbank_file, mime=True) | 40 for seq_record in SeqIO.parse(input_handle, 'genbank'): |
23 if f_type == 'text/plain': | |
24 input_handle = open(gbk_filename, "r") | |
25 elif f_type == 'application/gzip' or f_type == 'application/x-gzip': | |
26 input_handle = gzip.open(gbk_filename, "rt") | |
27 elif f_type == 'application/x-bzip2': | |
28 input_handle = bz2.open(gbk_filename, "rt") | |
29 else: | |
30 sys.exit("Cannot process file of type {}. Only plain, gzip'ed, and bzip2'ed genbank files are accepted ".format(f_type)) | |
31 yield input_handle | |
32 input_handle.close() | |
33 | |
34 | |
35 with get_file_handle(gbk_filename) as input_handle, open(fa_filename, "w") as output_handle: | |
36 | |
37 for seq_record in SeqIO.parse(input_handle, "genbank"): | |
38 if args.remove_version: | 41 if args.remove_version: |
39 seq_id = seq_record.id.split('.')[0] | 42 seq_id = seq_record.id.split('.')[0] |
40 else: | 43 else: |
41 seq_id = seq_record.id | 44 seq_id = seq_record.id |
42 print('Writing FASTA record: {}'.format(seq_id)) | 45 print('Writing FASTA record: {}'.format(seq_id)) |
43 output_handle.write(">{}\n{}\n".format(seq_id, seq_record.seq)) | 46 print('>' + seq_id, file=output_handle) |
47 print(seq_record.seq, file=output_handle) |