comparison gbk2fa.py @ 29:ca2b512e8d7c draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit 4cc10cfe702828a91ecf8bb58d3f84a36b0578f7
author iuc
date Mon, 21 Oct 2024 13:56:15 +0000
parents 5c7b70713fb5
children
comparison
equal deleted inserted replaced
28:6322be79bd8e 29:ca2b512e8d7c
4 4
5 from Bio import SeqIO 5 from Bio import SeqIO
6 6
7 7
8 def get_opener(gbk_filename): 8 def get_opener(gbk_filename):
9 """Determines the appropriate opener for a given file, supporting
10 bzip2, gzip, or standard open.
11 """
9 try: 12 try:
10 bz2.open(gbk_filename).read(1) 13 bz2.open(gbk_filename).read(1)
11 return bz2.open 14 return bz2.open
12 except OSError: 15 except OSError:
13 pass 16 pass
16 return gzip.open 19 return gzip.open
17 except OSError: 20 except OSError:
18 return open 21 return open
19 22
20 23
21 parser = argparse.ArgumentParser() 24 def main():
22 parser.add_argument( 25 parser = argparse.ArgumentParser(
23 "genbank_file", 26 description="Convert GenBank files to FASTA format. "
24 help="GenBank input file. Can be compressed with gzip or bzip2" 27 "Supports gzip and bzip2 compressed files."
25 ) 28 )
26 parser.add_argument( 29 parser.add_argument(
27 "fasta_file", help="FASTA output datset" 30 "genbank_file",
28 ) 31 help="GenBank input file. Can be compressed with gzip or bzip2"
29 parser.add_argument( 32 )
30 "--remove_version", action="store_true", 33 parser.add_argument(
31 help="Remove version number from NCBI form formatted accession numbers. " 34 "fasta_file",
32 "For example, this would convert 'B000657.2' to 'B000657'" 35 help="FASTA output dataset"
33 ) 36 )
34 args = parser.parse_args() 37 parser.add_argument(
38 "--remove_version", action="store_true",
39 help="Remove version number from NCBI formatted accession numbers. "
40 "For example, this converts 'B000657.2' to 'B000657'."
41 )
42 args = parser.parse_args()
43
44 gbk_open = get_opener(args.genbank_file)
45 with gbk_open(args.genbank_file, 'rt') as input_handle, \
46 open(args.fasta_file, 'w') as output_handle:
47 for seq_record in SeqIO.parse(input_handle, 'genbank'):
48 if args.remove_version:
49 seq_id = seq_record.id.split('.')[0]
50 else:
51 seq_id = seq_record.id
52 print(f'Writing FASTA record: {seq_id}')
53 output_handle.write(f'>{seq_id}\n')
54 output_handle.write(f'{seq_record.seq}\n')
35 55
36 56
37 gbk_open = get_opener(args.genbank_file) 57 if __name__ == "__main__":
38 with gbk_open(args.genbank_file, 'rt') as input_handle, \ 58 main()
39 open(args.fasta_file, 'w') as output_handle:
40 for seq_record in SeqIO.parse(input_handle, 'genbank'):
41 if args.remove_version:
42 seq_id = seq_record.id.split('.')[0]
43 else:
44 seq_id = seq_record.id
45 print('Writing FASTA record: {}'.format(seq_id))
46 print('>' + seq_id, file=output_handle)
47 print(seq_record.seq, file=output_handle)