Mercurial > repos > iuc > snpeff
diff gbk2fa.py @ 29:ca2b512e8d7c draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit 4cc10cfe702828a91ecf8bb58d3f84a36b0578f7
author | iuc |
---|---|
date | Mon, 21 Oct 2024 13:56:15 +0000 |
parents | 5c7b70713fb5 |
children |
line wrap: on
line diff
--- a/gbk2fa.py Sat Sep 28 16:27:56 2024 +0000
+++ b/gbk2fa.py Mon Oct 21 13:56:15 2024 +0000
@@ -6,6 +6,9 @@
 
 
 def get_opener(gbk_filename):
+    """Determines the appropriate opener for a given file, supporting
+    bzip2, gzip, or standard open.
+    """
     try:
         bz2.open(gbk_filename).read(1)
         return bz2.open
@@ -18,30 +21,38 @@
         return open
 
 
-parser = argparse.ArgumentParser()
-parser.add_argument(
-    "genbank_file",
-    help="GenBank input file. Can be compressed with gzip or bzip2"
-)
-parser.add_argument(
-    "fasta_file", help="FASTA output datset"
-)
-parser.add_argument(
-    "--remove_version", action="store_true",
-    help="Remove version number from NCBI form formatted accession numbers. "
-    "For example, this would convert 'B000657.2' to 'B000657'"
-)
-args = parser.parse_args()
+def main():
+    parser = argparse.ArgumentParser(
+        description="Convert GenBank files to FASTA format. "
+        "Supports gzip and bzip2 compressed files."
+    )
+    parser.add_argument(
+        "genbank_file",
+        help="GenBank input file. Can be compressed with gzip or bzip2"
+    )
+    parser.add_argument(
+        "fasta_file",
+        help="FASTA output dataset"
+    )
+    parser.add_argument(
+        "--remove_version", action="store_true",
+        help="Remove version number from NCBI formatted accession numbers. "
+        "For example, this converts 'B000657.2' to 'B000657'."
+    )
+    args = parser.parse_args()
+
+    gbk_open = get_opener(args.genbank_file)
+    with gbk_open(args.genbank_file, 'rt') as input_handle, \
+            open(args.fasta_file, 'w') as output_handle:
+        for seq_record in SeqIO.parse(input_handle, 'genbank'):
+            if args.remove_version:
+                seq_id = seq_record.id.split('.')[0]
+            else:
+                seq_id = seq_record.id
+            print(f'Writing FASTA record: {seq_id}')
+            output_handle.write(f'>{seq_id}\n')
+            output_handle.write(f'{seq_record.seq}\n')
 
 
-gbk_open = get_opener(args.genbank_file)
-with gbk_open(args.genbank_file, 'rt') as input_handle, \
-        open(args.fasta_file, 'w') as output_handle:
-    for seq_record in SeqIO.parse(input_handle, 'genbank'):
-        if args.remove_version:
-            seq_id = seq_record.id.split('.')[0]
-        else:
-            seq_id = seq_record.id
-        print('Writing FASTA record: {}'.format(seq_id))
-        print('>' + seq_id, file=output_handle)
-        print(seq_record.seq, file=output_handle)
+if __name__ == "__main__":
+    main()