diff gbk2fa.py @ 29:ca2b512e8d7c draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit 4cc10cfe702828a91ecf8bb58d3f84a36b0578f7
author iuc
date Mon, 21 Oct 2024 13:56:15 +0000
parents 5c7b70713fb5
children
line wrap: on
line diff
--- a/gbk2fa.py	Sat Sep 28 16:27:56 2024 +0000
+++ b/gbk2fa.py	Mon Oct 21 13:56:15 2024 +0000
@@ -6,6 +6,9 @@
 
 
 def get_opener(gbk_filename):
+    """Determines the appropriate opener for a given file, supporting
+    bzip2, gzip, or standard open.
+    """
     try:
         bz2.open(gbk_filename).read(1)
         return bz2.open
@@ -18,30 +21,38 @@
         return open
 
 
-parser = argparse.ArgumentParser()
-parser.add_argument(
-    "genbank_file",
-    help="GenBank input file. Can be compressed with gzip or bzip2"
-)
-parser.add_argument(
-    "fasta_file", help="FASTA output datset"
-)
-parser.add_argument(
-    "--remove_version", action="store_true",
-    help="Remove version number from NCBI form formatted accession numbers. "
-         "For example, this would convert 'B000657.2' to 'B000657'"
-)
-args = parser.parse_args()
+def main():
+    parser = argparse.ArgumentParser(
+        description="Convert GenBank files to FASTA format. "
+                    "Supports gzip and bzip2 compressed files."
+    )
+    parser.add_argument(
+        "genbank_file",
+        help="GenBank input file. Can be compressed with gzip or bzip2"
+    )
+    parser.add_argument(
+        "fasta_file",
+        help="FASTA output dataset"
+    )
+    parser.add_argument(
+        "--remove_version", action="store_true",
+        help="Remove version number from NCBI formatted accession numbers. "
+             "For example, this converts 'B000657.2' to 'B000657'."
+    )
+    args = parser.parse_args()
+
+    gbk_open = get_opener(args.genbank_file)
+    with gbk_open(args.genbank_file, 'rt') as input_handle, \
+            open(args.fasta_file, 'w') as output_handle:
+        for seq_record in SeqIO.parse(input_handle, 'genbank'):
+            if args.remove_version:
+                seq_id = seq_record.id.split('.')[0]
+            else:
+                seq_id = seq_record.id
+            print(f'Writing FASTA record: {seq_id}')
+            output_handle.write(f'>{seq_id}\n')
+            output_handle.write(f'{seq_record.seq}\n')
 
 
-gbk_open = get_opener(args.genbank_file)
-with gbk_open(args.genbank_file, 'rt') as input_handle, \
-     open(args.fasta_file, 'w') as output_handle:
-    for seq_record in SeqIO.parse(input_handle, 'genbank'):
-        if args.remove_version:
-            seq_id = seq_record.id.split('.')[0]
-        else:
-            seq_id = seq_record.id
-        print('Writing FASTA record: {}'.format(seq_id))
-        print('>' + seq_id, file=output_handle)
-        print(seq_record.seq, file=output_handle)
+if __name__ == "__main__":
+    main()