Mercurial > repos > iuc > snpeff
annotate gbk2fa.py @ 24:cfcf33df7fc0 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit e4366b6a23223f84039a0590cf0d7079b83d8f84"
author | iuc |
---|---|
date | Wed, 13 Oct 2021 23:30:29 +0000 |
parents | 479c4f2f4826 |
children | 5c7b70713fb5 |
rev | line source |
---|---|
10
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
1 import argparse |
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
2 import bz2 |
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
3 import contextlib |
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
4 import gzip |
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
5 import sys |
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
6 |
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
7 import magic |
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
8 from Bio import SeqIO |
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
9 |
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
# Command-line interface: two positional paths (GenBank in, FASTA out) plus
# an optional switch that strips the ".N" version suffix from record IDs.
parser = argparse.ArgumentParser()
parser.add_argument("genbank_file", help="GenBank input file. Can be compressed with gzip or bzip2")
parser.add_argument("fasta_file", help="FASTA output dataset")
parser.add_argument("--remove_version", dest="remove_version", action="store_true", help="Remove version number from NCBI formatted accession numbers. For example, this would convert 'B000657.2' to 'B000657'")
args = parser.parse_args()

# Short aliases for the two paths used by the rest of the script.
gbk_filename = args.genbank_file
fa_filename = args.fasta_file
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
18 |
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
19 |
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
@contextlib.contextmanager
def get_file_handle(gbk_filename):
    """Open a GenBank file for text-mode reading, decompressing if needed.

    The MIME type reported by ``magic.from_file`` selects the opener:
    plain ``open`` for ``text/plain``, ``gzip.open`` for gzip content and
    ``bz2.open`` for bzip2 content.

    Args:
        gbk_filename: Path of the GenBank file to open.

    Yields:
        A text-mode file handle positioned at the start of the file. The
        handle is closed when the ``with`` block exits, including when the
        block raises.

    Raises:
        SystemExit: If the detected MIME type is not one of the supported
            plain/gzip/bzip2 types.
    """
    # Sniff the file that was actually passed in, not the global CLI argument,
    # so the function behaves correctly for any caller-supplied path.
    f_type = magic.from_file(gbk_filename, mime=True)
    if f_type == 'text/plain':
        input_handle = open(gbk_filename, "r")
    elif f_type in ('application/gzip', 'application/x-gzip'):
        # Both MIME spellings are accepted for gzip content.
        input_handle = gzip.open(gbk_filename, "rt")
    elif f_type == 'application/x-bzip2':
        input_handle = bz2.open(gbk_filename, "rt")
    else:
        sys.exit("Cannot process file of type {}. Only plain, gzip'ed, and bzip2'ed genbank files are accepted ".format(f_type))
    try:
        yield input_handle
    finally:
        # Close even if the consumer's with-body raised.
        input_handle.close()
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
33 |
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
34 |
5b4ac70948d2
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpeff commit eea43430ff90fe6b13b295f6d5efb2208401a7ef
iuc
parents:
diff
changeset
|
# Stream every record of the GenBank input into the FASTA output, one
# ">id" header line followed by the sequence on a single line.
with get_file_handle(gbk_filename) as genbank_in, open(fa_filename, "w") as fasta_out:
    for record in SeqIO.parse(genbank_in, "genbank"):
        # With --remove_version, keep only the text before the first '.'.
        record_id = record.id.split('.')[0] if args.remove_version else record.id
        print('Writing FASTA record: {}'.format(record_id))
        fasta_out.write(">{}\n{}\n".format(record_id, record.seq))