10
|
1 #!/usr/bin/env python3
|
|
2 '''
|
|
3 Helper script to create a DANTE database which can be used in the second iteration
|
|
4 '''
|
|
5 import sys
|
|
6
|
|
def _reformat_header(line):
    """Convert one input FASTA header into the output header and table row.

    The input header must consist of exactly three fields separated by
    single spaces: ``>name domain classification``, where the
    classification levels are separated by ``|``.

    Args:
        line: Header line, with or without a trailing line ending.

    Returns:
        A ``(new_header, table_row)`` tuple.  ``new_header`` has the form
        ``>NA-<domain>__<name>`` and ``table_row`` is the cleaned name
        followed by the classification levels, tab separated.  Both end
        with a newline.

    Raises:
        ValueError: If the header does not have exactly three fields.
    """
    # Strip the line ending here and re-add it explicitly below, so a final
    # header without a trailing newline still yields a terminated table row.
    fields = line.rstrip("\r\n").split(" ")
    if len(fields) != 3:
        raise ValueError(
            "FASTA header must have the form '>name domain classification', "
            "got: {!r}".format(line)
        )
    name, domain, classification = fields
    # Drop the leading '>' and replace '-' with '_' in the sequence name.
    name_clean = name[1:].replace("-", "_")
    new_header = ">NA-{}__{}\n".format(domain, name_clean)
    classification_string = "\t".join(classification.split("|"))
    table_row = "{}\t{}\n".format(name_clean, classification_string)
    return new_header, table_row


def create_database(fasta_input, db_fasta_output_file, db_classification_file):
    """Write the reformatted FASTA file and its classification table.

    Args:
        fasta_input: Path of the input FASTA file.
        db_fasta_output_file: Path of the FASTA file to write; header lines
            are rewritten, all other lines are copied unchanged.
        db_classification_file: Path of the tab-separated classification
            table to write (one unique row per distinct header).

    Raises:
        ValueError: If a header line does not have exactly three fields.
    """
    classification_table = set()
    # fasta header will be reformatted to correct REXdb classification
    with open(fasta_input, 'r') as f, open(db_fasta_output_file, 'w') as out:
        for line in f:
            if line.startswith(">"):
                new_header, table_row = _reformat_header(line)
                classification_table.add(table_row)
                out.write(new_header)
            else:
                out.write(line)
    with open(db_classification_file, 'w') as f:
        # Set iteration order is not stable between runs; sort the rows so
        # the table is reproducible.
        f.writelines(sorted(classification_table))


def main(argv=None):
    """Run the script using ``argv`` (defaults to ``sys.argv``).

    Expects three positional arguments: input FASTA, output FASTA and
    output classification table.  Extra arguments are ignored.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        sys.exit(
            "Usage: {} <fasta_input> <db_fasta_output_file> "
            "<db_classification_file>".format(argv[0] if argv else "script")
        )
    create_database(argv[1], argv[2], argv[3])


if __name__ == "__main__":
    main()
|