Mercurial > repos > petr-novak > dante
comparison fasta2database.py @ 10:d0431a839606 draft
Uploaded
author | petr-novak |
---|---|
date | Wed, 14 Aug 2019 11:24:15 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
9:ed4d9ede9cb4 | 10:d0431a839606 |
---|---|
#!/usr/bin/env python3
'''
Helper script to create DANTE database which can be used in second iteration.

Usage:
    fasta2database.py FASTA_INPUT DB_FASTA_OUTPUT DB_CLASSIFICATION_OUTPUT

Every header line of FASTA_INPUT must consist of exactly three fields
separated by single spaces:

    >NAME DOMAIN LEVEL1|LEVEL2|...

Two files are written:

* DB_FASTA_OUTPUT - the input sequences with each header rewritten to
  ``>NA-DOMAIN__NAME`` (every "-" in NAME is replaced by "_").
* DB_CLASSIFICATION_OUTPUT - one tab-separated line per distinct
  (NAME, classification) pair: the cleaned NAME followed by the
  classification levels.
'''
import sys


def reformat_header(line):
    '''
    Convert one FASTA header line to the database header and its
    classification record.

    line -- header line starting with ">", with or without a trailing
            newline, holding three single-space separated fields:
            name, domain and "|"-separated classification.

    Returns a tuple (new_header, classification_record); both strings are
    newline terminated.

    Raises ValueError when the header does not have exactly three fields.
    '''
    # The newline is stripped here and re-added explicitly below, so a header
    # on the very last line of a file without a trailing newline still yields
    # properly terminated output lines.
    fields = line.rstrip("\n").split(" ")
    if len(fields) != 3:
        raise ValueError(
            "FASTA header must have exactly three space-separated fields "
            "(>name domain classification), got: {!r}".format(line))
    name, domain, classification = fields
    # name[1:] drops the leading ">" of the FASTA header
    name_clean = name[1:].replace("-", "_")
    new_header = ">NA-{}__{}\n".format(domain, name_clean)
    classification_record = "{}\t{}\n".format(
        name_clean, classification.replace("|", "\t"))
    return new_header, classification_record


def convert(fasta_input, db_fasta_output_file, db_classification_file):
    '''
    Rewrite the headers of fasta_input into db_fasta_output_file and store
    the classification table in db_classification_file.

    Sequence (non-header) lines are copied unchanged.
    '''
    # a set, so that identical records are written only once
    classification_table = set()
    # fasta header will be reformatted to correct REXdb classification
    with open(fasta_input, 'r') as f, open(db_fasta_output_file, 'w') as out:
        for line in f:
            if line.startswith(">"):
                new_header, record = reformat_header(line)
                classification_table.add(record)
                out.write(new_header)
            else:
                out.write(line)
    with open(db_classification_file, 'w') as f:
        # Iteration order of a set of strings differs between interpreter
        # runs; sorting makes the classification file reproducible.
        f.writelines(sorted(classification_table))


def main(argv=None):
    '''
    Command line entry point.

    argv -- list of arguments without the program name; defaults to
            sys.argv[1:]. Arguments after the third one are ignored.
    '''
    args = sys.argv[1:] if argv is None else argv
    if len(args) < 3:
        sys.exit("usage: {} FASTA_INPUT DB_FASTA_OUTPUT "
                 "DB_CLASSIFICATION_OUTPUT".format(sys.argv[0]))
    convert(args[0], args[1], args[2])


if __name__ == "__main__":
    main()