annotate fasta2database.py @ 14:a6c55d1bdb6c draft

Uploaded
author petr-novak
date Wed, 28 Aug 2019 08:08:47 -0400
parents d0431a839606
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10
d0431a839606 Uploaded
petr-novak
parents:
diff changeset
#!/usr/bin/env python3
'''
Helper script to create DANTE database which can be used in second iteration

Usage:
    fasta2database.py FASTA_INPUT DB_FASTA_OUTPUT DB_CLASSIFICATION_OUTPUT

Every header of FASTA_INPUT must consist of three whitespace separated
fields: ``>NAME DOMAIN CLASSIFICATION`` where CLASSIFICATION is a
``|`` separated lineage. Sequence lines are copied unchanged.
'''
import sys


def reformat_header(line):
    '''
    Convert one FASTA header line to the database header and table row.

    line: header line of the form ">NAME DOMAIN LEVEL1|LEVEL2|...",
        with or without a trailing newline.

    Returns a tuple (new_header, classification_row); both strings are
    newline terminated. "-" in NAME is replaced by "_" and the
    classification levels are joined by tabs.

    Raises ValueError if the header does not have exactly three fields.
    '''
    # split() without an argument tolerates repeated spaces, tabs and the
    # trailing newline, so the row terminator never depends on the input.
    fields = line.split()
    if len(fields) != 3:
        raise ValueError(
            "malformed fasta header (expected '>name domain classification'): "
            "{!r}".format(line))
    name, domain, classification = fields
    name_clean = name[1:].replace("-", "_")
    new_header = ">NA-{}__{}\n".format(domain, name_clean)
    classification_string = "\t".join(classification.split("|"))
    return new_header, "{}\t{}\n".format(name_clean, classification_string)


def fasta2database(fasta_input, db_fasta_output_file, db_classification_file):
    '''
    Write the reformatted FASTA file and its classification table.

    fasta_input: path of the FASTA file to read.
    db_fasta_output_file: path of the FASTA file to write; headers are
        rewritten by reformat_header, other lines are copied as they are.
    db_classification_file: path of the tab separated table to write,
        one unique "name<TAB>level1<TAB>level2..." row per header.

    Raises ValueError on a malformed header (see reformat_header).
    '''
    classification_table = set()
    with open(fasta_input, 'r') as f, open(db_fasta_output_file, 'w') as out:
        for line in f:
            if line.startswith(">"):
                new_header, row = reformat_header(line)
                classification_table.add(row)
                out.write(new_header)
            else:
                out.write(line)
    with open(db_classification_file, 'w') as f:
        # set iteration order changes between runs; sort so that the same
        # input always produces the same table
        f.writelines(sorted(classification_table))


def main(argv=None):
    '''
    Command line entry point.

    argv: list of arguments without the program name; sys.argv[1:] is used
        when it is None. Only the first three arguments are used.
    '''
    args = sys.argv[1:] if argv is None else argv
    if len(args) < 3:
        sys.exit("usage: fasta2database.py FASTA_INPUT DB_FASTA_OUTPUT "
                 "DB_CLASSIFICATION_OUTPUT")
    fasta2database(args[0], args[1], args[2])


if __name__ == "__main__":
    main()