"""Convert the 1_Summary file of an IMGT zip file into a fasta file.

The summary file is read as tab-separated text whose first line is a header
containing the columns "Sequence ID" and "Sequence".  Every data row that has
a non-empty sequence is written to the fasta file as a ">ID" line followed by
the sequence line.
"""
import argparse


def summary_to_fasta(infile, fasta):
    """Write the sequences found in ``infile`` to ``fasta``.

    Args:
        infile: Path of the tab-separated summary file to read.
        fasta: Path of the fasta file to (over)write.

    Returns:
        A ``(no_results, no_seqs, passed)`` tuple: the number of rows that
        were too short to be used, the number of rows whose sequence field
        was empty, and the number of records written to ``fasta``.

    Raises:
        ValueError: If the header line lacks a "Sequence ID" or "Sequence"
            column.
    """
    no_results = 0
    no_seqs = 0
    passed = 0
    id_col = 0
    seq_col = 0

    with open(infile, 'r') as src, open(fasta, 'w') as dst:
        for line_number, line in enumerate(src):
            # Drop the line terminator first; otherwise the last column of
            # every row (header included) would carry a trailing newline.
            fields = line.rstrip("\r\n").split("\t")

            if line_number == 0:
                try:
                    id_col = fields.index("Sequence ID")
                    seq_col = fields.index("Sequence")
                except ValueError:
                    raise ValueError(
                        "header of {0!r} must contain the columns "
                        "'Sequence ID' and 'Sequence'".format(infile)
                    )
                continue

            # Rows with fewer than 5 fields are counted as "no results"
            # (the original threshold).  Rows too short to hold the ID or
            # sequence column are counted the same way instead of raising
            # an IndexError.
            if len(fields) < 5 or len(fields) <= max(id_col, seq_col):
                no_results += 1
                continue

            seq_id = fields[id_col]
            seq = fields[seq_col]

            if not seq:
                no_seqs += 1
                continue

            dst.write(">" + seq_id + "\n" + seq + "\n")
            passed += 1

    return no_results, no_seqs, passed


def main():
    """Parse the command line, run the conversion and print the counts."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", required=True,
                        help="The 1_Summary file of an IMGT zip file")
    parser.add_argument("--fasta", required=True,
                        help="The output fasta file")
    args = parser.parse_args()

    no_results, no_seqs, passed = summary_to_fasta(args.input, args.fasta)

    # A single pre-formatted argument prints the same text whether print is
    # the Python 2 statement or the Python 3 function.
    print("No results: {0}".format(no_results))
    print("No sequences: {0}".format(no_seqs))
    print("Written to fasta file: {0}".format(passed))


if __name__ == "__main__":
    main()