Mercurial > repos > davidvanzessen > shm_csr
comparison summary_to_fasta.py @ 0:c33d93683a09 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 13 Oct 2016 10:52:24 -0400 |
parents | |
children | 729738462297 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c33d93683a09 |
---|---|
import argparse


def summary_to_fasta(infile, fasta):
    """Write the sequences of an IMGT 1_Summary file to a FASTA file.

    The first line of ``infile`` is treated as a tab-separated header and
    must contain the columns "Sequence ID" and "Sequence". Every following
    line is a tab-separated record.

    Args:
        infile: Path of the tab-separated summary file to read.
        fasta: Path of the FASTA file to (over)write.

    Returns:
        A ``(no_results, no_seqs, passed)`` tuple: the number of records
        skipped because they have too few fields, the number skipped
        because their sequence is empty, and the number written.

    Raises:
        ValueError: If the header lacks "Sequence ID" or "Sequence".
    """
    no_results = 0
    no_seqs = 0
    passed = 0

    with open(infile, 'r') as src, open(fasta, 'w') as dst:
        id_col = None
        seq_col = None
        min_fields = 5
        for line in src:
            # Strip only the line terminator: trailing tabs delimit empty
            # fields and must survive, but a dangling "\n" would otherwise
            # corrupt the last column (header name or sequence).
            fields = line.rstrip("\r\n").split("\t")

            if id_col is None:
                # Header line: locate the two columns we need.
                id_col = fields.index("Sequence ID")
                seq_col = fields.index("Sequence")
                # A record is usable only if it has the historical minimum
                # of 5 fields AND actually reaches both columns.
                min_fields = max(5, id_col + 1, seq_col + 1)
                continue

            if len(fields) < min_fields:
                no_results += 1
                continue

            seq_id = fields[id_col]
            seq = fields[seq_col]

            if not seq:
                no_seqs += 1
                continue

            dst.write(">" + seq_id + "\n" + seq + "\n")
            passed += 1

    return no_results, no_seqs, passed


def main():
    """Parse the command line, run the conversion and print the counts."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", help="The 1_Summary file of an IMGT zip file")
    parser.add_argument("--fasta", help="The output fasta file")
    args = parser.parse_args()

    no_results, no_seqs, passed = summary_to_fasta(args.input, args.fasta)

    # str.format + single-argument print() yields identical output on
    # Python 2 and Python 3.
    print("No results: {0}".format(no_results))
    print("No sequences: {0}".format(no_seqs))
    print("Written to fasta file: {0}".format(passed))


if __name__ == "__main__":
    main()