Mercurial > repos > davidvanzessen > shm_csr
diff summary_to_fasta.py @ 0:c33d93683a09 draft
Uploaded
author | davidvanzessen |
---|---|
date | Thu, 13 Oct 2016 10:52:24 -0400 |
parents | |
children | 729738462297 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/summary_to_fasta.py Thu Oct 13 10:52:24 2016 -0400 @@ -0,0 +1,42 @@ +import argparse + +parser = argparse.ArgumentParser() +parser.add_argument("--input", help="The 1_Summary file of an IMGT zip file") +parser.add_argument("--fasta", help="The output fasta file") + +args = parser.parse_args() + +infile = args.input +fasta = args.fasta + +with open(infile, 'r') as i, open(fasta, 'w') as o: + first = True + id_col = 0 + seq_col = 0 + no_results = 0 + no_seqs = 0 + passed = 0 + for line in i: + splt = line.split("\t") + if first: + id_col = splt.index("Sequence ID") + seq_col = splt.index("Sequence") + first = False + continue + if len(splt) < 5: + no_results += 1 + continue + + ID = splt[id_col] + seq = splt[seq_col] + + if not len(seq) > 0: + no_seqs += 1 + continue + + o.write(">" + ID + "\n" + seq + "\n") + passed += 1 + + print "No results:", no_results + print "No sequences:", no_seqs + print "Written to fasta file:", passed