diff summary_to_fasta.py @ 0:c33d93683a09 draft

Uploaded
author davidvanzessen
date Thu, 13 Oct 2016 10:52:24 -0400
parents
children 729738462297
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/summary_to_fasta.py	Thu Oct 13 10:52:24 2016 -0400
@@ -0,0 +1,54 @@
+"""Convert the 1_Summary file of an IMGT zip file into a fasta file.
+
+Rows with fewer columns than needed are counted as "No results", rows with
+an empty sequence as "No sequences"; every other row becomes one record.
+"""
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--input", required=True, help="The 1_Summary file of an IMGT zip file")
+parser.add_argument("--fasta", required=True, help="The output fasta file")
+
+args = parser.parse_args()
+
+infile = args.input
+fasta = args.fasta
+
+no_results = 0
+no_seqs = 0
+passed = 0
+id_col = 0
+seq_col = 0
+min_cols = 0
+
+with open(infile, 'r') as summary, open(fasta, 'w') as out:
+	for line_no, line in enumerate(summary):
+		# Drop the line ending first so the last column never carries a
+		# newline into the header lookup or into the fasta output.
+		fields = line.rstrip("\r\n").split("\t")
+		if line_no == 0:
+			# Header row: locate the two columns the fasta records need.
+			id_col = fields.index("Sequence ID")
+			seq_col = fields.index("Sequence")
+			# NOTE(review): the 5-column floor is kept from the original
+			# script; presumably unanalysed rows are shorter -- confirm.
+			min_cols = max(5, id_col + 1, seq_col + 1)
+			continue
+		if len(fields) < min_cols:
+			# Too short to hold both columns; skip instead of IndexError.
+			no_results += 1
+			continue
+
+		seq = fields[seq_col]
+		if not seq:
+			no_seqs += 1
+			continue
+
+		out.write(">" + fields[id_col] + "\n" + seq + "\n")
+		passed += 1
+
+# One pre-formatted string per call prints the same text on Python 2 and 3;
+# a `print "label", value` statement would be a SyntaxError on Python 3.
+print("No results: %d" % no_results)
+print("No sequences: %d" % no_seqs)
+print("Written to fasta file: %d" % passed)