diff summary_to_fasta.py @ 81:b6f9a640e098 draft

Uploaded
author davidvanzessen
date Fri, 19 Feb 2021 15:10:54 +0000
parents c33d93683a09
children 729738462297
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/summary_to_fasta.py	Fri Feb 19 15:10:54 2021 +0000
@@ -0,0 +1,42 @@
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--input", help="The 1_Summary file of an IMGT zip file")
+parser.add_argument("--fasta", help="The output fasta file")
+
+args = parser.parse_args()
+
+infile = args.input
+fasta = args.fasta
+
+with open(infile, 'r') as i, open(fasta, 'w') as o:
+	first = True
+	id_col = 0
+	seq_col = 0
+	no_results = 0
+	no_seqs = 0
+	passed = 0
+	for line in i:
+		splt = line.split("\t")
+		if first:
+			id_col = splt.index("Sequence ID")
+			seq_col = splt.index("Sequence")
+			first = False
+			continue
+		if len(splt) < 5:
+			no_results += 1
+			continue
+		
+		ID = splt[id_col]
+		seq = splt[seq_col]
+		
+		if not len(seq) > 0:
+			no_seqs += 1
+			continue
+		
+		o.write(">" + ID + "\n" + seq + "\n")
+		passed += 1
+			
+	print "No results:", no_results
+	print "No sequences:", no_seqs
+	print "Written to fasta file:", passed