diff baseline/script_imgt.py @ 63:8728284105ee draft

description Uploaded
author davidvanzessen
date Wed, 06 Dec 2017 08:04:52 -0500
parents 4c5ba6b5d10d
children ba33b94637ca
line wrap: on
line diff
--- a/baseline/script_imgt.py	Tue Dec 05 10:57:13 2017 -0500
+++ b/baseline/script_imgt.py	Wed Dec 06 08:04:52 2017 -0500
@@ -10,12 +10,18 @@
 
 args = parser.parse_args()
 
+print "script_imgt.py"
+print "input:", args.input
+print "ref:", args.ref
+print "output:", args.output
+print "id:", args.id
+
 refdic = dict()
 with open(args.ref, 'rU') as ref:
 	currentSeq = ""
 	currentId = ""
 	for line in ref:
-		if line[0] is ">":
+		if line.startswith(">"):
 			if currentSeq is not "" and currentId is not "":
 				refdic[currentId[1:]] = currentSeq
 			currentId = line.rstrip()
@@ -23,7 +29,8 @@
 		else:
 			currentSeq += line.rstrip()
 	refdic[currentId[1:]] = currentSeq
-	
+
+print "Have", str(len(refdic)), "reference sequences"
 
 vPattern = [r"(IGHV[0-9]-[0-9ab]+-?[0-9]?D?\*\d{1,2})"]#,
 #						r"(TRBV[0-9]{1,2}-?[0-9]?-?[123]?)",
@@ -37,16 +44,13 @@
 vPattern = re.compile("|".join(vPattern))
 
 def filterGene(s, pattern):
-	s1 = s[s.find(" ") + 1:]
-	return s1[:s1.find(" ")]
-	"""
     if type(s) is not str:
         return None
     res = pattern.search(s)
     if res:
         return res.group(0)
     return None
-	"""
+
 
 
 currentSeq = ""