diff mutation_column_checker.py @ 78:aff3ba86ef7a draft

Uploaded
author davidvanzessen
date Mon, 31 Aug 2020 11:20:08 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mutation_column_checker.py	Mon Aug 31 11:20:08 2020 -0400
@@ -0,0 +1,45 @@
+"""Check that every FR3-IMGT entry of a mutation table can be parsed.
+
+Reads the tab-separated file "7_V-REGION-mutation-and-AA-change-table.txt"
+from the current directory, finds the "FR3-IMGT" column through the header
+row and reports each data row whose column holds a "|"-separated entry that
+mutationMatcher does not match. The index of a processed row is printed when
+it is a multiple of 100000, as a progress indicator.
+"""
+import re
+
+# Raw string so that "\d" reaches the regex engine instead of being treated as
+# an (invalid) string escape. An entry must start with a base letter
+# (n/a/c/t/g), digits, any single character and a second base letter; every
+# part after that is optional.
+mutationMatcher = re.compile(r"^([nactg])(\d+).([nactg]),?[ ]?([A-Z])?(\d+)?[>]?([A-Z;])?(.*)?")
+
+with open("7_V-REGION-mutation-and-AA-change-table.txt", 'r') as file_handle:
+    fr3_index = -1
+    for i, line in enumerate(file_handle):
+        # Drop the line terminator so the last column (header or data) does
+        # not carry a trailing newline into the lookups below.
+        line_split = line.rstrip("\r\n").split("\t")
+        if i == 0:
+            # Header row; ValueError is raised when there is no FR3-IMGT column.
+            fr3_index = line_split.index("FR3-IMGT")
+            continue
+
+        # Skip rows too short to contain the column: reading index fr3_index
+        # requires at least fr3_index + 1 fields, hence "<=" rather than "<".
+        if len(line_split) <= fr3_index:
+            continue
+
+        fr3_data = line_split[fr3_index]
+        # Values of five characters or fewer are not checked.
+        if len(fr3_data) > 5:
+            # match() returns None for an unparsable entry; test for that
+            # directly instead of catching the resulting AttributeError with a
+            # bare "except", which also swallowed KeyboardInterrupt.
+            entries = [x for x in fr3_data.split("|") if x]
+            if any(mutationMatcher.match(x) is None for x in entries):
+                if len(line_split) > 1:
+                    print(line_split[1])
+                print("Something went wrong at line {line} with:".format(line=line_split[0]))
+        if i % 100000 == 0:
+            print(i)