Mercurial > repos > davidvanzessen > shm_csr
comparison mutation_column_checker.py @ 78:aff3ba86ef7a draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Mon, 31 Aug 2020 11:20:08 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 77:58d2377b507d | 78:aff3ba86ef7a |
|---|---|
"""Check that every FR3-IMGT mutation entry in an IMGT table can be parsed.

Reads the tab-separated file named by ``INPUT_FILE``, locates the
``FR3-IMGT`` column from the header row, and reports every data row whose
column value contains a '|'-separated entry that ``mutationMatcher`` rejects.
"""
import re

# One mutation entry: a nucleotide ([nactg]), a position, any single
# separator character, the new nucleotide, then optional amino-acid
# information (letter, position, '>', letter or ';') and any trailing text.
# Raw string so that ``\d`` reaches the regex engine unmodified.
mutationMatcher = re.compile(r"^([nactg])(\d+).([nactg]),?[ ]?([A-Z])?(\d+)?[>]?([A-Z;])?(.*)?")

INPUT_FILE = "7_V-REGION-mutation-and-AA-change-table.txt"
FR3_COLUMN = "FR3-IMGT"
PROGRESS_INTERVAL = 100000
# Column values of this length or shorter are not checked at all.
MIN_DATA_LENGTH = 5


def unparsable_mutations(fr3_data):
    """Return the non-empty '|'-separated entries that the regex rejects."""
    return [
        entry
        for entry in fr3_data.split("|")
        if entry and mutationMatcher.match(entry) is None
    ]


def check_mutation_column(lines, column=FR3_COLUMN,
                          progress_interval=PROGRESS_INTERVAL):
    """Scan tab-separated *lines* and report rows with unparsable mutations.

    The first line is the header; *column* is looked up in it.  For every
    later row that has that column and whose value is longer than
    ``MIN_DATA_LENGTH`` characters, each '|'-separated entry is matched
    against ``mutationMatcher``.  Rows with at least one rejected entry are
    printed and collected.

    Args:
        lines: iterable of text lines (an open file works).
        column: header name of the column to check.
        progress_interval: print the row index every this many rows;
            a falsy value disables progress output.

    Returns:
        A list of ``(first_field, second_field, rejected_entries)`` tuples,
        one per offending row, in input order.

    Raises:
        ValueError: if *column* is not present in the header row.
    """
    failures = []
    column_index = -1
    for i, line in enumerate(lines):
        # Strip the line terminator so the last column (header or data)
        # does not carry a trailing "\n" / "\r\n".
        fields = line.rstrip("\r\n").split("\t")
        if i == 0:
            if column not in fields:
                raise ValueError(
                    "column {column!r} not found in header".format(column=column)
                )
            column_index = fields.index(column)
            continue

        # "<=": a row with exactly column_index fields has no such column.
        if len(fields) <= column_index:
            continue

        data = fields[column_index]
        if len(data) > MIN_DATA_LENGTH:
            rejected = unparsable_mutations(data)
            if rejected:
                second_field = fields[1] if len(fields) > 1 else ""
                print(second_field)
                print("Something went wrong at line {line} with:".format(line=fields[0]))
                print(rejected)
                failures.append((fields[0], second_field, rejected))

        if progress_interval and i % progress_interval == 0:
            print(i)
    return failures


def main():
    """Run the check against ``INPUT_FILE`` in the current directory."""
    with open(INPUT_FILE, 'r') as file_handle:
        check_mutation_column(file_handle)


if __name__ == "__main__":
    main()
