annotate mutation_column_checker.py @ 94:84e9e5c8c101 draft

"planemo upload commit d4be85014b638f1d50b318d4b735be7f6e973140"
author rhpvorderman
date Fri, 24 Mar 2023 16:58:28 +0000
parents 729738462297
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
83
729738462297 "planemo upload commit c0ffc68aec5836d5b20b543106493056a87edf57"
rhpvorderman
parents: 81
diff changeset
"""Check that every mutation entry in one column of a tab-separated table
matches the expected mutation pattern, and report the rows that do not.

Run as a script, it reads ``7_V-REGION-mutation-and-AA-change-table.txt`` from
the current directory and inspects the ``FR3-IMGT`` column.
"""
import re

# One mutation entry: a nucleotide letter (n/a/c/t/g), a position, any single
# separator character and a second nucleotide letter, optionally followed by
# ", ", an upper-case letter, a number, ">" and another upper-case letter or
# ";", then any trailing text.
# NOTE(review): presumably entries look like "a12>g,T4>A" -- confirm against
# real input data.
# A raw string is used so that "\d" reaches the regex engine untouched instead
# of relying on Python passing an invalid string escape through.
mutationMatcher = re.compile(
    r"^([nactg])(\d+).([nactg]),?[ ]?([A-Z])?(\d+)?[>]?([A-Z;])?(.*)?"
)


def _unmatched_entries(cell):
    """Return the non-empty '|'-separated entries of *cell* the pattern rejects."""
    return [
        entry
        for entry in cell.split("|")
        if entry and mutationMatcher.match(entry) is None
    ]


def check_mutation_column(lines, column="FR3-IMGT"):
    """Report table rows whose *column* cell holds an unparseable mutation.

    Args:
        lines: Iterable of tab-separated text lines (an open file works). The
            first line is the header and must contain *column*.
        column: Header name of the column to inspect.

    Returns:
        A list of ``(first_field, unmatched_entries)`` tuples, one per
        reported row, in input order. Empty input yields an empty list.

    Raises:
        ValueError: If the header line does not contain *column*.

    For each reported row the second field is printed, followed by a message
    naming the first field. The running line index is printed every 100000
    lines as a progress indicator.
    """
    rows = iter(lines)
    header = next(rows, None)
    if header is None:
        return []

    # Strip the line terminator first; otherwise the last header name would
    # carry a trailing newline and never compare equal to *column*.
    column_index = header.rstrip("\r\n").split("\t").index(column)

    problems = []
    # The header counts as line 0, so data lines are numbered from 1.
    for i, line in enumerate(rows, start=1):
        line_split = line.rstrip("\r\n").split("\t")

        # "<=" rather than "<": a row with exactly column_index fields has no
        # element at column_index and would raise IndexError below.
        if len(line_split) <= column_index:
            continue

        cell = line_split[column_index]
        # Only cells longer than five characters are checked (threshold kept
        # from the original script).
        if len(cell) > 5:
            unmatched = _unmatched_entries(cell)
            if unmatched:
                print(line_split[1])
                print("Something went wrong at line {line} with:".format(line=line_split[0]))
                problems.append((line_split[0], unmatched))

        if i % 100000 == 0:
            print(i)

    return problems


if __name__ == "__main__":
    with open("7_V-REGION-mutation-and-AA-change-table.txt", 'r') as file_handle:
        check_mutation_column(file_handle)