Mercurial > repos > guerler > springsuite
annotate spring_package/Alignment.py @ 39:172398348efd draft
"planemo upload commit 26b4018c88041ee0ca7c2976e0a012015173d7b6-dirty"
author | guerler |
---|---|
date | Fri, 22 Jan 2021 15:50:27 +0000 |
parents | 0be0af9e695d |
children |
rev | line source |
---|---|
37
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
1 from Bio import pairwise2 |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
2 |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
3 |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
4 class Alignment: |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
5 def __init__(self, fileName): |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
6 self.queryName = None |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
7 self.queryStart = list() |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
8 self.queryAlignment = list() |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
9 self.templateName = None |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
10 self.templateStart = list() |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
11 self.templateAlignment = list() |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
12 self.readFile(fileName) |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
13 |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
14 def readFile(self, fileName): |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
15 with open(fileName) as file: |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
16 for line in file: |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
17 cols = line.split() |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
18 if len(cols) > 1 and cols[0] == "Query": |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
19 self.queryName = cols[1].split()[0][0:14] |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
20 if len(cols) > 1 and cols[0].startswith(">"): |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
21 self.templateName = cols[0][1:] |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
22 if self.queryName and self.templateName: |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
23 if len(cols) > 2: |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
24 if cols[0] == "Q" and cols[1] == self.queryName: |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
25 self.queryStart.append(self.getStart(cols[2])) |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
26 self.queryAlignment.append(cols[3]) |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
27 if cols[0] == "T" and cols[1] == self.templateName: |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
28 self.templateStart.append(self.getStart(cols[2])) |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
29 self.templateAlignment.append(cols[3]) |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
30 if len(cols) > 1 and cols[0] == "No" and cols[1] == "2": |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
31 break |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
32 |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
33 def createModel(self, templateChain): |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
34 hhrMapping = self.mapSequence(templateChain) |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
35 previousResidue = dict() |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
36 for residueNumber in templateChain: |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
37 templateResidue = templateChain[residueNumber]["residue"] |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
38 previousResidue[residueNumber] = templateResidue |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
39 templateChain[residueNumber]["residue"] = None |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
40 tCount = 0 |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
41 for i in range(len(self.templateAlignment)): |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
42 templateSequence = self.templateAlignment[i] |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
43 querySequence = self.queryAlignment[i] |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
44 queryStart = self.queryStart[i] |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
45 n = len(querySequence) |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
46 qCount = 0 |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
47 for j in range(n): |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
48 qs = querySequence[j] |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
49 ts = templateSequence[j] |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
50 rs = hhrMapping[tCount] |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
51 if rs != "-" and qs != "-" and ts != "-": |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
52 residueNumber = rs |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
53 if residueNumber in templateChain: |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
54 pr = previousResidue[residueNumber] |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
55 if pr != self.toThreeAmino(ts): |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
56 print("Warning: Ignoring mismatching residue [%s != %s]." % (pr, self.toThreeAmino(ts))) |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
57 templateChain[residueNumber]["residue"] = self.toThreeAmino(qs) |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
58 templateChain[residueNumber]["residueNumber"] = queryStart + qCount |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
59 else: |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
60 print("Warning: Skipping missing residue [%s]." % residueNumber) |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
61 if qs != "-": |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
62 qCount = qCount + 1 |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
63 if ts != "-": |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
64 tCount = tCount + 1 |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
65 |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
66 def mapSequence(self, templateChain): |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
67 pdbSequence = "" |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
68 for residueNumber in templateChain: |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
69 templateResidue = templateChain[residueNumber]["residue"] |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
70 pdbSequence = pdbSequence + self.toSingleAmino(templateResidue) |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
71 hhrSequence = "" |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
72 for i in range(len(self.templateAlignment)): |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
73 templateSequence = self.templateAlignment[i] |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
74 for s in templateSequence: |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
75 if s != "-": |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
76 hhrSequence = hhrSequence + s |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
77 alignments = pairwise2.align.globalxx(pdbSequence, hhrSequence) |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
78 pdbAlignment = alignments[0].seqA |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
79 hhrAlignment = alignments[0].seqB |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
80 pCount = 0 |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
81 hhrMapping = [] |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
82 for i in range(len(pdbAlignment)): |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
83 p = pdbAlignment[i] |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
84 h = hhrAlignment[i] |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
85 if h != "-": |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
86 residueIndex = pCount if p != "-" else p |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
87 hhrMapping.append(residueIndex) |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
88 if p != "-": |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
89 pCount = pCount + 1 |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
90 sortedResidueIndex = sorted(templateChain.keys()) |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
91 for i in range(len(hhrMapping)): |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
92 if hhrMapping[i] != "-": |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
93 hhrMapping[i] = sortedResidueIndex[hhrMapping[i]] |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
94 return hhrMapping |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
95 |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
96 def getStart(self, x): |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
97 try: |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
98 return int(x) |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
99 except Exception: |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
100 raise Exception("Invalid start index in alignment [%s]." % x) |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
101 |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
102 def toThreeAmino(self, seq): |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
103 code = dict(G="GLY", A="ALA", V="VAL", L="LEU", I="ILE", M="MET", F="PHE", P="PRO", Y="TYR", W="TRP", |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
104 K="LYS", S="SER", C="CYS", N="ASN", Q="GLN", H="HIS", T="THR", E="GLU", D="ASP", R="ARG") |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
105 return code[seq] if seq in code else "XXX" |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
106 |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
107 def toSingleAmino(self, seq): |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
108 code = dict(GLY="G", ALA="A", VAL="V", LEU="L", ILE="I", MET="M", PHE="F", PRO="P", TYR="Y", TRP="W", |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
109 LYS="K", SER="S", CYS="C", ASN="N", GLN="Q", HIS="H", THR="T", GLU="E", ASP="D", ARG="R") |
0be0af9e695d
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
guerler
parents:
diff
changeset
|
110 return code[seq] if seq in code else "X" |