Mercurial > repos > guerler > springsuite
comparison spring_package/Alignment.py @ 17:c790d25086dc draft
"planemo upload commit b0ede77caf410ab69043d33a44e190054024d340-dirty"
author | guerler |
---|---|
date | Wed, 28 Oct 2020 05:11:56 +0000 (2020-10-28) |
parents | |
children |
comparison
equal
deleted
inserted
replaced
16:16eb2acaaa20 | 17:c790d25086dc |
---|---|
1 from Bio import pairwise2 | |
2 | |
3 class Alignment: | |
4 def __init__(self, fileName): | |
5 self.queryName = None | |
6 self.queryStart = list() | |
7 self.queryAlignment = list() | |
8 self.templateName = None | |
9 self.templateStart = list() | |
10 self.templateAlignment = list() | |
11 self.readFile(fileName) | |
12 | |
13 def readFile(self, fileName): | |
14 with open(fileName) as file: | |
15 for index, line in enumerate(file): | |
16 cols = line.split() | |
17 if len(cols) > 1 and cols[0] == "Query": | |
18 self.queryName = cols[1].split()[0][0:14] | |
19 if len(cols) > 1 and cols[0].startswith(">"): | |
20 self.templateName = cols[0][1:] | |
21 if self.queryName and self.templateName: | |
22 if len(cols) > 2: | |
23 if cols[0] == "Q" and cols[1] == self.queryName: | |
24 self.queryStart.append(self.getStart(cols[2])) | |
25 self.queryAlignment.append(cols[3]) | |
26 if cols[0] == "T" and cols[1] == self.templateName: | |
27 self.templateStart.append(self.getStart(cols[2])) | |
28 self.templateAlignment.append(cols[3]) | |
29 if len(cols) > 1 and cols[0] == "No" and cols[1] == "2": | |
30 break | |
31 | |
32 def createModel(self, templateChain): | |
33 hhrMapping = self.mapSequence(templateChain) | |
34 previousResidue = dict() | |
35 for residueNumber in templateChain: | |
36 templateResidue = templateChain[residueNumber]["residue"] | |
37 previousResidue[residueNumber] = templateResidue | |
38 templateChain[residueNumber]["residue"] = None | |
39 tCount = 0 | |
40 for i in range(len(self.templateAlignment)): | |
41 templateSequence = self.templateAlignment[i] | |
42 querySequence = self.queryAlignment[i] | |
43 queryStart = self.queryStart[i] | |
44 n = len(querySequence) | |
45 qCount = 0 | |
46 for j in range(n): | |
47 qs = querySequence[j] | |
48 ts = templateSequence[j] | |
49 rs = hhrMapping[tCount] | |
50 if rs != "-" and qs != "-" and ts != "-": | |
51 residueNumber = rs | |
52 if residueNumber in templateChain: | |
53 pr = previousResidue[residueNumber] | |
54 if pr != self.toThreeAmino(ts): | |
55 print ("Warning: Ignoring mismatching residue [%s != %s]." % (pr, self.toThreeAmino(ts))) | |
56 templateChain[residueNumber]["residue"] = self.toThreeAmino(qs) | |
57 templateChain[residueNumber]["residueNumber"] = queryStart + qCount | |
58 else: | |
59 print ("Warning: Skipping missing residue [%s]." % residueNumber) | |
60 if qs != "-": | |
61 qCount = qCount + 1 | |
62 if ts != "-": | |
63 tCount = tCount + 1 | |
64 | |
65 def mapSequence(self, templateChain): | |
66 pdbSequence = "" | |
67 for residueNumber in templateChain: | |
68 templateResidue = templateChain[residueNumber]["residue"] | |
69 pdbSequence = pdbSequence + self.toSingleAmino(templateResidue) | |
70 hhrSequence = "" | |
71 for i in range(len(self.templateAlignment)): | |
72 templateSequence = self.templateAlignment[i] | |
73 for s in templateSequence: | |
74 if s != "-": | |
75 hhrSequence = hhrSequence + s | |
76 alignments = pairwise2.align.globalxx(pdbSequence, hhrSequence) | |
77 pdbAlignment = alignments[0].seqA | |
78 hhrAlignment = alignments[0].seqB | |
79 pCount = 0 | |
80 hCount = 0 | |
81 hhrMapping = [] | |
82 for i in range(len(pdbAlignment)): | |
83 p = pdbAlignment[i] | |
84 h = hhrAlignment[i] | |
85 if h != "-": | |
86 residueIndex = pCount if p != "-" else p | |
87 hhrMapping.append(residueIndex) | |
88 if p != "-": | |
89 pCount = pCount + 1 | |
90 sortedResidueIndex = sorted(templateChain.keys()) | |
91 for i in range(len(hhrMapping)): | |
92 if hhrMapping[i] != "-": | |
93 hhrMapping[i] = sortedResidueIndex[hhrMapping[i]] | |
94 return hhrMapping | |
95 | |
96 def getStart(self, x): | |
97 try: | |
98 return int(x) | |
99 except: | |
100 raise Exception("Invalid start index in alignment [%s]." % x) | |
101 | |
102 def toThreeAmino(self, seq): | |
103 code = dict(G="GLY", A="ALA", V="VAL", L="LEU", I="ILE", M="MET", F="PHE", P="PRO", Y="TYR", W="TRP", | |
104 K="LYS", S="SER", C="CYS", N="ASN", Q="GLN", H="HIS", T="THR", E="GLU", D="ASP", R="ARG") | |
105 return code[seq] if seq in code else "XXX" | |
106 | |
107 def toSingleAmino(self, seq): | |
108 code = dict(GLY="G", ALA="A", VAL="V", LEU="L", ILE="I", MET="M", PHE="F", PRO="P", TYR="Y", TRP="W", | |
109 LYS="K", SER="S", CYS="C", ASN="N", GLN="Q", HIS="H", THR="T", GLU="E", ASP="D", ARG="R") | |
110 return code[seq] if seq in code else "X" |