Mercurial > repos > guerler > springsuite
comparison spring_package/Alignment.py @ 37:0be0af9e695d draft
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
| author | guerler |
|---|---|
| date | Wed, 25 Nov 2020 14:35:35 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 36:2fe8ffff530d | 37:0be0af9e695d |
|---|---|
| 1 from Bio import pairwise2 | |
| 2 | |
| 3 | |
| 4 class Alignment: | |
| 5 def __init__(self, fileName): | |
| 6 self.queryName = None | |
| 7 self.queryStart = list() | |
| 8 self.queryAlignment = list() | |
| 9 self.templateName = None | |
| 10 self.templateStart = list() | |
| 11 self.templateAlignment = list() | |
| 12 self.readFile(fileName) | |
| 13 | |
| 14 def readFile(self, fileName): | |
| 15 with open(fileName) as file: | |
| 16 for line in file: | |
| 17 cols = line.split() | |
| 18 if len(cols) > 1 and cols[0] == "Query": | |
| 19 self.queryName = cols[1].split()[0][0:14] | |
| 20 if len(cols) > 1 and cols[0].startswith(">"): | |
| 21 self.templateName = cols[0][1:] | |
| 22 if self.queryName and self.templateName: | |
| 23 if len(cols) > 2: | |
| 24 if cols[0] == "Q" and cols[1] == self.queryName: | |
| 25 self.queryStart.append(self.getStart(cols[2])) | |
| 26 self.queryAlignment.append(cols[3]) | |
| 27 if cols[0] == "T" and cols[1] == self.templateName: | |
| 28 self.templateStart.append(self.getStart(cols[2])) | |
| 29 self.templateAlignment.append(cols[3]) | |
| 30 if len(cols) > 1 and cols[0] == "No" and cols[1] == "2": | |
| 31 break | |
| 32 | |
| 33 def createModel(self, templateChain): | |
| 34 hhrMapping = self.mapSequence(templateChain) | |
| 35 previousResidue = dict() | |
| 36 for residueNumber in templateChain: | |
| 37 templateResidue = templateChain[residueNumber]["residue"] | |
| 38 previousResidue[residueNumber] = templateResidue | |
| 39 templateChain[residueNumber]["residue"] = None | |
| 40 tCount = 0 | |
| 41 for i in range(len(self.templateAlignment)): | |
| 42 templateSequence = self.templateAlignment[i] | |
| 43 querySequence = self.queryAlignment[i] | |
| 44 queryStart = self.queryStart[i] | |
| 45 n = len(querySequence) | |
| 46 qCount = 0 | |
| 47 for j in range(n): | |
| 48 qs = querySequence[j] | |
| 49 ts = templateSequence[j] | |
| 50 rs = hhrMapping[tCount] | |
| 51 if rs != "-" and qs != "-" and ts != "-": | |
| 52 residueNumber = rs | |
| 53 if residueNumber in templateChain: | |
| 54 pr = previousResidue[residueNumber] | |
| 55 if pr != self.toThreeAmino(ts): | |
| 56 print("Warning: Ignoring mismatching residue [%s != %s]." % (pr, self.toThreeAmino(ts))) | |
| 57 templateChain[residueNumber]["residue"] = self.toThreeAmino(qs) | |
| 58 templateChain[residueNumber]["residueNumber"] = queryStart + qCount | |
| 59 else: | |
| 60 print("Warning: Skipping missing residue [%s]." % residueNumber) | |
| 61 if qs != "-": | |
| 62 qCount = qCount + 1 | |
| 63 if ts != "-": | |
| 64 tCount = tCount + 1 | |
| 65 | |
| 66 def mapSequence(self, templateChain): | |
| 67 pdbSequence = "" | |
| 68 for residueNumber in templateChain: | |
| 69 templateResidue = templateChain[residueNumber]["residue"] | |
| 70 pdbSequence = pdbSequence + self.toSingleAmino(templateResidue) | |
| 71 hhrSequence = "" | |
| 72 for i in range(len(self.templateAlignment)): | |
| 73 templateSequence = self.templateAlignment[i] | |
| 74 for s in templateSequence: | |
| 75 if s != "-": | |
| 76 hhrSequence = hhrSequence + s | |
| 77 alignments = pairwise2.align.globalxx(pdbSequence, hhrSequence) | |
| 78 pdbAlignment = alignments[0].seqA | |
| 79 hhrAlignment = alignments[0].seqB | |
| 80 pCount = 0 | |
| 81 hhrMapping = [] | |
| 82 for i in range(len(pdbAlignment)): | |
| 83 p = pdbAlignment[i] | |
| 84 h = hhrAlignment[i] | |
| 85 if h != "-": | |
| 86 residueIndex = pCount if p != "-" else p | |
| 87 hhrMapping.append(residueIndex) | |
| 88 if p != "-": | |
| 89 pCount = pCount + 1 | |
| 90 sortedResidueIndex = sorted(templateChain.keys()) | |
| 91 for i in range(len(hhrMapping)): | |
| 92 if hhrMapping[i] != "-": | |
| 93 hhrMapping[i] = sortedResidueIndex[hhrMapping[i]] | |
| 94 return hhrMapping | |
| 95 | |
| 96 def getStart(self, x): | |
| 97 try: | |
| 98 return int(x) | |
| 99 except Exception: | |
| 100 raise Exception("Invalid start index in alignment [%s]." % x) | |
| 101 | |
| 102 def toThreeAmino(self, seq): | |
| 103 code = dict(G="GLY", A="ALA", V="VAL", L="LEU", I="ILE", M="MET", F="PHE", P="PRO", Y="TYR", W="TRP", | |
| 104 K="LYS", S="SER", C="CYS", N="ASN", Q="GLN", H="HIS", T="THR", E="GLU", D="ASP", R="ARG") | |
| 105 return code[seq] if seq in code else "XXX" | |
| 106 | |
| 107 def toSingleAmino(self, seq): | |
| 108 code = dict(GLY="G", ALA="A", VAL="V", LEU="L", ILE="I", MET="M", PHE="F", PRO="P", TYR="Y", TRP="W", | |
| 109 LYS="K", SER="S", CYS="C", ASN="N", GLN="Q", HIS="H", THR="T", GLU="E", ASP="D", ARG="R") | |
| 110 return code[seq] if seq in code else "X" |
