Mercurial > repos > guerler > springsuite
diff spring_package/Molecule.py @ 37:0be0af9e695d draft
"planemo upload commit c716195a2cc1ed30ff8c4936621091296a93b2fc"
author | guerler |
---|---|
date | Wed, 25 Nov 2020 14:35:35 +0000 |
parents | |
children | 172398348efd |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/spring_package/Molecule.py Wed Nov 25 14:35:35 2020 +0000 @@ -0,0 +1,168 @@ +class Molecule: + def __init__(self, fileName=None): + self.calpha = dict() + self.biomol = dict() + self.atoms = list() + if fileName is not None: + self.fromFile(fileName) + + def fromFile(self, fileName): + biomolNumber = 0 + biomolChains = list() + self.biomol[0] = None + with open(fileName) as file: + for line in file: + key = line[0:6].strip() + if key == "ATOM": + atom = line[12:16] + atomNumber = line[6:11] + chainName = line[21:22] + if chainName not in self.calpha: + self.calpha[chainName] = dict() + x = self.toFloat(line[30:38]) + y = self.toFloat(line[38:46]) + z = self.toFloat(line[46:54]) + occupancy = self.toFloat(line[54:60], optional=True) + temperature = self.toFloat(line[54:60], optional=True) + residue = line[17:20] + residueNumber = self.toInt(line[22:26]) + atomNumber = self.toInt(line[6:11]) + atomName = line[12:16] + atomDict = dict(x=x, y=y, z=z, + residue=residue, + occupancy=occupancy, + temperature=temperature, + atomNumber=atomNumber, + atomName=atomName, + residueNumber=residueNumber, + chainName=chainName) + if atom.strip() == "CA": + self.calpha[chainName][residueNumber] = atomDict + self.atoms.append(atomDict) + biokey = "REMARK 350 BIOMOLECULE:" + if line[0:len(biokey)] == biokey: + biomolNumber = self.toInt(line[len(biokey):]) + if biomolNumber == 0: + raise Exception("Invalid biomolecule identifier [0].") + biokey = "REMARK 350 APPLY THE FOLLOWING TO CHAINS:" + nextLine = next(file) + while nextLine[:len(biokey)] != biokey: + nextLine = next(file) + biomolChains = nextLine[len(biokey):].split(",") + biomolChains = list(map(lambda x: x.strip(), biomolChains)) + biokey = "REMARK 350 AND CHAINS:" + nextLine = next(file) + while nextLine[:len(biokey)] == biokey: + moreChains = nextLine[len(biokey):].split(",") + moreChains = list(map(lambda x: x.strip(), moreChains)) + biomolChains = biomolChains + moreChains + nextLine = next(file) + biokey = "REMARK 350 BIOMT" + if nextLine[:len(biokey)] == biokey: + biomolMatId1, biomolMat1 = self.getFloats(nextLine) + nextLine = next(file) + biomolMatId2, biomolMat2 = self.getFloats(nextLine) + nextLine = next(file) + biomolMatId3, biomolMat3 = self.getFloats(nextLine) + if biomolMatId1 != biomolMatId2 or biomolMatId1 != biomolMatId3: + raise Exception("Invalid rotation matrix format [%s]." % biomolMatId1) + matrix = [biomolMat1, biomolMat2, biomolMat3] + biomolChains = [c for c in biomolChains if c] + if biomolNumber not in self.biomol: + self.biomol[biomolNumber] = list() + self.biomol[biomolNumber].append(dict(chains=biomolChains, matrix=matrix)) + removeChains = [] + for chainName in self.calpha: + if len(self.calpha[chainName]) == 0: + removeChains.append(chainName) + for chainName in removeChains: + del self.calpha[chainName] + if not self.calpha: + raise Exception("Molecule has no atoms.") + + def getFloats(self, nextLine): + matId = self.toInt(nextLine[20:23]) + matLine = nextLine[23:].split() + matLine = list(map(lambda x: self.toFloat(x), matLine)) + return matId, matLine + + def createUnit(self, biomolNumber=1): + molecule = Molecule() + chainCount = 0 + for matrixDict in self.biomol[biomolNumber]: + for chain in matrixDict["chains"]: + if chain in self.calpha: + chainCopy = dict() + for residue in self.calpha[chain]: + chainCopy[residue] = self.calpha[chain][residue].copy() + for atomNumber in chainCopy: + atom = chainCopy[atomNumber] + rotmat = matrixDict["matrix"] + self.applyMatrix(atom, rotmat) + if chain in molecule.calpha: + chainName = "%s%d" % (chain, chainCount) + else: + chainName = chain + molecule.calpha[chainName] = chainCopy + chainCount = chainCount + 1 + return molecule + + def getSequence(self, chainName): + seq = "" + if chainName not in self.calpha: + raise Exception("Chain identifier not found [%s]" % chainName) + chainDict = self.calpha[chainName] + for residueNumber in sorted(chainDict): + seq = seq + self.toSingleAmino(chainDict[residueNumber]["residue"]) + return seq + + def applyMatrix(self, atom, rotmat): + newx = atom["x"] * rotmat[0][0] + atom["y"] * rotmat[0][1] + atom["z"] * rotmat[0][2] + rotmat[0][3] + newy = atom["x"] * rotmat[1][0] + atom["y"] * rotmat[1][1] + atom["z"] * rotmat[1][2] + rotmat[1][3] + newz = atom["x"] * rotmat[2][0] + atom["y"] * rotmat[2][1] + atom["z"] * rotmat[2][2] + rotmat[2][3] + atom["x"] = newx + atom["y"] = newy + atom["z"] = newz + return atom + + def toFloat(self, x, optional=False): + try: + return float(x) + except Exception: + if not optional: + raise Exception("Invalid float conversion [%s]." % x) + return 0.0 + + def toInt(self, x): + try: + return int(x) + except Exception: + raise Exception("Invalid integer conversion [%s]." % x) + + def toSingleAmino(self, seq): + code = dict(GLY="G", ALA="A", VAL="V", LEU="L", ILE="I", MET="M", PHE="F", PRO="P", TYR="Y", TRP="W", + LYS="K", SER="S", CYS="C", ASN="N", GLN="Q", HIS="H", THR="T", GLU="E", ASP="D", ARG="R") + return code[seq] if seq in code else "X" + + def saveChain(self, chainName, outputName): + print("Writing PDB file to %s." % outputName) + f = open(outputName, "w") + for residueNumber in sorted(self.calpha[chainName].keys()): + ca = self.calpha[chainName][residueNumber] + if ca["residue"] is not None: + f.write(self.atomString(ca)) + f.close() + + def save(self, outputName, append=False, chainName=None): + print("Writing atoms to PDB file to %s." % outputName) + fileFlag = "+a" if append else "w" + f = open(outputName, fileFlag) + for atom in self.atoms: + atom["chainName"] = chainName if chainName else atom["chainName"] + f.write(self.atomString(atom)) + f.write("TER\n") + f.close() + + def atomString(self, atom): + return "ATOM %5d %s %s %1s%4d %8.3f%8.3f%8.3f%6.2f%6.2f\n" % (atom["atomNumber"], atom["atomName"], atom["residue"], atom["chainName"], atom["residueNumber"], + atom["x"], atom["y"], atom["z"], atom["occupancy"], atom["temperature"])