Mercurial > repos > davidmurphy > codonlogo
comparison corebio/resource/scop.py @ 7:8d676bbd1f2d
Uploaded
| author | davidmurphy |
|---|---|
| date | Mon, 16 Jan 2012 07:03:36 -0500 |
| parents | c55bdc2fb9fa |
| children |
comparison
equal
deleted
inserted
replaced
| 6:4a4aca3d57c9 | 7:8d676bbd1f2d |
|---|---|
| 1 | |
| 2 # Copyright 2000 by Jeffrey Chang. All rights reserved. | |
| 3 # Copyright 2001 by Gavin E. Crooks. All rights reserved. | |
| 4 # Modifications Copyright 2004/2005 James Casbon. | |
| 5 # Copyright 2005 by Regents of the University of California. All rights Reserved. | |
| 6 # (Major rewrite for conformance to corebio. Gavin Crooks) | |
| 7 # | |
| 8 # This code is derived from the Biopython distribution and is governed by it's | |
| 9 # license. Please see the LICENSE file that should have been included | |
| 10 # as part of this package. | |
| 11 | |
| 12 | |
| 13 """ SCOP: Structural Classification of Proteins. | |
| 14 | |
| 15 The SCOP database aims to provide a manually constructed classification of | |
| 16 all know protein structures into a hierarchy, the main levels of which | |
| 17 are family, superfamily and fold. | |
| 18 | |
| 19 * SCOP: http://scop.mrc-lmb.cam.ac.uk/scop/ | |
| 20 * Introduction: http://scop.mrc-lmb.cam.ac.uk/scop/intro.html | |
| 21 * SCOP parsable files: http://scop.mrc-lmb.cam.ac.uk/scop/parse/ | |
| 22 | |
| 23 The Scop object in this module represents the entire SCOP classification. It | |
| 24 can be built from the three SCOP parsable files (see DesRecord, HieRecord and | |
| 25 ClaRecord), modified is so desired, and converted back to the same file formats. | |
| 26 A single SCOP domain (represented by the Domain class) can be obtained from | |
| 27 Scop using the domain's SCOP identifier (sid). | |
| 28 | |
| 29 Classes: | |
| 30 - Scop -- The entire SCOP hierarchy. | |
| 31 - Node -- A node in the SCOP hierarchy. | |
| 32 - Domain -- A SCOP domain. | |
| 33 - Residues -- A collection of residues from a PDB structure. | |
| 34 - HieRecord -- Handle the SCOP HIErarchy files. | |
| 35 - DesRecord -- Handle the SCOP DEScription file. | |
| 36 - ClaRecord -- Handle the SCOP CLAssification file. | |
| 37 | |
| 38 | |
| 39 nodeCodeDict -- A mapping between known 2 letter node codes and a longer | |
| 40 description. The known node types are 'cl' (class), 'cf' | |
| 41 (fold), 'sf' (superfamily), 'fa' (family), 'dm' (domain), | |
| 42 'sp' (species), 'px' (domain). Additional node types may | |
| 43 be added in the future. | |
| 44 """ | |
| 45 | |
| 46 import os, re | |
| 47 | |
| 48 | |
| 49 nodeCodeDict = { 'cl':'class', 'cf':'fold', 'sf':'superfamily', | |
| 50 'fa':'family', 'dm':'protein', 'sp':'species', 'px':'domain'} | |
| 51 | |
| 52 | |
| 53 _nodetype_to_code= dict([[v,k] for k,v in nodeCodeDict.items()]) | |
| 54 | |
| 55 | |
| 56 nodeCodeOrder = [ 'ro', 'cl', 'cf', 'sf', 'fa', 'dm', 'sp', 'px' ] | |
| 57 | |
| 58 | |
| 59 def cmp_sccs(sccs1, sccs2) : | |
| 60 """Order SCOP concise classification strings (sccs). | |
| 61 | |
| 62 a.4.5.1 < a.4.5.11 < b.1.1.1 | |
| 63 | |
| 64 A sccs (e.g. a.4.5.11) compactly represents a domain's classification. | |
| 65 The letter represents the class, and the numbers are the fold, | |
| 66 superfamily, and family, respectively. | |
| 67 | |
| 68 """ | |
| 69 | |
| 70 s1 = sccs1.split(".") | |
| 71 s2 = sccs2.split(".") | |
| 72 | |
| 73 if s1[0] != s2[0]: return cmp(s1[0], s2[0]) | |
| 74 | |
| 75 s1 = map(int, s1[1:]) | |
| 76 s2 = map(int, s2[1:]) | |
| 77 | |
| 78 return cmp(s1,s2) | |
| 79 | |
| 80 | |
| 81 | |
| 82 def _open_scop_file(scop_dir_path, version, filetype) : | |
| 83 filename = "dir.%s.scop.txt_%s" % (filetype,version) | |
| 84 afile = open(os.path.join( scop_dir_path, filename)) | |
| 85 return afile | |
| 86 | |
| 87 | |
| 88 class Scop(object): | |
| 89 """The entire SCOP hierarchy. | |
| 90 | |
| 91 root -- The root node of the hierarchy | |
| 92 domains -- A list of all domains | |
| 93 nodes_by_sid -- A dictionary of nodes indexed by SCOP identifier | |
| 94 (e.g. 'd1hbia_') | |
| 95 domains_by_sunid -- A dictionary of domains indexed by SCOP uniquie | |
| 96 identifiers (e.g. 14996) | |
| 97 """ | |
| 98 def __init__(self): | |
| 99 """ An empty Scop object. | |
| 100 | |
| 101 See also Scop.parse() and Scop.parse_files() | |
| 102 """ | |
| 103 self.root = None | |
| 104 self.domains = [] | |
| 105 self.nodes_by_sunid = dict() | |
| 106 self.domains_by_sid = dict() | |
| 107 | |
| 108 #@classmethod | |
| 109 def parse(cls, dir_path, version) : | |
| 110 """Build the SCOP hierarchy from the SCOP parsable files. | |
| 111 | |
| 112 - dir_path -- A directory that contains the SCOP files | |
| 113 - version -- The SCOP version (as a string) | |
| 114 | |
| 115 The SCOP files are named dir.XXX.scop.txt_VERSION, where XXX | |
| 116 is 'cla', 'des' or 'hie'. | |
| 117 """ | |
| 118 cla_file = None | |
| 119 des_file = None | |
| 120 hie_file = None | |
| 121 try : | |
| 122 cla_file = _open_scop_file( dir_path, version, 'cla') | |
| 123 des_file = _open_scop_file( dir_path, version, 'des') | |
| 124 hie_file = _open_scop_file( dir_path, version, 'hie') | |
| 125 scop = cls.parse_files(cla_file, des_file, hie_file) | |
| 126 finally : | |
| 127 # If we opened the files, we close the files | |
| 128 if cla_file : cla_file.close() | |
| 129 if des_file : des_file.close() | |
| 130 if hie_file : hie_file.close() | |
| 131 | |
| 132 return scop | |
| 133 parse = classmethod(parse) | |
| 134 | |
| 135 #@classmethod | |
| 136 def parse_files(cls, cla_file, des_file, hie_file): | |
| 137 """Build the SCOP hierarchy from the SCOP parsable files. | |
| 138 | |
| 139 - cla_file -- the CLA clasification file | |
| 140 - des_file -- the DES description file | |
| 141 - hie_file -- the HIE hierarchy file | |
| 142 """ | |
| 143 | |
| 144 self = cls() | |
| 145 | |
| 146 sunidDict = {} | |
| 147 | |
| 148 root = Node() | |
| 149 domains = [] | |
| 150 root.sunid=0 | |
| 151 root.type='ro' | |
| 152 sunidDict[root.sunid] = root | |
| 153 | |
| 154 root.description = 'SCOP Root' | |
| 155 | |
| 156 # Build the rest of the nodes using the DES file | |
| 157 for rec in DesRecord.records(des_file): | |
| 158 if rec.nodetype =='px' : | |
| 159 n = Domain() | |
| 160 n.sid = rec.name | |
| 161 domains.append(n) | |
| 162 else : | |
| 163 n = Node() | |
| 164 n.sunid = rec.sunid | |
| 165 n.type = rec.nodetype | |
| 166 n.sccs = rec.sccs | |
| 167 n.description = rec.description | |
| 168 | |
| 169 sunidDict[n.sunid] = n | |
| 170 | |
| 171 # Glue all of the Nodes together using the HIE file | |
| 172 for rec in HieRecord.records(hie_file): | |
| 173 if not rec.sunid in sunidDict : | |
| 174 print rec.sunid #FIXME: HUH? | |
| 175 | |
| 176 n = sunidDict[rec.sunid] | |
| 177 if rec.parent !='': # Not root node | |
| 178 if not rec.parent in sunidDict : | |
| 179 raise ValueError("Incomplete data?") | |
| 180 n.parent = sunidDict[rec.parent] | |
| 181 | |
| 182 for c in rec.children: | |
| 183 if not c in sunidDict : | |
| 184 raise ValueError("Incomplete data?") | |
| 185 n.children.append(sunidDict[c]) | |
| 186 | |
| 187 | |
| 188 # Fill in the gaps with information from the CLA file | |
| 189 sidDict = {} | |
| 190 for rec in ClaRecord.records(cla_file): | |
| 191 n = sunidDict[rec.sunid] | |
| 192 assert n.sccs == rec.sccs | |
| 193 assert n.sid == rec.sid | |
| 194 n.residues = rec.residues | |
| 195 sidDict[n.sid] = n | |
| 196 | |
| 197 # Clean up | |
| 198 self.root = root | |
| 199 self.nodes_by_sunid = sunidDict | |
| 200 self.domains_by_sid = sidDict | |
| 201 self.domains = tuple(domains) | |
| 202 | |
| 203 return self | |
| 204 parse_files = classmethod(parse_files) | |
| 205 | |
| 206 | |
| 207 def write_hie(self, stream) : | |
| 208 """Build an HIE SCOP parsable file from this object""" | |
| 209 nodes = self.nodes_by_sunid.values() | |
| 210 # We order nodes to ease comparison with original file | |
| 211 nodes.sort(lambda n1,n2: cmp(n1.sunid, n2.sunid)) | |
| 212 | |
| 213 for n in nodes : | |
| 214 stream.write(str(n.to_hie_record())) | |
| 215 | |
| 216 | |
| 217 def write_des(self, stream) : | |
| 218 """Build a DES SCOP parsable file from this object""" | |
| 219 nodes = self.nodes_by_sunid.values() | |
| 220 # Origional SCOP file is not ordered? | |
| 221 nodes.sort(lambda n1,n2: cmp(n1.sunid, n2.sunid)) | |
| 222 | |
| 223 for n in nodes : | |
| 224 if n != self.root : | |
| 225 stream.write(str(n.to_des_record())) | |
| 226 | |
| 227 | |
| 228 def write_cla(self, stream) : | |
| 229 """Build a CLA SCOP parsable file from this object""" | |
| 230 nodes = self.domains_by_sid.values() | |
| 231 # We order nodes to ease comparison with original file | |
| 232 nodes.sort(lambda n1,n2: cmp(n1.sunid, n2.sunid)) | |
| 233 | |
| 234 for n in nodes : | |
| 235 stream.write(str(n.to_cla_record())) | |
| 236 # End Scop | |
| 237 | |
| 238 | |
| 239 | |
| 240 class Node(object) : | |
| 241 """ A node in the Scop hierarchy | |
| 242 | |
| 243 sunid -- SCOP unique identifiers. e.g. '14986' | |
| 244 parent -- The parent node | |
| 245 children -- A list of child nodes | |
| 246 sccs -- SCOP concise classification string. e.g. 'a.1.1.2' | |
| 247 type -- A 2 letter node type code. e.g. 'px' for domains | |
| 248 description -- | |
| 249 | |
| 250 """ | |
| 251 def __init__(self) : | |
| 252 """A new, uninitilized SCOP node.""" | |
| 253 self.sunid='' | |
| 254 self.parent = None | |
| 255 self.children=[] | |
| 256 self.sccs = '' | |
| 257 self.type ='' | |
| 258 self.description ='' | |
| 259 | |
| 260 def __str__(self) : | |
| 261 s = [] | |
| 262 s.append(str(self.sunid)) | |
| 263 s.append(self.sccs) | |
| 264 s.append(self.type) | |
| 265 s.append(self.description) | |
| 266 | |
| 267 return " ".join(s) | |
| 268 | |
| 269 def to_hie_record(self): | |
| 270 """Return an Hie.Record""" | |
| 271 rec = HieRecord() | |
| 272 rec.sunid = str(self.sunid) | |
| 273 if self.parent : # Not root node | |
| 274 rec.parent = str(self.parent.sunid) | |
| 275 else: | |
| 276 rec.parent = '-' | |
| 277 for c in self.children : | |
| 278 rec.children.append(str(c.sunid)) | |
| 279 return rec | |
| 280 | |
| 281 def to_des_record(self): | |
| 282 """Return a Des.Record""" | |
| 283 rec = DesRecord() | |
| 284 rec.sunid = str(self.sunid) | |
| 285 rec.nodetype = self.type | |
| 286 rec.sccs = self.sccs | |
| 287 rec.description = self.description | |
| 288 return rec | |
| 289 | |
| 290 def descendents( self, node_type) : | |
| 291 """ Return a list of all decendent nodes of the given type. Node type | |
| 292 can be a two letter code or longer description. e.g. 'fa' or 'family' | |
| 293 """ | |
| 294 if node_type in _nodetype_to_code: | |
| 295 node_type = _nodetype_to_code[node_type] | |
| 296 | |
| 297 nodes = [self] | |
| 298 | |
| 299 while nodes[0].type != node_type: | |
| 300 if nodes[0].type == 'px' : | |
| 301 return [] # Fell of the bottom of the hierarchy | |
| 302 child_list = [] | |
| 303 for n in nodes: | |
| 304 for child in n.children: | |
| 305 child_list.append( child ) | |
| 306 nodes = child_list | |
| 307 | |
| 308 return nodes | |
| 309 | |
| 310 | |
| 311 def ascendent( self, node_type) : | |
| 312 """ Return the ancestor node of the given type, or None. Node type can | |
| 313 be a two letter code or longer description. e.g. 'fa' or 'family' | |
| 314 """ | |
| 315 if node_type in _nodetype_to_code : | |
| 316 node_type = _nodetype_to_code[node_type] | |
| 317 | |
| 318 n = self | |
| 319 if n.type == node_type: return None | |
| 320 while n.type != node_type: | |
| 321 if n.type == 'ro': | |
| 322 return None # Fell of the top of the hierarchy | |
| 323 n = n.parent | |
| 324 | |
| 325 return n | |
| 326 # End Node | |
| 327 | |
| 328 | |
| 329 class Domain(Node) : | |
| 330 """ A SCOP domain. A leaf node in the Scop hierarchy. | |
| 331 | |
| 332 - sid -- The SCOP domain identifier. e.g. 'd5hbib_' | |
| 333 - residues -- A Residue object. It defines the collection | |
| 334 of PDB atoms that make up this domain. | |
| 335 """ | |
| 336 def __init__(self) : | |
| 337 Node.__init__(self) | |
| 338 self.sid = '' | |
| 339 self.residues = None | |
| 340 | |
| 341 def __str__(self) : | |
| 342 s = [] | |
| 343 s.append(self.sid) | |
| 344 s.append(self.sccs) | |
| 345 s.append("("+str(self.residues)+")") | |
| 346 | |
| 347 if not self.parent : | |
| 348 s.append(self.description) | |
| 349 else : | |
| 350 sp = self.parent | |
| 351 dm = sp.parent | |
| 352 s.append(dm.description) | |
| 353 s.append("{"+sp.description+"}") | |
| 354 | |
| 355 return " ".join(s) | |
| 356 | |
| 357 def to_des_record(self): | |
| 358 """Return a des.Record""" | |
| 359 rec = Node.to_des_record(self) | |
| 360 rec.name = self.sid | |
| 361 return rec | |
| 362 | |
| 363 def to_cla_record(self) : | |
| 364 """Return a cla.Record""" | |
| 365 rec = ClaRecord() | |
| 366 rec.sid = self.sid | |
| 367 rec.residues = self.residues | |
| 368 rec.sccs = self.sccs | |
| 369 rec.sunid = self.sunid | |
| 370 | |
| 371 n = self | |
| 372 while n.sunid != 0: # Not root node | |
| 373 rec.hierarchy.append( (n.type, str(n.sunid)) ) | |
| 374 n = n.parent | |
| 375 | |
| 376 rec.hierarchy.reverse() | |
| 377 | |
| 378 return rec | |
| 379 # End Domain | |
| 380 | |
| 381 | |
| 382 | |
| 383 class DesRecord(object): | |
| 384 """ Handle the SCOP DEScription file. | |
| 385 | |
| 386 The file format is described in the scop | |
| 387 "release notes.":http://scop.berkeley.edu/release-notes-1.55.html | |
| 388 The latest DES file can be found | |
| 389 "elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/ | |
| 390 | |
| 391 The DES file consisnt of one DES record per line. Each record | |
| 392 holds information for one node in the SCOP hierarchy, and consist | |
| 393 of 5 tab deliminated fields, | |
| 394 sunid, node type, sccs, node name, node description. | |
| 395 | |
| 396 For example :: | |
| 397 | |
| 398 21953 px b.1.2.1 d1dan.1 1dan T:,U:91-106 | |
| 399 48724 cl b - All beta proteins | |
| 400 48725 cf b.1 - Immunoglobulin-like beta-sandwich | |
| 401 49265 sf b.1.2 - Fibronectin type III | |
| 402 49266 fa b.1.2.1 - Fibronectin type III | |
| 403 | |
| 404 | |
| 405 - sunid -- SCOP unique identifiers | |
| 406 - nodetype -- One of 'cl' (class), 'cf' (fold), 'sf' (superfamily), | |
| 407 'fa' (family), 'dm' (protein), 'sp' (species), | |
| 408 'px' (domain). Additional node types may be added. | |
| 409 - sccs -- SCOP concise classification strings. e.g. b.1.2.1 | |
| 410 - name -- The SCOP ID (sid) for domains (e.g. d1anu1), | |
| 411 currently empty for other node types | |
| 412 - description -- e.g. "All beta proteins","Fibronectin type III", | |
| 413 """ | |
| 414 def __init__(self, record=None): | |
| 415 | |
| 416 if not record : | |
| 417 self.sunid = '' | |
| 418 self.nodetype = '' | |
| 419 self.sccs = '' | |
| 420 self.name = '' | |
| 421 self.description ='' | |
| 422 else : | |
| 423 entry = record.rstrip() # no trailing whitespace | |
| 424 columns = entry.split("\t") # separate the tab-delineated cols | |
| 425 if len(columns) != 5: | |
| 426 raise ValueError("I don't understand the format of %s" % entry) | |
| 427 | |
| 428 self.sunid, self.nodetype, self.sccs, self.name, self.description \ | |
| 429 = columns | |
| 430 if self.name == '-' : self.name ='' | |
| 431 self.sunid = int(self.sunid) | |
| 432 | |
| 433 def __str__(self): | |
| 434 s = [] | |
| 435 s.append(self.sunid) | |
| 436 s.append(self.nodetype) | |
| 437 s.append(self.sccs) | |
| 438 if self.name : | |
| 439 s.append(self.name) | |
| 440 else : | |
| 441 s.append("-") | |
| 442 s.append(self.description) | |
| 443 return "\t".join(map(str,s)) + "\n" | |
| 444 | |
| 445 #@staticmethod | |
| 446 def records(des_file): | |
| 447 """Iterates over a DES file, generating DesRecords """ | |
| 448 for line in des_file: | |
| 449 if line[0] =='#': continue # A comment | |
| 450 if line.isspace() : continue | |
| 451 yield DesRecord(line) | |
| 452 records = staticmethod(records) | |
| 453 # End DesRecord | |
| 454 | |
| 455 class HieRecord(object): | |
| 456 """Handle the SCOP HIErarchy files, which describe the SCOP hierarchy in | |
| 457 terms of SCOP unique identifiers (sunid). | |
| 458 | |
| 459 The file format is described in the scop | |
| 460 "release notes.":http://scop.berkeley.edu/release-notes-1.55.html | |
| 461 The latest HIE file can be found | |
| 462 "elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/ | |
| 463 | |
| 464 "Release 1.55":http://scop.berkeley.edu/parse/dir.hie.scop.txt_1.55 | |
| 465 Records consist of 3 tab deliminated fields; node's sunid, | |
| 466 parent's sunid, and a list of children's sunids. For example :: | |
| 467 | |
| 468 0 - 46456,48724,51349,53931,56572,56835,56992,57942 | |
| 469 21953 49268 - | |
| 470 49267 49266 49268,49269 | |
| 471 | |
| 472 Each record holds information for one node in the SCOP hierarchy. | |
| 473 | |
| 474 sunid -- SCOP unique identifiers of this node | |
| 475 parent -- Parents sunid | |
| 476 children -- Sequence of childrens sunids | |
| 477 """ | |
| 478 def __init__(self, record = None): | |
| 479 self.sunid = None | |
| 480 self.parent = None | |
| 481 self.children = [] | |
| 482 | |
| 483 if not record : return | |
| 484 | |
| 485 # Parses HIE records. | |
| 486 entry = record.rstrip() # no trailing whitespace | |
| 487 columns = entry.split('\t') # separate the tab-delineated cols | |
| 488 if len(columns) != 3: | |
| 489 raise ValueError("I don't understand the format of %s" % entry) | |
| 490 | |
| 491 self.sunid, self.parent, children = columns | |
| 492 | |
| 493 if self.sunid =='-' : self.sunid = '' | |
| 494 if self.parent =='-' : self.parent = '' | |
| 495 else : self.parent = int( self.parent ) | |
| 496 | |
| 497 if children =='-' : | |
| 498 self.children = () | |
| 499 else : | |
| 500 self.children = children.split(',') | |
| 501 self.children = map ( int, self.children ) | |
| 502 | |
| 503 self.sunid = int(self.sunid) | |
| 504 | |
| 505 def __str__(self): | |
| 506 s = [] | |
| 507 s.append(str(self.sunid)) | |
| 508 | |
| 509 if self.parent: | |
| 510 s.append(str(self.parent)) | |
| 511 else: | |
| 512 if self.sunid != 0: | |
| 513 s.append('0') | |
| 514 else: | |
| 515 s.append('-') | |
| 516 | |
| 517 if self.children : | |
| 518 child_str = map(str, self.children) | |
| 519 s.append(",".join(child_str)) | |
| 520 else: | |
| 521 s.append('-') | |
| 522 | |
| 523 return "\t".join(s) + "\n" | |
| 524 | |
| 525 | |
| 526 #@staticmethod | |
| 527 def records(hie_file): | |
| 528 """Iterates over a DOM file, generating DomRecords """ | |
| 529 for line in hie_file: | |
| 530 if line[0] =='#': continue # A comment | |
| 531 if line.isspace() : continue | |
| 532 yield HieRecord(line) | |
| 533 records = staticmethod(records) | |
| 534 # End HieRecord | |
| 535 | |
| 536 | |
| 537 | |
| 538 class ClaRecord(object): | |
| 539 """Handle the SCOP CLAssification file, which describes SCOP domains. | |
| 540 | |
| 541 The file format is described in the scop | |
| 542 "release notes.":http://scop.berkeley.edu/release-notes-1.55.html | |
| 543 The latest CLA file can be found | |
| 544 "elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/ | |
| 545 | |
| 546 sid -- SCOP identifier. e.g. d1danl2 | |
| 547 residues -- The domain definition as a Residues object | |
| 548 sccs -- SCOP concise classification strings. e.g. b.1.2.1 | |
| 549 sunid -- SCOP unique identifier for this domain | |
| 550 hierarchy -- A sequence of tuples (nodetype, sunid) describing the | |
| 551 location of this domain in the SCOP hierarchy. | |
| 552 See the Scop module for a description of nodetypes. | |
| 553 """ | |
| 554 def __init__(self, record=None): | |
| 555 self.sid = '' | |
| 556 self.residues = None | |
| 557 self.sccs = '' | |
| 558 self.sunid ='' | |
| 559 self.hierarchy = [] | |
| 560 | |
| 561 if not record: return | |
| 562 | |
| 563 # Parse a tab-deliminated CLA record. | |
| 564 entry = record.rstrip() # no trailing whitespace | |
| 565 columns = entry.split('\t') # separate the tab-delineated cols | |
| 566 if len(columns) != 6: | |
| 567 raise ValueError("I don't understand the format of %s" % entry) | |
| 568 | |
| 569 self.sid, pdbid, residues, self.sccs, self.sunid, hierarchy = columns | |
| 570 self.residues = Residues(residues) | |
| 571 self.residues.pdbid = pdbid | |
| 572 self.sunid = int(self.sunid) | |
| 573 | |
| 574 h = [] | |
| 575 for ht in hierarchy.split(",") : | |
| 576 h.append( ht.split('=')) | |
| 577 for ht in h: | |
| 578 ht[1] = int(ht[1]) | |
| 579 self.hierarchy = h | |
| 580 | |
| 581 def __str__(self): | |
| 582 s = [] | |
| 583 s.append(self.sid) | |
| 584 s += str(self.residues).split(" ") | |
| 585 s.append(self.sccs) | |
| 586 s.append(self.sunid) | |
| 587 | |
| 588 h=[] | |
| 589 for ht in self.hierarchy: | |
| 590 h.append("=".join(map(str,ht))) | |
| 591 s.append(",".join(h)) | |
| 592 | |
| 593 return "\t".join(map(str,s)) + "\n" | |
| 594 | |
| 595 #@staticmethod | |
| 596 def records(cla_file): | |
| 597 """Iterates over a DOM file, generating DomRecords """ | |
| 598 for line in cla_file: | |
| 599 if line[0] =='#': continue # A comment | |
| 600 if line.isspace() : continue | |
| 601 yield ClaRecord(line) | |
| 602 records = staticmethod(records) | |
| 603 | |
| 604 # End ClaRecord | |
| 605 | |
| 606 | |
| 607 | |
| 608 | |
| 609 class DomRecord(object): | |
| 610 """Handle the SCOP DOMain file. | |
| 611 | |
| 612 The DOM file has been officially deprecated. For more information see | |
| 613 the SCOP"release notes.":http://scop.berkeley.edu/release-notes-1.55.html | |
| 614 The DOM files for older releases can be found | |
| 615 "elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/ | |
| 616 | |
| 617 DOM records consist of 4 tab deliminated fields; | |
| 618 sid, pdbid, residues, hierarchy | |
| 619 For example :: | |
| 620 | |
| 621 d1sctg_ 1sct g: 1.001.001.001.001.001 | |
| 622 d1scth_ 1sct h: 1.001.001.001.001.001 | |
| 623 d1flp__ 1flp - 1.001.001.001.001.002 | |
| 624 d1moh__ 1moh - 1.001.001.001.001.002 | |
| 625 | |
| 626 sid -- The SCOP ID of the entry, e.g. d1anu1 | |
| 627 residues -- The domain definition as a Residues object | |
| 628 hierarchy -- A string specifying where this domain is in the hierarchy. | |
| 629 """ | |
| 630 def __init__(self, record= None): | |
| 631 self.sid = '' | |
| 632 self.residues = [] | |
| 633 self.hierarchy = '' | |
| 634 | |
| 635 if record: | |
| 636 entry = record.rstrip() # no trailing whitespace | |
| 637 columns = entry.split("\t") # separate the tab-delineated cols | |
| 638 if len(columns) != 4: | |
| 639 raise ValueError("I don't understand the format of %s" % entry) | |
| 640 self.sid, pdbid, res, self.hierarchy = columns | |
| 641 self.residues = Residues(res) | |
| 642 self.residues.pdbid = pdbid | |
| 643 | |
| 644 def __str__(self): | |
| 645 s = [] | |
| 646 s.append(self.sid) | |
| 647 s.append(str(self.residues).replace(" ","\t") ) | |
| 648 s.append(self.hierarchy) | |
| 649 return "\t".join(s) + "\n" | |
| 650 | |
| 651 #@staticmethod | |
| 652 def records(dom_file): | |
| 653 """Iterates over a DOM file, generating DomRecords """ | |
| 654 for line in dom_file: | |
| 655 if line[0] =='#': continue # A comment | |
| 656 if line.isspace() : continue | |
| 657 yield DomRecord(line) | |
| 658 records = staticmethod(records) | |
| 659 # End DomRecord | |
| 660 | |
| 661 | |
| 662 | |
| 663 | |
| 664 _pdbid_re = re.compile(r"^(\w\w\w\w)(?:$|\s+|_)(.*)") | |
| 665 _fragment_re = re.compile(r"\(?(\w:)?(-?\w*)-?(-?\w*)\)?(.*)") | |
| 666 | |
| 667 class Residues(object) : | |
| 668 """A collection of residues from a PDB structure. | |
| 669 | |
| 670 This class provides code to work with SCOP domain definitions. These | |
| 671 are concisely expressed as a one or more chain fragments. For example, | |
| 672 "(1bba A:10-20,B:)" indicates residue 10 through 20 (inclusive) of | |
| 673 chain A, and every residue of chain B in the pdb structure 1bba. The pdb | |
| 674 id and brackets are optional. In addition "-" indicates every residue of | |
| 675 a pbd structure with one unnamed chain. | |
| 676 | |
| 677 Start and end residue ids consist of the residue sequence number and an | |
| 678 optional single letter insertion code. e.g. "12", "-1", "1a", "1000" | |
| 679 | |
| 680 | |
| 681 pdbid -- An optional PDB id, e.g. "1bba" | |
| 682 fragments -- A sequence of tuples (chainID, startResID, endResID) | |
| 683 """ | |
| 684 | |
| 685 | |
| 686 def __init__(self, str=None) : | |
| 687 self.pdbid = '' | |
| 688 self.fragments = () | |
| 689 if str is not None : self._parse(str) | |
| 690 | |
| 691 | |
| 692 def _parse(self, string): | |
| 693 string = string.strip() | |
| 694 | |
| 695 #Is there a pdbid at the front? e.g. 1bba A:1-100 | |
| 696 m = _pdbid_re.match(string) | |
| 697 if m is not None : | |
| 698 self.pdbid = m.group(1) | |
| 699 string = m.group(2) # Everything else | |
| 700 | |
| 701 if string=='' or string == '-' or string=='(-)': # no fragments, whole sequence | |
| 702 return | |
| 703 | |
| 704 fragments = [] | |
| 705 for l in string.split(",") : | |
| 706 m = _fragment_re.match(l) | |
| 707 if m is None: | |
| 708 raise ValueError("I don't understand the format of %s" % l) | |
| 709 chain, start, end, postfix = m.groups() | |
| 710 | |
| 711 if postfix != "" : | |
| 712 raise ValueError("I don't understand the format of %s" % l ) | |
| 713 | |
| 714 if chain: | |
| 715 if chain[-1] != ':': | |
| 716 raise ValueError("I don't understand the chain in %s" % l) | |
| 717 chain = chain[:-1] # chop off the ':' | |
| 718 else : | |
| 719 chain ="" | |
| 720 | |
| 721 fragments.append((chain, start, end)) | |
| 722 self.fragments = tuple(fragments) | |
| 723 | |
| 724 def __str__(self): | |
| 725 prefix ="" | |
| 726 if self.pdbid : | |
| 727 prefix =self.pdbid +' ' | |
| 728 | |
| 729 if not self.fragments: return prefix+'-' | |
| 730 strs = [] | |
| 731 for chain, start, end in self.fragments: | |
| 732 s = [] | |
| 733 if chain: s.append("%s:" % chain) | |
| 734 if start: s.append("%s-%s" % (start, end)) | |
| 735 strs.append("".join(s)) | |
| 736 return prefix+ ",".join(strs) | |
| 737 # End Residues | |
| 738 | |
| 739 | |
| 740 | |
| 741 | |
| 742 | |
| 743 | |
| 744 | |
| 745 | |
| 746 | |
| 747 | |
| 748 | |
| 749 | |
| 750 | |
| 751 | |
| 752 | |
| 753 | |
| 754 | |
| 755 | |
| 756 | |
| 757 |
