| 0 | 1 | 
|  | 2 # Copyright 2000 by Jeffrey Chang.  All rights reserved. | 
|  | 3 # Copyright 2001 by Gavin E. Crooks.  All rights reserved. | 
|  | 4 # Modifications Copyright 2004/2005 James Casbon. | 
|  | 5 # Copyright 2005 by Regents of the University of California. All rights Reserved. | 
|  | 6 #   (Major rewrite for conformance to corebio. Gavin Crooks) | 
|  | 7 # | 
|  | 8 # This code is derived from the Biopython distribution and is governed by its | 
|  | 9 # license.  Please see the LICENSE file that should have been included | 
|  | 10 # as part of this package. | 
|  | 11 | 
|  | 12 | 
|  | 13 """ SCOP: Structural Classification of Proteins. | 
|  | 14 | 
|  | 15 The SCOP database aims to provide a manually constructed classification of | 
|  | 16 all known protein structures into a hierarchy, the main levels of which | 
|  | 17 are family, superfamily and fold. | 
|  | 18 | 
|  | 19 * SCOP: http://scop.mrc-lmb.cam.ac.uk/scop/ | 
|  | 20 * Introduction: http://scop.mrc-lmb.cam.ac.uk/scop/intro.html | 
|  | 21 * SCOP parsable files: http://scop.mrc-lmb.cam.ac.uk/scop/parse/ | 
|  | 22 | 
|  | 23 The Scop object in this module represents the entire SCOP classification. It | 
|  | 24 can be built from the three SCOP parsable files (see DesRecord, HieRecord and | 
|  | 25 ClaRecord), modified if so desired, and converted back to the same file formats. | 
|  | 26 A single SCOP domain (represented by the Domain class) can be obtained from | 
|  | 27 Scop using the domain's SCOP identifier (sid). | 
|  | 28 | 
|  | 29 Classes: | 
|  | 30  - Scop     -- The entire SCOP hierarchy. | 
|  | 31  - Node     -- A node in the SCOP hierarchy. | 
|  | 32  - Domain   -- A SCOP domain. | 
|  | 33  - Residues -- A collection of residues from a PDB structure. | 
|  | 34  - HieRecord -- Handle the SCOP HIErarchy files. | 
|  | 35  - DesRecord -- Handle the SCOP DEScription file. | 
|  | 36  - ClaRecord -- Handle the SCOP CLAssification file. | 
|  | 37 | 
|  | 38 | 
|  | 39 nodeCodeDict  -- A mapping between known 2 letter node codes and a longer | 
|  | 40                   description. The known node types are 'cl' (class), 'cf' | 
|  | 41                   (fold), 'sf' (superfamily), 'fa' (family), 'dm' (protein), | 
|  | 42                   'sp' (species), 'px' (domain). Additional node types may | 
|  | 43                   be added in the future. | 
|  | 44 """ | 
|  | 45 | 
|  | 46 import os, re | 
|  | 47 | 
|  | 48 | 
# Two-letter SCOP node type codes mapped to their longer descriptions.
nodeCodeDict = {
    'cl': 'class',
    'cf': 'fold',
    'sf': 'superfamily',
    'fa': 'family',
    'dm': 'protein',
    'sp': 'species',
    'px': 'domain',
}


# Reverse lookup: longer description -> two-letter node type code.
_nodetype_to_code = {name: code for code, name in nodeCodeDict.items()}


# Node type codes listed from the root ('ro') down to the domains ('px').
nodeCodeOrder = ['ro', 'cl', 'cf', 'sf', 'fa', 'dm', 'sp', 'px']
|  | 57 | 
|  | 58 | 
def cmp_sccs(sccs1, sccs2):
    """Order SCOP concise classification strings (sccs).

    a.4.5.1 < a.4.5.11 < b.1.1.1

    A sccs (e.g. a.4.5.11) compactly represents a domain's classification.
    The letter represents the class, and the numbers are the fold,
    superfamily, and family, respectively.

    Returns -1, 0 or 1 when sccs1 sorts before, equal to, or after sccs2.
    Raises ValueError if both strings share the same class letter and a
    remaining field is not an integer.
    """

    def _compare(left, right):
        # Portable stand-in for the cmp() builtin, which Python 3 removed.
        return (left > right) - (left < right)

    fields1 = sccs1.split(".")
    fields2 = sccs2.split(".")

    # The class letter decides first; the numeric fields are only parsed
    # when both strings belong to the same class.
    if fields1[0] != fields2[0]:
        return _compare(fields1[0], fields2[0])

    # Compare as lists of ints so that 'a.4.5.11' sorts after 'a.4.5.2'.
    # Real lists are needed: Python 3 map() objects cannot be ordered.
    numbers1 = [int(field) for field in fields1[1:]]
    numbers2 = [int(field) for field in fields2[1:]]

    return _compare(numbers1, numbers2)
|  | 79 | 
|  | 80 | 
|  | 81 | 
def _open_scop_file(scop_dir_path, version, filetype):
    """Open one of the SCOP parsable files for reading.

    The file is looked up in scop_dir_path under the name
    dir.FILETYPE.scop.txt_VERSION. The open file object is returned; closing
    it is left to the caller.
    """
    basename = "dir.%s.scop.txt_%s" % (filetype, version)
    full_path = os.path.join(scop_dir_path, basename)
    return open(full_path)
|  | 86 | 
|  | 87 | 
class Scop(object):
    """The entire SCOP hierarchy.

    root           -- The root node of the hierarchy
    domains        -- A sequence of all domains
    nodes_by_sunid -- A dictionary of nodes indexed by SCOP unique
                      identifier (e.g. 14996)
    domains_by_sid -- A dictionary of domains indexed by SCOP identifier
                      (e.g. 'd1hbia_')
    """

    def __init__(self):
        """ An empty Scop object.

        See also Scop.parse() and Scop.parse_files()
        """
        self.root = None
        self.domains = []
        self.nodes_by_sunid = dict()
        self.domains_by_sid = dict()

    @classmethod
    def parse(cls, dir_path, version):
        """Build the SCOP hierarchy from the SCOP parsable files.

         - dir_path -- A directory that contains the SCOP files
         - version  -- The SCOP version (as a string)

        The SCOP files are named dir.XXX.scop.txt_VERSION, where XXX
        is 'cla', 'des' or 'hie'.

        Returns the populated Scop object. Any error raised while opening
        or parsing the files propagates after the opened files are closed.
        """
        cla_file = None
        des_file = None
        hie_file = None
        try:
            cla_file = _open_scop_file(dir_path, version, 'cla')
            des_file = _open_scop_file(dir_path, version, 'des')
            hie_file = _open_scop_file(dir_path, version, 'hie')
            return cls.parse_files(cla_file, des_file, hie_file)
        finally:
            # If we opened the files, we close the files
            for handle in (cla_file, des_file, hie_file):
                if handle is not None:
                    handle.close()

    @classmethod
    def parse_files(cls, cla_file, des_file, hie_file):
        """Build the SCOP hierarchy from the SCOP parsable files.

            - cla_file -- the CLA classification file
            - des_file -- the DES description file
            - hie_file -- the HIE hierarchy file

        Each argument is an iterable of lines. Returns the populated Scop
        object. Raises ValueError when the three files are inconsistent
        with each other (a record refers to an unknown sunid, or the CLA
        and DES files disagree about a domain).
        """
        scop = cls()

        # The root is not listed in the DES file, so it is created by hand.
        root = Node()
        root.sunid = 0
        root.type = 'ro'
        root.description = 'SCOP Root'

        nodes = {root.sunid: root}
        domains = []

        # Build the rest of the nodes using the DES file
        for rec in DesRecord.records(des_file):
            if rec.nodetype == 'px':
                node = Domain()
                node.sid = rec.name
                domains.append(node)
            else:
                node = Node()
            node.sunid = rec.sunid
            node.type = rec.nodetype
            node.sccs = rec.sccs
            node.description = rec.description

            nodes[node.sunid] = node

        # Glue all of the Nodes together using the HIE file
        for rec in HieRecord.records(hie_file):
            if rec.sunid not in nodes:
                raise ValueError(
                    "Incomplete data? HIE record for unknown sunid %s"
                    % rec.sunid)

            node = nodes[rec.sunid]
            if rec.parent != '':  # Not root node
                if rec.parent not in nodes:
                    raise ValueError("Incomplete data?")
                node.parent = nodes[rec.parent]

            for child in rec.children:
                if child not in nodes:
                    raise ValueError("Incomplete data?")
                node.children.append(nodes[child])

        # Fill in the gaps with information from the CLA file
        domains_by_sid = {}
        for rec in ClaRecord.records(cla_file):
            if rec.sunid not in nodes:
                raise ValueError(
                    "Incomplete data? CLA record for unknown sunid %s"
                    % rec.sunid)
            node = nodes[rec.sunid]
            # Explicit checks rather than assert, which python -O strips.
            if node.sccs != rec.sccs or getattr(node, 'sid', None) != rec.sid:
                raise ValueError(
                    "CLA and DES files disagree about sunid %s" % rec.sunid)
            node.residues = rec.residues
            domains_by_sid[node.sid] = node

        # Clean up
        scop.root = root
        scop.nodes_by_sunid = nodes
        scop.domains_by_sid = domains_by_sid
        scop.domains = tuple(domains)

        return scop

    @staticmethod
    def _by_sunid(nodes):
        """Return the given nodes as a new list ordered by sunid."""
        return sorted(nodes, key=lambda node: node.sunid)

    def write_hie(self, stream):
        """Build an HIE SCOP parsable file from this object"""
        # We order nodes to ease comparison with original file
        for node in self._by_sunid(self.nodes_by_sunid.values()):
            stream.write(str(node.to_hie_record()))

    def write_des(self, stream):
        """Build a DES SCOP parsable file from this object"""
        # Original SCOP file is not ordered?
        for node in self._by_sunid(self.nodes_by_sunid.values()):
            # The root has no DES record.
            if node != self.root:
                stream.write(str(node.to_des_record()))

    def write_cla(self, stream):
        """Build a CLA SCOP parsable file from this object"""
        # We order nodes to ease comparison with original file
        for node in self._by_sunid(self.domains_by_sid.values()):
            stream.write(str(node.to_cla_record()))
# End Scop
|  | 237 | 
|  | 238 | 
|  | 239 | 
class Node(object):
    """ A node in the Scop hierarchy

    sunid  -- SCOP unique identifiers. e.g. '14986'
    parent -- The parent node
    children -- A list of child nodes
    sccs     -- SCOP concise classification string. e.g. 'a.1.1.2'
    type     -- A 2 letter node type code. e.g. 'px' for domains
    description -- Free text describing the node

    """

    def __init__(self):
        """A new, uninitialized SCOP node."""
        self.sunid = ''
        self.parent = None
        self.children = []
        self.sccs = ''
        self.type = ''
        self.description = ''

    def __str__(self):
        """Return 'sunid sccs type description', separated by spaces."""
        return " ".join(
            [str(self.sunid), self.sccs, self.type, self.description])

    def to_hie_record(self):
        """Return an Hie.Record"""
        rec = HieRecord()
        rec.sunid = str(self.sunid)
        if self.parent:  # Not root node
            rec.parent = str(self.parent.sunid)
        else:
            rec.parent = '-'
        for child in self.children:
            rec.children.append(str(child.sunid))
        return rec

    def to_des_record(self):
        """Return a Des.Record"""
        rec = DesRecord()
        rec.sunid = str(self.sunid)
        rec.nodetype = self.type
        rec.sccs = self.sccs
        rec.description = self.description
        return rec

    def descendents(self, node_type):
        """ Return a list of all descendent nodes of the given type. Node type
        can be a two letter code or longer description. e.g. 'fa' or 'family'

        Asking for this node's own type returns [self]. An empty list is
        returned when no level of the requested type lies below this node.
        """
        if node_type in _nodetype_to_code:
            node_type = _nodetype_to_code[node_type]

        # Walk down one whole level at a time; the type of the first node
        # of a level is taken as the type of that level.
        nodes = [self]

        # An empty level means the branch ended before reaching a 'px'
        # node; stop there instead of indexing into an empty list.
        while nodes and nodes[0].type != node_type:
            if nodes[0].type == 'px':
                return []  # Fell off the bottom of the hierarchy
            next_level = []
            for node in nodes:
                next_level.extend(node.children)
            nodes = next_level

        return nodes

    def ascendent(self, node_type):
        """ Return the ancestor node of the given type, or None. Node type can
        be a two letter code or longer description. e.g. 'fa' or 'family'

        None is also returned when node_type is this node's own type.
        """
        if node_type in _nodetype_to_code:
            node_type = _nodetype_to_code[node_type]

        node = self
        if node.type == node_type:
            return None
        # A node whose chain of parents ends before a 'ro' node (parent is
        # None) also counts as having no such ancestor.
        while node is not None and node.type != node_type:
            if node.type == 'ro':
                return None  # Fell off the top of the hierarchy
            node = node.parent

        return node
# End Node
|  | 327 | 
|  | 328 | 
class Domain(Node):
    """ A SCOP domain. A leaf node in the Scop hierarchy.

    - sid      -- The SCOP domain identifier. e.g. 'd5hbib_'
    - residues -- A Residue object. It defines the collection
                  of PDB atoms that make up this domain.
    """

    def __init__(self):
        """A new, uninitialized SCOP domain."""
        Node.__init__(self)
        self.sid = ''
        self.residues = None

    def __str__(self):
        """Return 'sid sccs (residues) description'.

        For a domain attached to the hierarchy the description is taken
        from the grandparent node, followed by the parent's description in
        braces; a detached domain uses its own description.
        """
        parts = [self.sid, self.sccs, "(" + str(self.residues) + ")"]

        if not self.parent:
            parts.append(self.description)
        else:
            species = self.parent
            protein = species.parent
            # A parent without a parent of its own contributes only the
            # braces part instead of failing on None.
            if protein is not None:
                parts.append(protein.description)
            parts.append("{" + species.description + "}")

        return " ".join(parts)

    def to_des_record(self):
        """Return a des.Record"""
        rec = Node.to_des_record(self)
        rec.name = self.sid
        return rec

    def to_cla_record(self):
        """Return a cla.Record"""
        rec = ClaRecord()
        rec.sid = self.sid
        rec.residues = self.residues
        rec.sccs = self.sccs
        rec.sunid = self.sunid

        # Collect (type, sunid) from this domain upwards. Stop at the root
        # (sunid 0), or where the chain of parents ends for a domain that is
        # not attached to a root.
        node = self
        while node is not None and node.sunid != 0:  # Not root node
            rec.hierarchy.append((node.type, str(node.sunid)))
            node = node.parent

        # Records list the hierarchy from the top down.
        rec.hierarchy.reverse()

        return rec
# End Domain
|  | 380 | 
|  | 381 | 
|  | 382 | 
class DesRecord(object):
    """ Handle the SCOP DEScription file.

    The file format is described in the scop
    "release notes.":http://scop.berkeley.edu/release-notes-1.55.html
    The latest DES file can be found
    "elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/

    The DES file consists of one DES record per line. Each record
    holds information for one node in the SCOP hierarchy, and consists
    of 5 tab delimited fields,
    sunid, node type, sccs, node name, node description.

    For example ::

    21953   px      b.1.2.1 d1dan.1 1dan T:,U:91-106
    48724   cl      b       -       All beta proteins
    48725   cf      b.1     -       Immunoglobulin-like beta-sandwich
    49265   sf      b.1.2   -       Fibronectin type III
    49266   fa      b.1.2.1 -       Fibronectin type III


    - sunid       -- SCOP unique identifiers
    - nodetype    -- One of 'cl' (class), 'cf' (fold), 'sf' (superfamily),
                   'fa' (family), 'dm' (protein), 'sp' (species),
                   'px' (domain). Additional node types may be added.
    - sccs        -- SCOP concise classification strings. e.g. b.1.2.1
    - name        -- The SCOP ID (sid) for domains (e.g. d1anu1),
                   currently empty for other node types
    - description --  e.g. "All beta proteins","Fibronectin type III",
    """

    def __init__(self, record=None):
        """Create an empty record, or parse one line of a DES file.

        Raises ValueError if the line does not have exactly 5 tab
        separated fields or its sunid is not an integer.
        """
        if not record:
            self.sunid = ''
            self.nodetype = ''
            self.sccs = ''
            self.name = ''
            self.description = ''
            return

        entry = record.rstrip()  # no trailing whitespace
        columns = entry.split("\t")  # separate the tab-delineated cols
        if len(columns) != 5:
            raise ValueError("I don't understand the format of %s" % entry)

        sunid, self.nodetype, self.sccs, name, self.description = columns
        # '-' is the file's placeholder for "no name".
        self.name = '' if name == '-' else name
        self.sunid = int(sunid)

    def __str__(self):
        """Return the record as one tab separated, newline terminated line."""
        fields = [
            self.sunid,
            self.nodetype,
            self.sccs,
            self.name if self.name else "-",
            self.description,
        ]
        return "\t".join([str(field) for field in fields]) + "\n"

    @staticmethod
    def records(des_file):
        """Iterates over a DES file, generating DesRecords """
        for line in des_file:
            # Skip comments and blank lines. startswith()/strip() also cope
            # with empty strings, which line[0] would not.
            if line.startswith('#') or not line.strip():
                continue
            yield DesRecord(line)
# End DesRecord
|  | 454 | 
class HieRecord(object):
    """Handle the SCOP HIErarchy files, which describe the SCOP hierarchy in
    terms of SCOP unique identifiers (sunid).

    The file format is described in the scop
    "release notes.":http://scop.berkeley.edu/release-notes-1.55.html
    The latest HIE file can be found
    "elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/

    "Release 1.55":http://scop.berkeley.edu/parse/dir.hie.scop.txt_1.55
    Records consist of 3 tab delimited fields; node's sunid,
    parent's sunid, and a list of children's sunids. For example ::

    0       -       46456,48724,51349,53931,56572,56835,56992,57942
    21953   49268   -
    49267   49266   49268,49269

    Each record holds information for one node in the SCOP hierarchy.

    sunid      -- SCOP unique identifiers of this node
    parent     -- Parents sunid
    children   -- Sequence of childrens sunids
    """

    def __init__(self, record=None):
        """Create an empty record, or parse one line of a HIE file.

        Raises ValueError if the line does not have exactly 3 tab
        separated fields or one of the sunids is not an integer.
        """
        self.sunid = None
        self.parent = None
        self.children = []

        if not record:
            return

        # Parses HIE records.
        entry = record.rstrip()        # no trailing whitespace
        columns = entry.split('\t')   # separate the tab-delineated cols
        if len(columns) != 3:
            raise ValueError("I don't understand the format of %s" % entry)

        sunid, parent, children = columns

        # '-' stands for "no parent" (the root) and "no children".
        if parent == '-':
            self.parent = ''
        else:
            self.parent = int(parent)

        if children == '-':
            self.children = ()
        else:
            # A real list, so it can be tested for emptiness and iterated
            # more than once.
            self.children = [int(child) for child in children.split(',')]

        self.sunid = int(sunid)

    def __str__(self):
        """Return the record as one tab separated, newline terminated line."""
        fields = [str(self.sunid)]

        if self.parent:
            fields.append(str(self.parent))
        elif self.sunid != 0:
            # A falsy parent on a non-root record is the root's sunid 0
            # (or unset); both are written as '0'.
            fields.append('0')
        else:
            fields.append('-')

        if self.children:
            fields.append(",".join([str(child) for child in self.children]))
        else:
            fields.append('-')

        return "\t".join(fields) + "\n"

    @staticmethod
    def records(hie_file):
        """Iterates over a HIE file, generating HieRecords """
        for line in hie_file:
            # Skip comments and blank lines. startswith()/strip() also cope
            # with empty strings, which line[0] would not.
            if line.startswith('#') or not line.strip():
                continue
            yield HieRecord(line)
# End HieRecord
|  | 535 | 
|  | 536 | 
|  | 537 | 
class ClaRecord(object):
    """Handle the SCOP CLAssification file, which describes SCOP domains.

    The file format is described in the scop
    "release notes.":http://scop.berkeley.edu/release-notes-1.55.html
    The latest CLA file can be found
    "elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/

    sid         --  SCOP identifier. e.g. d1danl2
    residues    --  The domain definition as a Residues object
    sccs        --  SCOP concise classification strings.  e.g. b.1.2.1
    sunid       --  SCOP unique identifier for this domain
    hierarchy   --  A sequence of (nodetype, sunid) pairs describing the
                    location of this domain in the SCOP hierarchy.
                    See the Scop module for a description of nodetypes.
    """

    def __init__(self, record=None):
        """Create an empty record, or parse one line of a CLA file.

        Raises ValueError if the line does not have exactly 6 tab
        separated fields, a sunid is not an integer, or a hierarchy item
        is not of the form nodetype=sunid.
        """
        self.sid = ''
        self.residues = None
        self.sccs = ''
        self.sunid = ''
        self.hierarchy = []

        if not record:
            return

        # Parse a tab-delimited CLA record.
        entry = record.rstrip()        # no trailing whitespace
        columns = entry.split('\t')   # separate the tab-delineated cols
        if len(columns) != 6:
            raise ValueError("I don't understand the format of %s" % entry)

        self.sid, pdbid, residues, self.sccs, sunid, hierarchy = columns
        self.residues = Residues(residues)
        self.residues.pdbid = pdbid
        self.sunid = int(sunid)

        # The hierarchy column is a comma separated list of nodetype=sunid.
        levels = []
        for item in hierarchy.split(","):
            pair = item.split('=')
            if len(pair) < 2:
                raise ValueError(
                    "I don't understand the format of %s" % item)
            pair[1] = int(pair[1])
            levels.append(pair)
        self.hierarchy = levels

    def __str__(self):
        """Return the record as one tab separated, newline terminated line."""
        fields = [self.sid]
        # str(residues) is 'pdbid fragments'; each part is its own column.
        fields.extend(str(self.residues).split(" "))
        fields.append(self.sccs)
        fields.append(self.sunid)

        levels = []
        for level in self.hierarchy:
            levels.append("=".join([str(part) for part in level]))
        fields.append(",".join(levels))

        return "\t".join([str(field) for field in fields]) + "\n"

    @staticmethod
    def records(cla_file):
        """Iterates over a CLA file, generating ClaRecords """
        for line in cla_file:
            # Skip comments and blank lines. startswith()/strip() also cope
            # with empty strings, which line[0] would not.
            if line.startswith('#') or not line.strip():
                continue
            yield ClaRecord(line)

# End ClaRecord
|  | 605 | 
|  | 606 | 
|  | 607 | 
|  | 608 | 
class DomRecord(object):
    """Handle the SCOP DOMain file.

    The DOM file has been officially deprecated. For more information see
    the SCOP"release notes.":http://scop.berkeley.edu/release-notes-1.55.html
    The DOM files for older releases can be found
    "elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/

    DOM records consist of 4 tab delimited fields;
    sid, pdbid, residues, hierarchy
    For example ::

    d1sctg_ 1sct    g:      1.001.001.001.001.001
    d1scth_ 1sct    h:      1.001.001.001.001.001
    d1flp__ 1flp    -       1.001.001.001.001.002
    d1moh__ 1moh    -       1.001.001.001.001.002

    sid -- The SCOP ID of the entry, e.g. d1anu1
    residues -- The domain definition as a Residues object
    hierarchy -- A string specifying where this domain is in the hierarchy.
    """

    def __init__(self, record=None):
        """Create an empty record, or parse one line of a DOM file.

        Raises ValueError if the line does not have exactly 4 tab
        separated fields.
        """
        self.sid = ''
        self.residues = []
        self.hierarchy = ''

        if not record:
            return

        entry = record.rstrip()  # no trailing whitespace
        columns = entry.split("\t")  # separate the tab-delineated cols
        if len(columns) != 4:
            raise ValueError("I don't understand the format of %s" % entry)
        self.sid, pdbid, res, self.hierarchy = columns
        self.residues = Residues(res)
        self.residues.pdbid = pdbid

    def __str__(self):
        """Return the record as one tab separated, newline terminated line."""
        # str(residues) is 'pdbid fragments'; the space between them
        # becomes the column separator.
        residue_columns = str(self.residues).replace(" ", "\t")
        return "\t".join([self.sid, residue_columns, self.hierarchy]) + "\n"

    @staticmethod
    def records(dom_file):
        """Iterates over a DOM file, generating DomRecords """
        for line in dom_file:
            # Skip comments and blank lines. startswith()/strip() also cope
            # with empty strings, which line[0] would not.
            if line.startswith('#') or not line.strip():
                continue
            yield DomRecord(line)
# End DomRecord
|  | 660 | 
|  | 661 | 
|  | 662 | 
|  | 663 | 
# A leading four character PDB id, followed by the end of the string,
# whitespace or an underscore; group 2 is everything after it.
_pdbid_re = re.compile(r"^(\w\w\w\w)(?:$|\s+|_)(.*)")
# One fragment: optional '(' , optional 'X:' chain, optional start and end
# residue ids, optional ')'. The last group catches any unparsed leftovers.
_fragment_re = re.compile(r"\(?(\w:)?(-?\w*)-?(-?\w*)\)?(.*)")


class Residues(object):
    """A collection of residues from a PDB structure.

    This class provides code to work with SCOP domain definitions. These
    are concisely expressed as a one or more chain fragments. For example,
    "(1bba A:10-20,B:)" indicates residue 10 through 20 (inclusive) of
    chain A, and every residue of chain B in the pdb structure 1bba. The pdb
    id and brackets are optional. In addition "-" indicates every residue of
    a pdb structure with one unnamed chain.

    Start and end residue ids consist of the residue sequence number and an
    optional single letter insertion code. e.g. "12", "-1", "1a", "1000"


    pdbid -- An optional PDB id, e.g. "1bba"
    fragments -- A sequence of tuples (chainID, startResID, endResID)
    """

    def __init__(self, str=None):
        """Create an empty collection, or parse a domain definition string.

        The parameter keeps its historical name 'str' so that callers
        passing it by keyword continue to work.
        """
        self.pdbid = ''
        self.fragments = ()
        if str is not None:
            self._parse(str)

    def _parse(self, string):
        """Fill in pdbid and fragments from a domain definition string.

        Raises ValueError if a fragment cannot be understood.
        """
        string = string.strip()

        # Is there a pdbid at the front? e.g. 1bba A:1-100
        # The id may also sit inside the brackets, e.g. (1bba A:1-100),
        # which is the form Domain.__str__ writes; look past the '(' then.
        bracketed = string.startswith("(")
        m = _pdbid_re.match(string[1:] if bracketed else string)
        if m is not None:
            self.pdbid = m.group(1)
            string = m.group(2)  # Everything else
            if bracketed and string.endswith(")"):
                # Drop the bracket that closes the one skipped above.
                string = string[:-1]

        if string in ('', '-', '(-)'):  # no fragments, whole sequence
            return

        fragments = []
        for piece in string.split(","):
            m = _fragment_re.match(piece)
            if m is None:
                raise ValueError(
                    "I don't understand the format of %s" % piece)
            chain, start, end, postfix = m.groups()

            if postfix != "":
                raise ValueError(
                    "I don't understand the format of %s" % piece)

            if chain:
                if chain[-1] != ':':
                    raise ValueError(
                        "I don't understand the chain in %s" % piece)
                chain = chain[:-1]   # chop off the ':'
            else:
                chain = ""

            fragments.append((chain, start, end))
        self.fragments = tuple(fragments)

    def __str__(self):
        """Return 'pdbid fragments'; '-' stands for no fragments."""
        prefix = (self.pdbid + ' ') if self.pdbid else ""

        if not self.fragments:
            return prefix + '-'

        pieces = []
        for chain, start, end in self.fragments:
            piece = ""
            if chain:
                piece += "%s:" % chain
            if start:
                piece += "%s-%s" % (start, end)
            pieces.append(piece)
        return prefix + ",".join(pieces)
# End Residues
|  | 738 | 
|  | 739 | 
|  | 740 | 
|  | 741 | 
|  | 742 | 
|  | 743 | 
|  | 744 | 
|  | 745 | 
|  | 746 | 
|  | 747 | 
|  | 748 | 
|  | 749 | 
|  | 750 | 
|  | 751 | 
|  | 752 | 
|  | 753 | 
|  | 754 | 
|  | 755 | 
|  | 756 | 
|  | 757 |