Mercurial > repos > davidmurphy > codonlogo
comparison corebio/resource/scop.py @ 7:8d676bbd1f2d
Uploaded
author | davidmurphy |
---|---|
date | Mon, 16 Jan 2012 07:03:36 -0500 |
parents | c55bdc2fb9fa |
children |
comparison
equal
deleted
inserted
replaced
6:4a4aca3d57c9 | 7:8d676bbd1f2d |
---|---|
1 | |
2 # Copyright 2000 by Jeffrey Chang. All rights reserved. | |
3 # Copyright 2001 by Gavin E. Crooks. All rights reserved. | |
4 # Modifications Copyright 2004/2005 James Casbon. | |
5 # Copyright 2005 by Regents of the University of California. All rights Reserved. | |
6 # (Major rewrite for conformance to corebio. Gavin Crooks) | |
7 # | |
8 # This code is derived from the Biopython distribution and is governed by its | |
9 # license. Please see the LICENSE file that should have been included | |
10 # as part of this package. | |
11 | |
12 | |
13 """ SCOP: Structural Classification of Proteins. | |
14 | |
15 The SCOP database aims to provide a manually constructed classification of | |
16 all known protein structures into a hierarchy, the main levels of which | |
17 are family, superfamily and fold. | |
18 | |
19 * SCOP: http://scop.mrc-lmb.cam.ac.uk/scop/ | |
20 * Introduction: http://scop.mrc-lmb.cam.ac.uk/scop/intro.html | |
21 * SCOP parsable files: http://scop.mrc-lmb.cam.ac.uk/scop/parse/ | |
22 | |
23 The Scop object in this module represents the entire SCOP classification. It | |
24 can be built from the three SCOP parsable files (see DesRecord, HieRecord and | |
25 ClaRecord), modified if so desired, and converted back to the same file formats. | |
26 A single SCOP domain (represented by the Domain class) can be obtained from | |
27 Scop using the domain's SCOP identifier (sid). | |
28 | |
29 Classes: | |
30 - Scop -- The entire SCOP hierarchy. | |
31 - Node -- A node in the SCOP hierarchy. | |
32 - Domain -- A SCOP domain. | |
33 - Residues -- A collection of residues from a PDB structure. | |
34 - HieRecord -- Handle the SCOP HIErarchy files. | |
35 - DesRecord -- Handle the SCOP DEScription file. | |
36 - ClaRecord -- Handle the SCOP CLAssification file. | |
37 | |
38 | |
39 nodeCodeDict -- A mapping between known 2 letter node codes and a longer | |
40 description. The known node types are 'cl' (class), 'cf' | |
41 (fold), 'sf' (superfamily), 'fa' (family), 'dm' (protein), | |
42 'sp' (species), 'px' (domain). Additional node types may | |
43 be added in the future. | |
44 """ | |
45 | |
46 import os, re | |
47 | |
48 | |
49 nodeCodeDict = { 'cl':'class', 'cf':'fold', 'sf':'superfamily', | |
50 'fa':'family', 'dm':'protein', 'sp':'species', 'px':'domain'} | |
51 | |
52 | |
53 _nodetype_to_code= dict([[v,k] for k,v in nodeCodeDict.items()]) | |
54 | |
55 | |
56 nodeCodeOrder = [ 'ro', 'cl', 'cf', 'sf', 'fa', 'dm', 'sp', 'px' ] | |
57 | |
58 | |
def cmp_sccs(sccs1, sccs2):
    """Order SCOP concise classification strings (sccs).

    a.4.5.1 < a.4.5.11 < b.1.1.1

    A sccs (e.g. a.4.5.11) compactly represents a domain's classification.
    The letter represents the class, and the numbers are the fold,
    superfamily, and family, respectively.

    Returns -1, 0 or 1 when sccs1 sorts before, equal to, or after sccs2.
    Raises ValueError if a component after the class letter is not an integer.
    """
    parts1 = sccs1.split(".")
    parts2 = sccs2.split(".")

    # The class letter is compared as a string.
    if parts1[0] != parts2[0]:
        return (parts1[0] > parts2[0]) - (parts1[0] < parts2[0])

    # The remaining components are compared numerically, so that 11 > 2.
    # Real lists are built (not map objects) so they can be ordered on
    # every Python version; the builtin cmp() is avoided for the same reason.
    nums1 = [int(p) for p in parts1[1:]]
    nums2 = [int(p) for p in parts2[1:]]

    return (nums1 > nums2) - (nums1 < nums2)
79 | |
80 | |
81 | |
82 def _open_scop_file(scop_dir_path, version, filetype) : | |
83 filename = "dir.%s.scop.txt_%s" % (filetype,version) | |
84 afile = open(os.path.join( scop_dir_path, filename)) | |
85 return afile | |
86 | |
87 | |
class Scop(object):
    """The entire SCOP hierarchy.

    root           -- The root node of the hierarchy
    domains        -- A sequence of all domains
    nodes_by_sunid -- A dictionary of nodes indexed by SCOP unique
                      identifier (e.g. 14996)
    domains_by_sid -- A dictionary of domains indexed by SCOP identifier
                      (e.g. 'd1hbia_')
    """
    def __init__(self):
        """ An empty Scop object.

        See also Scop.parse() and Scop.parse_files()
        """
        self.root = None
        self.domains = []
        self.nodes_by_sunid = dict()
        self.domains_by_sid = dict()

    @classmethod
    def parse(cls, dir_path, version):
        """Build the SCOP hierarchy from the SCOP parsable files.

         - dir_path -- A directory that contains the SCOP files
         - version  -- The SCOP version (as a string)

        The SCOP files are named dir.XXX.scop.txt_VERSION, where XXX
        is 'cla', 'des' or 'hie'.

        Returns the object built by parse_files(). Any file that was
        opened here is closed again, even if parsing fails.
        """
        cla_file = None
        des_file = None
        hie_file = None
        try:
            cla_file = _open_scop_file(dir_path, version, 'cla')
            des_file = _open_scop_file(dir_path, version, 'des')
            hie_file = _open_scop_file(dir_path, version, 'hie')
            return cls.parse_files(cla_file, des_file, hie_file)
        finally:
            # If we opened the files, we close the files
            for handle in (cla_file, des_file, hie_file):
                if handle is not None:
                    handle.close()

    @classmethod
    def parse_files(cls, cla_file, des_file, hie_file):
        """Build the SCOP hierarchy from the SCOP parsable files.

         - cla_file -- the CLA classification file
         - des_file -- the DES description file
         - hie_file -- the HIE hierarchy file

        Raises ValueError if the HIE file refers to a sunid that has no
        DES record.
        """
        self = cls()

        root = Node()
        root.sunid = 0
        root.type = 'ro'
        root.description = 'SCOP Root'

        sunidDict = {root.sunid: root}
        domains = []

        # Build the rest of the nodes using the DES file
        for rec in DesRecord.records(des_file):
            if rec.nodetype == 'px':
                n = Domain()
                n.sid = rec.name
                domains.append(n)
            else:
                n = Node()
            n.sunid = rec.sunid
            n.type = rec.nodetype
            n.sccs = rec.sccs
            n.description = rec.description

            sunidDict[n.sunid] = n

        # Glue all of the Nodes together using the HIE file
        for rec in HieRecord.records(hie_file):
            if rec.sunid not in sunidDict:
                # Fail in the same way as the parent/child checks below,
                # rather than printing the sunid and hitting a KeyError.
                raise ValueError(
                    "Incomplete data? Unknown sunid %s in HIE file"
                    % rec.sunid)

            n = sunidDict[rec.sunid]
            if rec.parent != '':   # Not root node
                if rec.parent not in sunidDict:
                    raise ValueError("Incomplete data?")
                n.parent = sunidDict[rec.parent]

            for c in rec.children:
                if c not in sunidDict:
                    raise ValueError("Incomplete data?")
                n.children.append(sunidDict[c])

        # Fill in the gaps with information from the CLA file
        sidDict = {}
        for rec in ClaRecord.records(cla_file):
            n = sunidDict[rec.sunid]
            assert n.sccs == rec.sccs
            assert n.sid == rec.sid
            n.residues = rec.residues
            sidDict[n.sid] = n

        # Clean up
        self.root = root
        self.nodes_by_sunid = sunidDict
        self.domains_by_sid = sidDict
        self.domains = tuple(domains)

        return self

    def write_hie(self, stream):
        """Build an HIE SCOP parsable file from this object"""
        # We order nodes to ease comparison with original file
        nodes = sorted(self.nodes_by_sunid.values(), key=lambda n: n.sunid)

        for n in nodes:
            stream.write(str(n.to_hie_record()))

    def write_des(self, stream):
        """Build a DES SCOP parsable file from this object"""
        # Original SCOP file is not ordered?
        nodes = sorted(self.nodes_by_sunid.values(), key=lambda n: n.sunid)

        for n in nodes:
            # The root node is not written out.
            if n != self.root:
                stream.write(str(n.to_des_record()))

    def write_cla(self, stream):
        """Build a CLA SCOP parsable file from this object"""
        # We order nodes to ease comparison with original file
        nodes = sorted(self.domains_by_sid.values(), key=lambda n: n.sunid)

        for n in nodes:
            stream.write(str(n.to_cla_record()))
# End Scop
237 | |
238 | |
239 | |
class Node(object):
    """ A node in the Scop hierarchy

    sunid       -- SCOP unique identifiers. e.g. '14986'
    parent      -- The parent node
    children    -- A list of child nodes
    sccs        -- SCOP concise classification string. e.g. 'a.1.1.2'
    type        -- A 2 letter node type code. e.g. 'px' for domains
    description -- A text description of the node
    """
    def __init__(self):
        """A new, uninitialized SCOP node."""
        self.sunid = ''
        self.parent = None
        self.children = []
        self.sccs = ''
        self.type = ''
        self.description = ''

    def __str__(self):
        """Return 'sunid sccs type description', separated by spaces."""
        return " ".join(
            [str(self.sunid), self.sccs, self.type, self.description])

    def to_hie_record(self):
        """Return an Hie.Record"""
        rec = HieRecord()
        rec.sunid = str(self.sunid)
        if self.parent:     # Not root node
            rec.parent = str(self.parent.sunid)
        else:
            rec.parent = '-'
        for c in self.children:
            rec.children.append(str(c.sunid))
        return rec

    def to_des_record(self):
        """Return a Des.Record"""
        rec = DesRecord()
        rec.sunid = str(self.sunid)
        rec.nodetype = self.type
        rec.sccs = self.sccs
        rec.description = self.description
        return rec

    def descendents(self, node_type):
        """ Return a list of all descendent nodes of the given type. Node type
        can be a two letter code or longer description. e.g. 'fa' or 'family'

        An empty list is returned when no node of that type is found below
        this node.
        """
        if node_type in _nodetype_to_code:
            node_type = _nodetype_to_code[node_type]

        nodes = [self]

        # Walk down one level at a time. Only the first node of a level is
        # inspected to decide whether the requested type has been reached.
        # The 'nodes' guard stops the walk when a level has no children
        # (e.g. a truncated hierarchy) instead of indexing an empty list.
        while nodes and nodes[0].type != node_type:
            if nodes[0].type == 'px':
                return []   # Fell off the bottom of the hierarchy
            nodes = [child for n in nodes for child in n.children]

        return nodes

    def ascendent(self, node_type):
        """ Return the ancestor node of the given type, or None. Node type can
        be a two letter code or longer description. e.g. 'fa' or 'family'

        None is also returned when this node itself has the requested type.
        """
        if node_type in _nodetype_to_code:
            node_type = _nodetype_to_code[node_type]

        n = self
        if n.type == node_type:
            return None
        # 'n is not None' covers nodes that are not attached to a root:
        # their parent chain ends in None rather than in an 'ro' node.
        while n is not None and n.type != node_type:
            if n.type == 'ro':
                return None   # Fell off the top of the hierarchy
            n = n.parent

        return n
# End Node
327 | |
328 | |
class Domain(Node):
    """ A SCOP domain. A leaf node in the Scop hierarchy.

        - sid      -- The SCOP domain identifier. e.g. 'd5hbib_'
        - residues -- A Residue object. It defines the collection
                      of PDB atoms that make up this domain.
    """
    def __init__(self):
        Node.__init__(self)
        self.sid = ''
        self.residues = None

    def __str__(self):
        """Return 'sid sccs (residues)' followed by a description.

        When both the parent and the grandparent node are attached, the
        grandparent's description is used, followed by the parent's
        description in braces. Otherwise the domain's own description is
        used.
        """
        s = [self.sid, self.sccs, "(" + str(self.residues) + ")"]

        sp = self.parent
        dm = None
        if sp:
            dm = sp.parent

        if dm:
            s.append(dm.description)
            s.append("{" + sp.description + "}")
        else:
            # Not (fully) attached to a hierarchy: there is no grandparent
            # to describe, so fall back to this node's description.
            s.append(self.description)

        return " ".join(s)

    def to_des_record(self):
        """Return a des.Record"""
        rec = Node.to_des_record(self)
        rec.name = self.sid
        return rec

    def to_cla_record(self):
        """Return a cla.Record"""
        rec = ClaRecord()
        rec.sid = self.sid
        rec.residues = self.residues
        rec.sccs = self.sccs
        rec.sunid = self.sunid

        # Collect (type, sunid) pairs from this domain upwards, stopping at
        # the root (sunid 0) or at the end of a detached parent chain.
        n = self
        while n is not None and n.sunid != 0:
            rec.hierarchy.append((n.type, str(n.sunid)))
            n = n.parent

        # Stored top-down, i.e. highest ancestor first.
        rec.hierarchy.reverse()

        return rec
# End Domain
380 | |
381 | |
382 | |
class DesRecord(object):
    """ Handle the SCOP DEScription file.

    The file format is described in the scop
    "release notes.":http://scop.berkeley.edu/release-notes-1.55.html
    The latest DES file can be found
    "elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/

    The DES file consists of one DES record per line. Each record
    holds information for one node in the SCOP hierarchy, and consists
    of 5 tab delimited fields,
    sunid, node type, sccs, node name, node description.

    For example ::

    21953   px      b.1.2.1 d1dan.1 1dan T:,U:91-106
    48724   cl      b       -       All beta proteins
    48725   cf      b.1     -       Immunoglobulin-like beta-sandwich
    49265   sf      b.1.2   -       Fibronectin type III
    49266   fa      b.1.2.1 -       Fibronectin type III

    - sunid       -- SCOP unique identifiers
    - nodetype    -- One of 'cl' (class), 'cf' (fold), 'sf' (superfamily),
                     'fa' (family), 'dm' (protein), 'sp' (species),
                     'px' (domain). Additional node types may be added.
    - sccs        -- SCOP concise classification strings. e.g. b.1.2.1
    - name        -- The SCOP ID (sid) for domains (e.g. d1anu1),
                     currently empty for other node types
    - description -- e.g. "All beta proteins","Fibronectin type III",
    """
    def __init__(self, record=None):
        """Create an empty record, or parse one line of a DES file.

        Raises ValueError if the line does not have 5 tab separated
        columns or if the sunid column is not an integer.
        """
        self.sunid = ''
        self.nodetype = ''
        self.sccs = ''
        self.name = ''
        self.description = ''

        if not record:
            return

        entry = record.rstrip()        # no trailing whitespace
        columns = entry.split("\t")    # separate the tab-delineated cols
        if len(columns) != 5:
            raise ValueError("I don't understand the format of %s" % entry)

        sunid, self.nodetype, self.sccs, name, self.description = columns
        # A '-' in the name column stands for "no name".
        if name == '-':
            name = ''
        self.name = name
        self.sunid = int(sunid)

    def __str__(self):
        """Return the record as one tab separated, newline terminated line."""
        fields = [self.sunid, self.nodetype, self.sccs,
                  self.name or "-", self.description]
        return "\t".join([str(f) for f in fields]) + "\n"

    @staticmethod
    def records(des_file):
        """Iterates over a DES file, generating DesRecords """
        for line in des_file:
            # startswith() copes with empty strings, which line[0] does not.
            if line.startswith('#'):
                continue   # A comment
            if not line.strip():
                continue   # Blank or empty line
            yield DesRecord(line)
# End DesRecord
454 | |
class HieRecord(object):
    """Handle the SCOP HIErarchy files, which describe the SCOP hierarchy in
    terms of SCOP unique identifiers (sunid).

    The file format is described in the scop
    "release notes.":http://scop.berkeley.edu/release-notes-1.55.html
    The latest HIE file can be found
    "elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/

    "Release 1.55":http://scop.berkeley.edu/parse/dir.hie.scop.txt_1.55
    Records consist of 3 tab delimited fields; node's sunid,
    parent's sunid, and a list of children's sunids. For example ::

    0       -       46456,48724,51349,53931,56572,56835,56992,57942
    21953   49268   -
    49267   49266   49268,49269

    Each record holds information for one node in the SCOP hierarchy.

    sunid      -- SCOP unique identifiers of this node
    parent     -- Parent's sunid ('' when the parent column is '-')
    children   -- Sequence of children's sunids
    """
    def __init__(self, record=None):
        """Create an empty record, or parse one line of an HIE file.

        Raises ValueError if the line does not have 3 tab separated
        columns, if the sunid column is '-', or if an identifier is not
        an integer.
        """
        self.sunid = None
        self.parent = None
        self.children = []

        if not record:
            return

        # Parses HIE records.
        entry = record.rstrip()        # no trailing whitespace
        columns = entry.split('\t')    # separate the tab-delineated cols
        if len(columns) != 3:
            raise ValueError("I don't understand the format of %s" % entry)

        sunid, parent, children = columns

        # Every record must identify its own node.
        if sunid == '-':
            raise ValueError("I don't understand the format of %s" % entry)
        self.sunid = int(sunid)

        if parent == '-':
            self.parent = ''
        else:
            self.parent = int(parent)

        if children == '-':
            self.children = ()
        else:
            # A real list, so it can be tested for emptiness and iterated
            # more than once on every Python version.
            self.children = [int(c) for c in children.split(',')]

    def __str__(self):
        """Return the record as one tab separated, newline terminated line."""
        s = [str(self.sunid)]

        if self.parent:
            s.append(str(self.parent))
        elif self.sunid != 0:
            # A falsy parent on a non-root node is written as the root's
            # sunid; this covers an integer parent of 0.
            s.append('0')
        else:
            s.append('-')

        if self.children:
            s.append(",".join([str(c) for c in self.children]))
        else:
            s.append('-')

        return "\t".join(s) + "\n"

    @staticmethod
    def records(hie_file):
        """Iterates over an HIE file, generating HieRecords """
        for line in hie_file:
            # startswith() copes with empty strings, which line[0] does not.
            if line.startswith('#'):
                continue   # A comment
            if not line.strip():
                continue   # Blank or empty line
            yield HieRecord(line)
# End HieRecord
535 | |
536 | |
537 | |
class ClaRecord(object):
    """Handle the SCOP CLAssification file, which describes SCOP domains.

    The file format is described in the scop
    "release notes.":http://scop.berkeley.edu/release-notes-1.55.html
    The latest CLA file can be found
    "elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/

    sid         --  SCOP identifier. e.g. d1danl2
    residues    --  The domain definition as a Residues object
    sccs        --  SCOP concise classification strings.  e.g. b.1.2.1
    sunid       --  SCOP unique identifier for this domain
    hierarchy   --  A sequence of (nodetype, sunid) pairs describing the
                    location of this domain in the SCOP hierarchy.
                    See the Scop module for a description of nodetypes.
    """
    def __init__(self, record=None):
        """Create an empty record, or parse one line of a CLA file.

        Raises ValueError if the line does not have 6 tab separated
        columns, or if a hierarchy item is not of the form nodetype=sunid.
        """
        self.sid = ''
        self.residues = None
        self.sccs = ''
        self.sunid = ''
        self.hierarchy = []

        if not record:
            return

        # Parse a tab-delimited CLA record.
        entry = record.rstrip()        # no trailing whitespace
        columns = entry.split('\t')    # separate the tab-delineated cols
        if len(columns) != 6:
            raise ValueError("I don't understand the format of %s" % entry)

        self.sid, pdbid, residues, self.sccs, sunid, hierarchy = columns
        self.residues = Residues(residues)
        self.residues.pdbid = pdbid
        self.sunid = int(sunid)

        pairs = []
        for item in hierarchy.split(","):
            pair = item.split('=')
            # Report a malformed item like any other format problem.
            if len(pair) != 2:
                raise ValueError(
                    "I don't understand the format of %s" % entry)
            pair[1] = int(pair[1])
            pairs.append(pair)
        self.hierarchy = pairs

    def __str__(self):
        """Return the record as one tab separated, newline terminated line."""
        s = [self.sid]
        # The string form of the residues is split on spaces, so each
        # space-separated part becomes its own column.
        s += str(self.residues).split(" ")
        s.append(self.sccs)
        s.append(self.sunid)

        items = ["=".join([str(x) for x in ht]) for ht in self.hierarchy]
        s.append(",".join(items))

        return "\t".join([str(x) for x in s]) + "\n"

    @staticmethod
    def records(cla_file):
        """Iterates over a CLA file, generating ClaRecords """
        for line in cla_file:
            # startswith() copes with empty strings, which line[0] does not.
            if line.startswith('#'):
                continue   # A comment
            if not line.strip():
                continue   # Blank or empty line
            yield ClaRecord(line)

# End ClaRecord
605 | |
606 | |
607 | |
608 | |
class DomRecord(object):
    """Handle the SCOP DOMain file.

    The DOM file has been officially deprecated. For more information see
    the SCOP "release notes.":http://scop.berkeley.edu/release-notes-1.55.html
    The DOM files for older releases can be found
    "elsewhere at SCOP.":http://scop.mrc-lmb.cam.ac.uk/scop/parse/

    DOM records consist of 4 tab delimited fields;
    sid, pdbid, residues, hierarchy
    For example ::

    d1sctg_ 1sct    g:      1.001.001.001.001.001
    d1scth_ 1sct    h:      1.001.001.001.001.001
    d1flp__ 1flp    -       1.001.001.001.001.002
    d1moh__ 1moh    -       1.001.001.001.001.002

    sid -- The SCOP ID of the entry, e.g. d1anu1
    residues -- The domain definition as a Residues object
    hierarchy -- A string specifying where this domain is in the hierarchy.
    """
    def __init__(self, record=None):
        """Create an empty record, or parse one line of a DOM file.

        Raises ValueError if the line does not have 4 tab separated columns.
        """
        self.sid = ''
        self.residues = []
        self.hierarchy = ''

        if not record:
            return

        entry = record.rstrip()        # no trailing whitespace
        columns = entry.split("\t")    # separate the tab-delineated cols
        if len(columns) != 4:
            raise ValueError("I don't understand the format of %s" % entry)

        self.sid, pdbid, res, self.hierarchy = columns
        self.residues = Residues(res)
        self.residues.pdbid = pdbid

    def __str__(self):
        """Return the record as one tab separated, newline terminated line."""
        # Spaces in the string form of the residues become column breaks.
        residue_columns = str(self.residues).replace(" ", "\t")
        return "\t".join([self.sid, residue_columns, self.hierarchy]) + "\n"

    @staticmethod
    def records(dom_file):
        """Iterates over a DOM file, generating DomRecords """
        for line in dom_file:
            # startswith() copes with empty strings, which line[0] does not.
            if line.startswith('#'):
                continue   # A comment
            if not line.strip():
                continue   # Blank or empty line
            yield DomRecord(line)
# End DomRecord
660 | |
661 | |
662 | |
663 | |
# A four character PDB id at the start, followed by the end of the string,
# whitespace or an underscore; group 2 captures everything after that.
_pdbid_re = re.compile(r"^(\w\w\w\w)(?:$|\s+|_)(.*)")
# One fragment: optional chain ('X:'), start and end residue ids (each may
# carry a leading '-'), optional surrounding brackets; group 4 captures any
# text left over.
_fragment_re = re.compile(r"\(?(\w:)?(-?\w*)-?(-?\w*)\)?(.*)")


class Residues(object):
    """A collection of residues from a PDB structure.

    This class provides code to work with SCOP domain definitions. These
    are concisely expressed as one or more chain fragments. For example,
    "(1bba A:10-20,B:)" indicates residue 10 through 20 (inclusive) of
    chain A, and every residue of chain B in the pdb structure 1bba. The pdb
    id and brackets are optional. In addition "-" indicates every residue of
    a pdb structure with one unnamed chain.

    Start and end residue ids consist of the residue sequence number and an
    optional single letter insertion code. e.g. "12", "-1", "1a", "1000"

    pdbid -- An optional PDB id, e.g. "1bba"
    fragments -- A sequence of tuples (chainID, startResID, endResID)
    """

    def __init__(self, str=None):
        """Create an empty Residues object, or parse a domain definition.

        The parameter keeps its historical name 'str' so that existing
        keyword callers continue to work.
        """
        self.pdbid = ''
        self.fragments = ()
        if str is not None:
            self._parse(str)

    def _parse(self, string):
        """Set pdbid and fragments from a domain definition string.

        Raises ValueError if a fragment cannot be understood.
        """
        string = string.strip()

        # Brackets around the whole definition are optional. They are
        # removed first so that a PDB id inside them, as in
        # "(1bba A:10-20,B:)", is recognised.
        if string.startswith('(') and string.endswith(')'):
            string = string[1:-1].strip()

        # Is there a pdbid at the front? e.g. 1bba A:1-100
        m = _pdbid_re.match(string)
        if m is not None:
            self.pdbid = m.group(1)
            string = m.group(2)   # Everything else

        if string == '' or string == '-' or string == '(-)':
            return   # no fragments, whole sequence

        fragments = []
        for l in string.split(","):
            m = _fragment_re.match(l)
            if m is None:
                raise ValueError("I don't understand the format of %s" % l)
            chain, start, end, postfix = m.groups()

            if postfix != "":
                raise ValueError("I don't understand the format of %s" % l)

            if chain:
                chain = chain[:-1]   # chop off the ':'
            else:
                chain = ""

            fragments.append((chain, start, end))
        self.fragments = tuple(fragments)

    def __str__(self):
        """Return '[pdbid ]fragment,fragment,...', or '-' for no fragments."""
        prefix = ""
        if self.pdbid:
            prefix = self.pdbid + ' '

        if not self.fragments:
            return prefix + '-'

        strs = []
        for chain, start, end in self.fragments:
            s = []
            if chain:
                s.append("%s:" % chain)
            if start:
                s.append("%s-%s" % (start, end))
            strs.append("".join(s))
        return prefix + ",".join(strs)
# End Residues
738 | |
739 | |
740 | |
741 | |
742 | |
743 | |
744 | |
745 | |
746 | |
747 | |
748 | |
749 | |
750 | |
751 | |
752 | |
753 | |
754 | |
755 | |
756 | |
757 |