annotate corebio/resource/stride.py @ 4:4d47ab2b7bcc

Uploaded
author davidmurphy
date Fri, 13 Jan 2012 07:18:19 -0500
parents c55bdc2fb9fa
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
1
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
2 # Copyright (c) 2003 Gavin E. Crooks
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
3 # Copyright (c) 2005 David D. Ding <dding@berkeley.edu>
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
4 #
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
5 # This software is distributed under the MIT Open Source License.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
6 # <http://www.opensource.org/licenses/mit-license.html>
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
7 #
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
8 # Permission is hereby granted, free of charge, to any person obtaining a
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
9 # copy of this software and associated documentation files (the "Software"),
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
10 # to deal in the Software without restriction, including without limitation
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
11 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
12 # and/or sell copies of the Software, and to permit persons to whom the
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
13 # Software is furnished to do so, subject to the following conditions:
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
14 #
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
15 # The above copyright notice and this permission notice shall be included
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
16 # in all copies or substantial portions of the Software.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
17 #
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
19 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
20 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
21 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
22 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
23 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
24 # THE SOFTWARE.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
25
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
26 """STRIDE: Protein secondary structure assignment from atomic coordinates.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
27
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
28 This module provides an interface to STRIDE, a C program used to recognize
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
29 secondary structural elements in proteins from their atomic coordinates.
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
30
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
31 """
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
32
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
33 from corebio.seq import Seq, protein_alphabet, Alphabet
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
34 from corebio.resource.astral import to_one_letter_code
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
35
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
# Alphabet of the one-character secondary structure codes used by STRIDE.
stride_alphabet = Alphabet("HGIEBC12345678@&T")

# Flat tuple (not a dict) of alternating entries: a one-character code
# followed by its descriptive name.
# NOTE(review): lowercase "b" is listed here but does not appear in the
# stride_alphabet string above -- confirm whether that is intentional.
stride_alphabet_names = (
    # helices
    "H", "AlphaHelix", "G", "310Helix", "I", "PiHelix",
    # strands, bridges and coil
    "E", "Strand", "b", "Bridge", "B", "Bridge", "C", "Coil",
    # numbered turn types
    "1", "TurnI", "2", "TurnI'", "3", "TurnII", "4", "TurnII'",
    "5", "TurnVIa", "6", "TurnVIb", "7", "TurnVIII", "8", "TurnIV",
    # gamma turns and generic turn
    "@", "GammaClassic", "&", "GammaInv", "T", "Turn"
)
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
60
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
61
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
class Stride(object):
    """Parsed contents of a STRIDE output file.

    attributes :
        - pdbid : The PDB id (characters 75:79 of the file's first line).
        - res   : A list of Res objects, one per 'ASG' line in the file.
    """

    def __init__(self, stride_file):
        """ Read and parse a STRIDE output file.

        args:
            - stride_file : An open file handle
        attributes :
            - pdbid : The PDB id.
            - res : A list of Res objects, one per PDB residue
        """
        # The first line is always consumed; the PDB id is sliced out of it.
        self.pdbid = stride_file.readline()[75:79]

        # Only 'ASG' records describe residues; every other line is skipped.
        self.res = [Res(line) for line in stride_file if line[0:3] == "ASG"]

        # (chainid, resid) -> Res lookup table, built lazily by get_res().
        self._res_dict = None

    def total_area(self):
        """ Return the solvent accessible area, summed over all residues. """
        return sum(r.solvent_acc_area for r in self.res)

    def primary(self):
        """ Return the protein primary sequence as a Seq object."""
        return Seq(''.join([r.primary_seq for r in self.res]), protein_alphabet)

    def secondary(self):
        """Return the secondary structure of the protein as a Seq object"""
        return Seq(''.join([r.secondary_str for r in self.res]), stride_alphabet)

    def get_res(self, chainid, resid):
        """ Return the residue with the given chain id and residue id.

        The lookup table is built from self.res on first use and cached.
        If several residues share a (chainid, resid) pair, the last one in
        self.res wins.

        raises:
            - KeyError : if no residue matches (chainid, resid).
        """
        # Compare against None rather than testing truthiness, so that an
        # empty table is also treated as "already built".
        if self._res_dict is None:
            self._res_dict = {(r.chainid, r.resid): r for r in self.res}

        return self._res_dict[(chainid, resid)]
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
108
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
109
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
110
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
class Res(object):
    """ Structural information of a single residue. An ASG line from a stride
    output file.

    Attributes :
        - chainid          : chain id character ('-' is stored as ' ')
        - resid            : residue id, kept as a stripped string
        - primary_seq      : one letter residue code
        - secondary_str    : one character secondary structure code
        - solvent_acc_area : float
        - phi              : float
        - psi              : float
    """

    def __init__(self, res_line):
        """ Eats a single 'ASG' line from a stride file and splits it up
        into the attributes of this Res object.

        args:
            - res_line : One 'ASG' line, as a string.
        raises:
            - ValueError : if the line is shorter than 70 characters, or if
                           the phi, psi or area field is not a number.
            - KeyError   : if the three letter residue code is unknown.
        """
        if len(res_line) < 70:
            raise ValueError("Line not long enough")

        chain = res_line[9:10]
        # STRIDE converts blank chain ids into dashes. Undo.
        self.chainid = " " if chain == "-" else chain

        # In rare cases STRIDE columns can be misaligned. Grab extra
        # white space to compensate.
        self.resid = res_line[10:15].strip()

        try:
            self.primary_seq = to_one_letter_code[res_line[5:8].capitalize()]
        except KeyError:
            raise KeyError("Can't find three letter code in dictionary")

        self.secondary_str = res_line[24:25]

        # float() signals a malformed number with ValueError (it never raises
        # FloatingPointError), so that is the exception to translate here.
        try:
            self.solvent_acc_area = float(res_line[64:71])
            self.phi = float(res_line[42:49].strip())
            self.psi = float(res_line[52:59].strip())
        except ValueError:
            raise ValueError("Can't float phi, psi, or area")
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
150
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
151
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
152
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
153
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
154
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
155
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
156
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
157
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
158
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
159
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
160
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
161
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
162
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
163
c55bdc2fb9fa Uploaded
davidmurphy
parents:
diff changeset
164