0
|
1
|
|
2 # Copyright (c) 2003 Gavin E. Crooks
|
|
3 # Copyright (c) 2005 David D. Ding <dding@berkeley.edu>
|
|
4 #
|
|
5 # This software is distributed under the MIT Open Source License.
|
|
6 # <http://www.opensource.org/licenses/mit-license.html>
|
|
7 #
|
|
8 # Permission is hereby granted, free of charge, to any person obtaining a
|
|
9 # copy of this software and associated documentation files (the "Software"),
|
|
10 # to deal in the Software without restriction, including without limitation
|
|
11 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
12 # and/or sell copies of the Software, and to permit persons to whom the
|
|
13 # Software is furnished to do so, subject to the following conditions:
|
|
14 #
|
|
15 # The above copyright notice and this permission notice shall be included
|
|
16 # in all copies or substantial portions of the Software.
|
|
17 #
|
|
18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
19 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
20 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
21 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
22 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
23 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
24 # THE SOFTWARE.
|
|
25
|
|
26 """STRIDE: Protein secondary structure assignment from atomic coordinates.
|
|
27
|
|
28 This module provides an interface to STRIDE, a C program used to recognize
|
|
29 secondary structural elements in proteins from their atomic coordinates.
|
|
30
|
|
31 """
|
|
32
|
|
33 from corebio.seq import Seq, protein_alphabet, Alphabet
|
|
34 from corebio.resource.astral import to_one_letter_code
|
|
35
|
|
# Alphabet of the one-letter secondary structure codes used when building
# the secondary structure Seq for a STRIDE assignment.
# NOTE(review): the names table below also lists a lowercase "b" (Bridge)
# that is not part of this alphabet string -- confirm whether that is
# intended.
stride_alphabet = Alphabet("HGIEBC12345678@&T")

# Conversion table between one-letter codes and descriptive names.
# This is a flat tuple (not a dict) of alternating entries: each one-letter
# code is immediately followed by its name, i.e. (code, name, code, name, ...).
stride_alphabet_names = (
    "H", "AlphaHelix",
    "G", "310Helix",
    "I", "PiHelix",
    "E", "Strand",
    "b", "Bridge",
    "B", "Bridge",
    "C", "Coil",
    "1", "TurnI",
    "2", "TurnI'",
    "3", "TurnII",
    "4", "TurnII'",
    "5", "TurnVIa",
    "6", "TurnVIb",
    "7", "TurnVIII",
    "8", "TurnIV",
    "@", "GammaClassic",
    "&", "GammaInv",
    "T", "Turn"
)
|
|
60
|
|
61
|
|
class Stride(object):
    """Parsed contents of a STRIDE output file.

    Attributes :
        - pdbid : The PDB id, sliced from the first line of the file.
        - res   : A list of Res objects, one per 'ASG' line, in file order.
    """

    def __init__(self, stride_file):
        """Read and parse a STRIDE output file.

        args:
            - stride_file : An open file handle. Its first line is consumed
                            with readline(); the remaining lines are iterated.
        attributes :
            - pdbid : The PDB id.
            - res   : A list of Res objects, one per PDB residue
        """
        # The id is the [75:79] slice of the first line. Slicing never
        # raises, so a short first line simply yields a shorter (possibly
        # empty) id.
        self.pdbid = stride_file.readline()[75:79]

        # Only 'ASG' records describe residues; every other line is skipped.
        self.res = [Res(line) for line in stride_file if line[0:3] == "ASG"]

        # (chainid, resid) -> Res index, built lazily by get_res().
        self._res_dict = None

    def total_area(self):
        """Return the solvent accessible area, summed over all residues.

        Returns 0 when there are no residues.
        """
        return sum(r.solvent_acc_area for r in self.res)

    def primary(self):
        """Return the protein primary sequence as a Seq object."""
        return Seq(''.join([r.primary_seq for r in self.res]), protein_alphabet)

    def secondary(self):
        """Return the secondary structure of the protein as a Seq object."""
        return Seq(''.join([r.secondary_str for r in self.res]), stride_alphabet)

    def get_res(self, chainid, resid):
        """Return the residue with the given chain id and residue id.

        The lookup index is built once, on the first call, from the
        `chainid` and `resid` attributes of the objects in `self.res`; it is
        not refreshed if `self.res` is modified afterwards. If several
        residues share the same (chainid, resid) pair, the last one wins.

        Raises KeyError if no residue matches.
        """
        # Compare against None rather than relying on truthiness, so that an
        # empty index is also treated as "already built" and is not rebuilt
        # on every call.
        if self._res_dict is None:
            self._res_dict = dict(
                ((r.chainid, r.resid), r) for r in self.res
            )

        return self._res_dict[(chainid, resid)]
|
|
108
|
|
109
|
|
110
|
|
class Res(object):
    """Structural information of a single residue, parsed from one 'ASG'
    line of a STRIDE output file.

    Attributes :
        - chainid          : one-character chain id ("-" is mapped to " ")
        - resid            : residue id, as a whitespace-stripped string
        - primary_seq      : one-letter residue code
        - secondary_str    : one-character secondary structure code
        - solvent_acc_area : float
        - phi              : float
        - psi              : float
    """

    def __init__(self, res_line):
        """Eat a single 'ASG' line from a stride file and split it up into
        the attributes of this Res object.

        args:
            - res_line : The text of one 'ASG' line.

        Raises:
            - ValueError : the line is shorter than 70 characters, or the
                           phi, psi or area field is not a valid number.
            - KeyError   : the three letter residue code is not present in
                           to_one_letter_code.
        """
        if len(res_line) < 70:
            raise ValueError("Line not long enough")

        try:
            self.chainid = res_line[9:10]
            # STRIDE converts blank chain ids into dashes. Undo.
            if self.chainid == "-":
                self.chainid = " "

            # In rare cases STRIDE columns can be misaligned. Grab extra
            # white space to compensate.
            self.resid = res_line[10:15].strip()
            self.primary_seq = to_one_letter_code[res_line[5:8].capitalize()]
            self.secondary_str = res_line[24:25]
            self.solvent_acc_area = float(res_line[64:71])
            self.phi = float(res_line[42:49].strip())
            self.psi = float(res_line[52:59].strip())
        except ValueError:
            # float() reports unparsable text with ValueError (it does not
            # raise FloatingPointError), so this is the handler that turns a
            # bad numeric field into a descriptive message. The exception
            # type callers see is unchanged.
            raise ValueError("Can't float phi, psi, or area")
        except KeyError:
            raise KeyError("Can't find three letter code in dictionary")
        except LookupError:
            raise LookupError("One of the values is out of index of res_line")
|
|
150
|
|
151
|
|
152
|
|
153
|
|
154
|
|
155
|
|
156
|
|
157
|
|
158
|
|
159
|
|
160
|
|
161
|
|
162
|
|
163
|
|
164
|