Mercurial > repos > davidmurphy > codonlogo
comparison corebio/resource/stride.py @ 4:4d47ab2b7bcc
Uploaded
author | davidmurphy |
---|---|
date | Fri, 13 Jan 2012 07:18:19 -0500 |
parents | c55bdc2fb9fa |
children |
comparison
equal
deleted
inserted
replaced
3:09d2dac9ef73 | 4:4d47ab2b7bcc |
---|---|
1 | |
2 # Copyright (c) 2003 Gavin E. Crooks | |
3 # Copyright (c) 2005 David D. Ding <dding@berkeley.edu> | |
4 # | |
5 # This software is distributed under the MIT Open Source License. | |
6 # <http://www.opensource.org/licenses/mit-license.html> | |
7 # | |
8 # Permission is hereby granted, free of charge, to any person obtaining a | |
9 # copy of this software and associated documentation files (the "Software"), | |
10 # to deal in the Software without restriction, including without limitation | |
11 # the rights to use, copy, modify, merge, publish, distribute, sublicense, | |
12 # and/or sell copies of the Software, and to permit persons to whom the | |
13 # Software is furnished to do so, subject to the following conditions: | |
14 # | |
15 # The above copyright notice and this permission notice shall be included | |
16 # in all copies or substantial portions of the Software. | |
17 # | |
18 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
19 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
20 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
21 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
22 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
23 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
24 # THE SOFTWARE. | |
25 | |
26 """STRIDE: Protein secondary structure assignment from atomic coordinates. | |
27 | |
28 This module provides an interface to STRIDE, a C program used to recognize | |
29 secondary structural elements in proteins from their atomic coordinates. | |
30 | |
31 """ | |
32 | |
33 from corebio.seq import Seq, protein_alphabet, Alphabet | |
34 from corebio.resource.astral import to_one_letter_code | |
35 | |
# Alphabet of the one-letter STRIDE secondary structure codes.
#
# The lower case bridge code "b" appears in stride_alphabet_names below, so it
# is included here as well; otherwise a secondary structure string containing
# "b" could not be represented in this alphabet. It is appended at the end so
# that the positions of the previously defined letters do not change.
# NOTE(review): assumes Alphabet treats letters case-sensitively (i.e. "B"
# alone does not already cover "b") -- confirm against corebio.seq.Alphabet.
stride_alphabet = Alphabet("HGIEBC12345678@&Tb")

# Conversion table between the one-letter codes and their descriptive names.
# This is a flat tuple of alternating (code, name) entries, not a dict; a
# mapping can be built with
#     dict(zip(stride_alphabet_names[0::2], stride_alphabet_names[1::2]))
stride_alphabet_names = (
    "H", "AlphaHelix",
    "G", "310Helix",
    "I", "PiHelix",
    "E", "Strand",
    "b", "Bridge",
    "B", "Bridge",
    "C", "Coil",
    "1", "TurnI",
    "2", "TurnI'",
    "3", "TurnII",
    "4", "TurnII'",
    "5", "TurnVIa",
    "6", "TurnVIb",
    "7", "TurnVIII",
    "8", "TurnIV",
    "@", "GammaClassic",
    "&", "GammaInv",
    "T", "Turn"
)
60 | |
61 | |
class Stride(object):
    """The parsed contents of a STRIDE output file.

    attributes :
        - pdbid : The PDB id.
        - res   : A list of Res objects, one per PDB residue
    """

    def __init__(self, stride_file):
        """ Read and parse a STRIDE output file.

        args:
            - stride_file : An open file handle
        attributes :
            - pdbid : The PDB id.
            - res   : A list of Res objects, one per PDB residue
        """
        # The PDB id is read from a fixed column range of the first line.
        self.pdbid = stride_file.readline()[75:79]

        # Only 'ASG' records describe residues; all other records are skipped.
        self.res = [Res(line) for line in stride_file if line[0:3] == "ASG"]

        # Lazily built by get_res(): maps (chainid, resid) -> Res.
        self._res_dict = None

    def total_area(self):
        """ Return the solvent accessible area, summed over all residues. """
        return sum(r.solvent_acc_area for r in self.res)

    def primary(self):
        """ Return the protein primary sequence as a Seq object."""
        return Seq(''.join([r.primary_seq for r in self.res]),
                   protein_alphabet)

    def secondary(self):
        """ Return the secondary structure of the protein as a Seq object."""
        return Seq(''.join([r.secondary_str for r in self.res]),
                   stride_alphabet)

    def get_res(self, chainid, resid):
        """ Return the residue with the given chain id and residue id.

        The lookup table is built on first use and then reused.

        raises:
            - KeyError : if no residue has the given (chainid, resid) pair.
        """
        # Compare against None rather than testing truthiness: an empty
        # table (no residues) is a valid cache and must not be rebuilt on
        # every call.
        if self._res_dict is None:
            # If two residues share a key, the later one wins.
            self._res_dict = dict(
                ((r.chainid, r.resid), r) for r in self.res)

        return self._res_dict[(chainid, resid)]
108 | |
109 | |
110 | |
class Res(object):
    """ Structural information of a single residue. An ASG line from a stride
    output file.

    Attributes :
        - chainid          : one character chain id (blank if none)
        - resid            : residue id, kept as a stripped string
        - primary_seq      : one-letter residue code
        - secondary_str    : one-letter secondary structure code
        - solvent_acc_area : float
        - phi              : float
        - psi              : float
    """

    def __init__(self, res_line):
        """ Eats a single 'ASG' line from a stride file and splits it up
        into the attributes of this Res object.

        args:
            - res_line : One 'ASG' line of a STRIDE output file
        raises:
            - ValueError : if the line is shorter than 70 characters, or if
                           phi, psi or the area cannot be converted to float
            - KeyError   : if the three letter residue name is not found in
                           to_one_letter_code
        """
        if len(res_line) < 70:
            raise ValueError("Line not long enough")

        # float() signals unparsable text with ValueError (it never raises
        # FloatingPointError), so that is the exception to translate here.
        try:
            solvent_acc_area = float(res_line[64:71])
            phi = float(res_line[42:49].strip())
            psi = float(res_line[52:59].strip())
        except ValueError:
            raise ValueError("Can't float phi, psi, or area")

        try:
            primary_seq = to_one_letter_code[res_line[5:8].capitalize()]
        except KeyError:
            raise KeyError("Can't find three letter code in dictionary")

        chainid = res_line[9:10]
        # STRIDE converts blank chain ids into dashes. Undo.
        if chainid == "-":
            chainid = " "

        self.chainid = chainid
        # In rare cases STRIDE columns can be misaligned. Grab extra
        # white space to compensate.
        self.resid = res_line[10:15].strip()
        self.primary_seq = primary_seq
        self.secondary_str = res_line[24:25]
        self.solvent_acc_area = solvent_acc_area
        self.phi = phi
        self.psi = psi
150 | |
151 | |
152 | |
153 | |
154 | |
155 | |
156 | |
157 | |
158 | |
159 | |
160 | |
161 | |
162 | |
163 | |
164 |