annotate commons/core/seq/Bioseq.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children 94ab73e8a190
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1 # Copyright INRA (Institut National de la Recherche Agronomique)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
2 # http://www.inra.fr
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
3 # http://urgi.versailles.inra.fr
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
4 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
5 # This software is governed by the CeCILL license under French law and
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
6 # abiding by the rules of distribution of free software. You can use,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
7 # modify and/ or redistribute the software under the terms of the CeCILL
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
8 # license as circulated by CEA, CNRS and INRIA at the following URL
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
9 # "http://www.cecill.info".
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
10 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
11 # As a counterpart to the access to the source code and rights to copy,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
12 # modify and redistribute granted by the license, users are provided only
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
13 # with a limited warranty and the software's author, the holder of the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
14 # economic rights, and the successive licensors have only limited
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
15 # liability.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
16 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
17 # In this respect, the user's attention is drawn to the risks associated
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
18 # with loading, using, modifying and/or developing or reproducing the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
19 # software by the user in light of its specific status of free software,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
20 # that may mean that it is complicated to manipulate, and that also
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
21 # therefore means that it is reserved for developers and experienced
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
22 # professionals having in-depth computer knowledge. Users are therefore
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
23 # encouraged to load and test the software's suitability as regards their
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
24 # requirements in conditions enabling the security of their systems and/or
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
25 # data to be ensured and, more generally, to use and operate it in the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
26 # same conditions as regards security.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
27 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
28 # The fact that you are presently reading this means that you have had
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
29 # knowledge of the CeCILL license and that you accept its terms.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
30
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
31
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
32 import sys
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
33 import string
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
34 import re
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
35 import random
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
36 import cStringIO
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
37 from commons.core.coord.Map import Map
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
38
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
39 DNA_ALPHABET_WITH_N = set( ['A','T','G','C','N'] )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
40 IUPAC = set(['A','T','G','C','U','R','Y','M','K','W','S','B','D','H','V','N'])
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
41
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
42
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
43 ## Record a sequence with its header
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
44 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
45 class Bioseq( object ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
46
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
47 header = ""
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
48 sequence = ""
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
49
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
50 ## constructor
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
51 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
52 # @param name the header of sequence
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
53 # @param seq sequence (DNA, RNA, protein)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
54 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
55 def __init__( self, name="", seq="" ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
56 self.header = name
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
57 self.sequence = seq
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
58
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
59
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
60 ## Equal operator
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
61 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
62 def __eq__( self, o ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
63 if self.header==o.header and self.sequence==o.sequence:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
64 return True
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
65 return False
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
66
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
67
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
68 ## overload __repr__
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
69 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
70 def __repr__( self ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
71 return "%s;%s" % ( self.header, self.sequence )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
72
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
73
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
74 ## set attribute header
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
75 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
76 # @param header a string
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
77 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
78 def setHeader( self, header ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
79 self.header = header
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
80
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
81
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
82 ## get attribute header
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
83 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
84 # @return header
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
85 def getHeader(self):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
86 return self.header
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
87
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
88
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
89 ## set attribute sequence
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
90 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
91 # @param sequence a string
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
92 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
93 def setSequence( self, sequence ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
94 self.sequence = sequence
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
95
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
96
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
97 def getSequence(self):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
98 return self.sequence
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
99
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
100 ## reset
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
101 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
102 def reset( self ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
103 self.setHeader( "" )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
104 self.setSequence( "" )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
105
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
106
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
107 ## Test if bioseq is empty
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
108 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
109 def isEmpty( self ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
110 return self.header == "" and self.sequence == ""
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
111
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
112
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
113 ## Reverse the sequence
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
114 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
115 def reverse( self ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
116 tmp = self.sequence
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
117 self.sequence = tmp[::-1]
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
118
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
119
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
120 ## Turn the sequence into its complement
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
121 # Force upper case letters
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
122 # @warning: old name in pyRepet.Bioseq realComplement
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
123 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
124 def complement( self ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
125 complement = ""
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
126 self.upCase()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
127 for i in xrange(0,len(self.sequence),1):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
128 if self.sequence[i] == "A":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
129 complement += "T"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
130 elif self.sequence[i] == "T":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
131 complement += "A"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
132 elif self.sequence[i] == "C":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
133 complement += "G"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
134 elif self.sequence[i] == "G":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
135 complement += "C"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
136 elif self.sequence[i] == "M":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
137 complement += "K"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
138 elif self.sequence[i] == "R":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
139 complement += "Y"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
140 elif self.sequence[i] == "W":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
141 complement += "W"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
142 elif self.sequence[i] == "S":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
143 complement += "S"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
144 elif self.sequence[i] == "Y":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
145 complement += "R"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
146 elif self.sequence[i] == "K":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
147 complement += "M"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
148 elif self.sequence[i] == "V":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
149 complement += "B"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
150 elif self.sequence[i] == "H":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
151 complement += "D"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
152 elif self.sequence[i] == "D":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
153 complement += "H"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
154 elif self.sequence[i] == "B":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
155 complement += "V"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
156 elif self.sequence[i] == "N":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
157 complement += "N"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
158 elif self.sequence[i] == "-":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
159 complement += "-"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
160 else:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
161 print "WARNING: unknown symbol '%s', replacing it by N" % ( self.sequence[i] )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
162 complement += "N"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
163 self.sequence = complement
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
164
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
165
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
166 ## Reverse and complement the sequence
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
167 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
168 # Force upper case letters
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
169 # @warning: old name in pyRepet.Bioseq : complement
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
170 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
171 def reverseComplement( self ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
172 self.reverse()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
173 self.complement()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
174
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
175
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
176 ## Remove gap in the sequence
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
177 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
178 def cleanGap(self):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
179 self.sequence = self.sequence.replace("-","")
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
180
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
181
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
182 ## Copy current Bioseq Instance
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
183 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
184 # @return: a Bioseq instance, a copy of current sequence.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
185 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
186 def copyBioseqInstance(self):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
187 seq = Bioseq()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
188 seq.sequence = self.sequence
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
189 seq.header = self.header
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
190 return seq
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
191
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
192
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
193 ## Add phase information after the name of sequence in header
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
194 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
195 # @param phase integer representing phase (1, 2, 3, -1, -2, -3)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
196 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
197 def setFrameInfoOnHeader(self, phase):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
198 if " " in self.header:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
199 name, desc = self.header.split(" ", 1)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
200 name = name + "_" + str(phase)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
201 self.header = name + " " + desc
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
202 else:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
203 self.header = self.header + "_" + str(phase)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
204
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
205
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
206 ## Fill Bioseq attributes with fasta file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
207 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
208 # @param faFileHandler file handler of a fasta file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
209 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
210 def read( self, faFileHandler ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
211 line = faFileHandler.readline()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
212 if line == "":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
213 self.header = None
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
214 self.sequence = None
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
215 return
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
216 while line == "\n":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
217 line = faFileHandler.readline()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
218 if line[0] == '>':
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
219 self.header = string.rstrip(line[1:])
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
220 else:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
221 print "error, line is",string.rstrip(line)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
222 return
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
223 line = " "
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
224 seq = cStringIO.StringIO()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
225 while line:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
226 prev_pos = faFileHandler.tell()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
227 line = faFileHandler.readline()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
228 if line == "":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
229 break
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
230 if line[0] == '>':
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
231 faFileHandler.seek( prev_pos )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
232 break
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
233 seq.write( string.rstrip(line) )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
234 self.sequence = seq.getvalue()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
235
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
236
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
237 ## Create a subsequence with a modified header
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
238 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
239 # @param s integer start a required subsequence
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
240 # @param e integer end a required subsequence
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
241 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
242 # @return a Bioseq instance, a subsequence of current sequence
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
243 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
244 def subseq( self, s, e=0 ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
245 if e == 0 :
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
246 e=len( self.sequence )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
247 if s > e :
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
248 print "error: start must be < or = to end"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
249 return
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
250 if s <= 0 :
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
251 print "error: start must be > 0"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
252 return
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
253 sub = Bioseq()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
254 sub.header = self.header + " fragment " + str(s) + ".." + str(e)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
255 sub.sequence = self.sequence[(s-1):e]
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
256 return sub
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
257
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
258
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
259 ## Get the nucleotide or aminoacid at the given position
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
260 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
261 # @param pos integer nucleotide or aminoacid position
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
262 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
263 # @return a string
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
264 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
265 def getNtFromPosition(self, pos):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
266 result = None
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
267 if not (pos < 1 or pos > self.getLength()):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
268 result = self.sequence[pos - 1]
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
269 return result
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
270
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
271
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
272 ## Print in stdout the Bioseq in fasta format with 60 characters lines
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
273 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
274 # @param l length of required sequence default is whole sequence
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
275 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
276 def view(self,l=0):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
277 print '>'+self.header
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
278 i=0
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
279 if(l==0):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
280 l=len(self.sequence)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
281 seq=self.sequence[0:l]
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
282
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
283 while i<len(seq):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
284 print seq[i:i+60]
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
285 i=i+60
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
286
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
287
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
288 ## Get length of sequence
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
289 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
290 # @param avoidN boolean don't count 'N' nucleotides
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
291 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
292 # @return length of current sequence
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
293 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
294 def getLength( self, countN = True ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
295 if countN:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
296 return len(self.sequence)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
297 else:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
298 return len(self.sequence) - self.countNt('N')
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
299
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
300
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
301 ## Return the proportion of a specific character
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
302 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
303 # @param nt character that we want to count
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
304 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
305 def propNt( self, nt ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
306 return self.countNt( nt ) / float( self.getLength() )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
307
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
308
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
309 ## Count occurrence of specific character
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
310 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
311 # @param nt character that we want to count
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
312 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
313 # @return nb of occurrences
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
314 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
315 def countNt( self, nt ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
316 return self.sequence.count( nt )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
317
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
318
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
319 ## Count occurrence of each nucleotide in current seq
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
320 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
321 # @return a dict, keys are nucleotides, values are nb of occurrences
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
322 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
323 def countAllNt( self ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
324 dNt2Count = {}
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
325 for nt in ["A","T","G","C","N"]:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
326 dNt2Count[ nt ] = self.countNt( nt )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
327 return dNt2Count
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
328
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
329
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
330 ## Return a dict with the number of occurrences for each combination of ATGC of specified size and number of word found
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
331 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
332 # @param size integer required length word
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
333 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
334 def occ_word( self, size ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
335 occ = {}
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
336 if size == 0:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
337 return occ,0
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
338 nbword = 0
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
339 srch = re.compile('[^ATGC]+')
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
340 wordlist = self._createWordList( size )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
341 for i in wordlist:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
342 occ[i] = 0
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
343 lenseq = len(self.sequence)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
344 i = 0
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
345 while i < lenseq-size+1:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
346 word = self.sequence[i:i+size].upper()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
347 m = srch.search(word)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
348 if m == None:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
349 occ[word] = occ[word]+1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
350 nbword = nbword + 1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
351 i = i + 1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
352 else:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
353 i = i + m.end(0)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
354 return occ, nbword
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
355
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
356
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
357 ## Return a dictionary with the frequency of occurs for each combination of ATGC of specified size
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
358 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
359 # @param size integer required length word
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
360 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
361 def freq_word( self, size ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
362 dOcc, nbWords = self.occ_word( size )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
363 freq = {}
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
364 for word in dOcc.keys():
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
365 freq[word] = float(dOcc[word]) / nbWords
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
366 return freq
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
367
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
368
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
369 ## Find ORF in each phase
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
370 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
371 # @return: a dict, keys are phases, values are stop codon positions.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
372 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
373 def findORF (self):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
374 orf = {0:[],1:[],2:[]}
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
375 length = len (self.sequence)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
376 for i in xrange(0,length):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
377 triplet = self.sequence[i:i+3]
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
378 if ( triplet == "TAA" or triplet == "TAG" or triplet == "TGA"):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
379 phase = i % 3
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
380 orf[phase].append(i)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
381 return orf
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
382
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
383
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
384 ## Convert the sequence into upper case
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
385 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
386 def upCase( self ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
387 self.sequence = self.sequence.upper()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
388
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
389
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
390 ## Convert the sequence into lower case
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
391 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
392 def lowCase( self ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
393 self.sequence = self.sequence.lower()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
394
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
395
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
396 ## Extract the cluster of the fragment (output from Grouper)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
397 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
398 # @return cluster id (string)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
399 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
400 def getClusterID( self ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
401 data = self.header.split()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
402 return data[0].split("Cl")[1]
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
403
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
404
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
405 ## Extract the group of the sequence (output from Grouper)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
406 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
407 # @return group id (string)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
408 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
409 def getGroupID( self ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
410 data = self.header.split()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
411 return data[0].split("Gr")[1].split("Cl")[0]
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
412
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
413
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
414 ## Get the header of the full sequence (output from Grouper)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
415 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
416 # @example 'Dmel_Grouper_3091_Malign_3:LARD' from '>MbS1566Gr81Cl81 Dmel_Grouper_3091_Malign_3:LARD {Fragment} 1..5203'
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
417 # @return header (string)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
418 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
419 def getHeaderFullSeq( self ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
420 data = self.header.split()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
421 return data[1]
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
422
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
423
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
424 ## Get the strand of the fragment (output from Grouper)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
425 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
426 # @return: strand (+ or -)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
427 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
428 def getFragStrand( self ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
429 data = self.header.split()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
430 coord = data[3].split("..")
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
431 if int(coord[0]) < int(coord[-1]):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
432 return "+"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
433 else:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
434 return "-"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
435
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
436
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
437 ## Get A, T, G, C or N from an IUPAC letter
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
438 # IUPAC = ['A','T','G','C','U','R','Y','M','K','W','S','B','D','H','V','N']
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
439 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
440 # @return A, T, G, C or N
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
441 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
442 def getATGCNFromIUPAC( self, nt ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
443 subset = ["A","T","G","C","N"]
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
444
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
445 if nt in subset:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
446 return nt
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
447 elif nt == "U":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
448 return "T"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
449 elif nt == "R":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
450 return random.choice( "AG" )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
451 elif nt == "Y":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
452 return random.choice( "CT" )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
453 elif nt == "M":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
454 return random.choice( "CA" )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
455 elif nt == "K":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
456 return random.choice( "TG" )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
457 elif nt == "W":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
458 return random.choice( "TA" )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
459 elif nt == "S":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
460 return random.choice( "CG" )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
461 elif nt == "B":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
462 return random.choice( "CTG" )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
463 elif nt == "D":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
464 return random.choice( "ATG" )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
465 elif nt == "H":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
466 return random.choice( "ATC" )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
467 elif nt == "V":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
468 return random.choice( "ACG" )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
469 else:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
470 return "N"
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
471
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
472
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
473 def getSeqWithOnlyATGCN( self ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
474 newSeq = ""
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
475 for nt in self.sequence:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
476 newSeq += self.getATGCNFromIUPAC( nt )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
477 return newSeq
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
478
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
479
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
480 ## Replace any symbol not in (A,T,G,C,N) by another nucleotide it represents
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
481 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
482 def partialIUPAC( self ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
483 self.sequence = self.getSeqWithOnlyATGCN()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
484
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
485
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
486 ## Remove non Unix end-of-line symbols, if any
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
487 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
488 def checkEOF( self ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
489 symbol = "\r" # corresponds to '^M' from Windows
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
490 if symbol in self.sequence:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
491 print "WARNING: Windows EOF removed in '%s'" % ( self.header )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
492 sys.stdout.flush()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
493 newSeq = self.sequence.replace( symbol, "" )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
494 self.sequence = newSeq
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
495
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
496
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
497 ## Write Bioseq instance into a fasta file handler
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
498 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
499 # @param faFileHandler file handler of a fasta file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
500 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
501 def write( self, faFileHandler ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
502 faFileHandler.write( ">%s\n" % ( self.header ) )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
503 self.writeSeqInFasta( faFileHandler )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
504
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
505
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
506 ## Write only the sequence of Bioseq instance into a fasta file handler
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
507 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
508 # @param faFileHandler file handler of a fasta file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
509 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
510 def writeSeqInFasta( self, faFileHandler ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
511 i = 0
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
512 while i < self.getLength():
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
513 faFileHandler.write( "%s\n" % ( self.sequence[i:i+60] ) )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
514 i += 60
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
515
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
516
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
517 ## Append Bioseq instance to a fasta file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
518 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
519 # @param faFile name of a fasta file as a string
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
520 # @param mode 'write' or 'append'
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
521 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
522 def save( self, faFile, mode="a" ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
523 faFileHandler = open( faFile, mode )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
524 self.write( faFileHandler )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
525 faFileHandler.close()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
526
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
527
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
528 ## Append Bioseq instance to a fasta file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
529 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
530 # @param faFile name of a fasta file as a string
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
531 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
532 def appendBioseqInFile( self, faFile ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
533 self.save( faFile, "a" )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
534
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
535
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
536 ## Write Bioseq instance into a fasta file handler
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
537 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
538 # @param faFileHandler file handler on a file with writing right
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
539 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
540 def writeABioseqInAFastaFile( self, faFileHandler ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
541 self.write( faFileHandler )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
542
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
543
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
544 ## Write Bioseq instance with other header into a fasta file handler
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
545 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
546 # @param faFileHandler file handler on a file with writing right
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
547 # @param otherHeader a string representing a new header (without the > and the \n)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
548 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
549 def writeWithOtherHeader( self, faFileHandler, otherHeader ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
550 self.header = otherHeader
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
551 self.write( faFileHandler )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
552
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
553
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
554 ## Append Bioseq header and Bioseq sequence in a fasta file
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
555 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
556 # @param faFileHandler file handler on a file with writing right
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
557 # @param otherHeader a string representing a new header (without the > and the \n)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
558 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
559 def writeABioseqInAFastaFileWithOtherHeader( self, faFileHandler, otherHeader ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
560 self.writeWithOtherHeader( faFileHandler, otherHeader )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
561
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
562
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
563 ## get the list of Maps corresponding to seq without gap
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
564 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
565 # @warning This method was called getMap() in pyRepet.Bioseq
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
566 # @return a list of Map object
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
567 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
568 def getLMapWhithoutGap( self ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
569 lMaps = []
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
570 countSite = 1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
571 countSubseq = 1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
572 inGap = False
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
573 startMap = -1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
574 endMap = -1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
575
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
576 # initialize with the first site
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
577 if self.sequence[0] == "-":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
578 inGap = True
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
579 else:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
580 startMap = countSite
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
581
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
582 # for each remaining site
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
583 for site in self.sequence[1:]:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
584 countSite += 1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
585
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
586 # if it is a gap
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
587 if site == "-":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
588
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
589 # if this is the beginning of a gap, record the previous subsequence
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
590 if inGap == False:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
591 inGap = True
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
592 endMap = countSite - 1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
593 lMaps.append( Map( "%s_subSeq%i" % (self.header,countSubseq), self.header, startMap, endMap ) )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
594 countSubseq += 1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
595
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
596 # if it is NOT a gap
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
597 if site != "-":
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
598
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
599 # if it is the end of a gap, begin the next subsequence
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
600 if inGap == True:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
601 inGap = False
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
602 startMap = countSite
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
603
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
604 # if it is the last site
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
605 if countSite == self.getLength():
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
606 endMap = countSite
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
607 lMaps.append( Map( "%s_subSeq%i" % (self.header,countSubseq), self.header, startMap, endMap ) )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
608
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
609 return lMaps
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
610
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
611
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
612 ## get the percentage of GC
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
613 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
614 # @return a percentage
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
615 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
616 def getGCpercentage( self ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
617 tmpSeq = self.getSeqWithOnlyATGCN()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
618 nbGC = tmpSeq.count( "G" ) + tmpSeq.count( "C" )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
619 return 100 * nbGC / float( self.getLength() )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
620
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
621 ## get the percentage of GC of a sequence without counting N in sequence length
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
622 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
623 # @return a percentage
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
624 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
625 def getGCpercentageInSequenceWithoutCountNInLength(self):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
626 tmpSeq = self.getSeqWithOnlyATGCN()
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
627 nbGC = tmpSeq.count( "G" ) + tmpSeq.count( "C" )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
628 return 100 * nbGC / float( self.getLength() - self.countNt("N") )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
629
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
630 ## get the 5 prime subsequence of a given length at the given position
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
631 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
632 # @param position integer
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
633 # @param flankLength integer subsequence length
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
634 # @return a sequence string
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
635 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
636 def get5PrimeFlank(self, position, flankLength):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
637 if(position == 1):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
638 return ""
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
639 else:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
640 startOfFlank = 1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
641 endOfFlank = position -1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
642
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
643 if((position - flankLength) > 0):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
644 startOfFlank = position - flankLength
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
645 else:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
646 startOfFlank = 1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
647
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
648 return self.subseq(startOfFlank, endOfFlank).sequence
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
649
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
650
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
651 ## get the 3 prime subsequence of a given length at the given position
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
652 # In the case of indels, the polymorphism length can be specified
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
653 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
654 # @param position integer
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
655 # @param flankLength integer subsequence length
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
656 # @param polymLength integer polymorphism length
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
657 # @return a sequence string
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
658 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
659 def get3PrimeFlank(self, position, flankLength, polymLength = 1):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
660 if((position + polymLength) > len( self.sequence )):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
661 return ""
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
662 else:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
663 startOfFlank = position + polymLength
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
664
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
665 if((position+polymLength+flankLength) > len( self.sequence )):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
666 endOfFlank = len( self.sequence )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
667 else:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
668 endOfFlank = position+polymLength+flankLength-1
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
669
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
670 return self.subseq(startOfFlank, endOfFlank).sequence
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
671
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
672
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
673 def _createWordList(self,size,l=['A','T','G','C']):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
674 if size == 1 :
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
675 return l
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
676 else:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
677 l2 = []
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
678 for i in l:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
679 for j in ['A','T','G','C']:
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
680 l2.append( i + j )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
681 return self._createWordList(size-1,l2)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
682
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
683
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
684 def removeSymbol( self, symbol ):
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
685 tmp = self.sequence.replace( symbol, "" )
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
686 self.sequence = tmp