comparison commons/core/sql/TableSeqAdaptator.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children
comparison
equal deleted inserted replaced
5:ea3082881bf8 6:769e306b7933
1 # Copyright INRA (Institut National de la Recherche Agronomique)
2 # http://www.inra.fr
3 # http://urgi.versailles.inra.fr
4 #
5 # This software is governed by the CeCILL license under French law and
6 # abiding by the rules of distribution of free software. You can use,
7 # modify and/ or redistribute the software under the terms of the CeCILL
8 # license as circulated by CEA, CNRS and INRIA at the following URL
9 # "http://www.cecill.info".
10 #
11 # As a counterpart to the access to the source code and rights to copy,
12 # modify and redistribute granted by the license, users are provided only
13 # with a limited warranty and the software's author, the holder of the
14 # economic rights, and the successive licensors have only limited
15 # liability.
16 #
17 # In this respect, the user's attention is drawn to the risks associated
18 # with loading, using, modifying and/or developing or reproducing the
19 # software by the user in light of its specific status of free software,
20 # that may mean that it is complicated to manipulate, and that also
21 # therefore means that it is reserved for developers and experienced
22 # professionals having in-depth computer knowledge. Users are therefore
23 # encouraged to load and test the software's suitability as regards their
24 # requirements in conditions enabling the security of their systems and/or
25 # data to be ensured and, more generally, to use and operate it in the
26 # same conditions as regards security.
27 #
28 # The fact that you are presently reading this means that you have had
29 # knowledge of the CeCILL license and that you accept its terms.
30
31
32 import sys
33 from commons.core.sql.TableAdaptator import TableAdaptator
34 from commons.core.sql.ITableSeqAdaptator import ITableSeqAdaptator
35 from commons.core.coord.SetUtils import SetUtils
36 from commons.core.seq.Bioseq import Bioseq
37
38
## Adaptator for a Seq table
#
# Gives read access to a table of sequences through the database handle
# 'self._iDb' and the table name 'self._table'. Neither attribute is set in
# this class (presumably initialised by TableAdaptator -- to be confirmed).
# The queries below rely on the columns 'accession', 'sequence',
# 'description' and 'length'.
#
# NOTE(review): every query is built by string interpolation, so an accession
# or a description containing a quote character breaks the statement. Only
# pass trusted values.
#
class TableSeqAdaptator( TableAdaptator, ITableSeqAdaptator ):

    ## Retrieve all the distinct accession names in a list.
    #
    # @return lAccessions list of accessions
    #
    def getAccessionsList( self ):
        sqlCmd = "SELECT DISTINCT accession FROM %s;" % ( self._table )
        return self._getStringListWithSQLCmd( sqlCmd )

    ## Save sequences in a fasta file from a list of accession names.
    #
    # The output file is created (or truncated) and one record is written per
    # accession, in the order of the list.
    #
    # @param lAccessions list of accessions
    # @param outFileName string Fasta file
    #
    def saveAccessionsListInFastaFile( self, lAccessions, outFileName ):
        # 'with' closes the file even when a lookup or a write raises.
        with open( outFileName, "w" ) as outFile:
            for ac in lAccessions:
                bs = self.getBioseqFromHeader( ac )
                bs.write( outFile )

    ## Get a bioseq instance given its header
    #
    # The bioseq is built from the first two columns of the first matching row
    # (presumably accession then sequence, as suggested by the insertion order
    # in '_getTypeAndAttr2Insert' -- to be confirmed against the table schema).
    # An IndexError is raised when no row matches.
    #
    # @param header string name of the sequence ('accession' field in the 'seq' table)
    # @return bioseq instance
    #
    def getBioseqFromHeader( self, header ):
        sqlCmd = "SELECT * FROM %s WHERE accession='%s';" % ( self._table, header )
        self._iDb.execute( sqlCmd )
        res = self._iDb.fetchall()
        firstRow = res[0]
        return Bioseq( firstRow[0], firstRow[1] )

    ## Retrieve the length of a sequence given its name.
    #
    # @param accession name of the sequence
    # @return seqLength integer length of the sequence
    #
    def getSeqLengthFromAccession( self, accession ):
        sqlCmd = 'SELECT length FROM %s WHERE accession="%s"' % ( self._table, accession )
        return self._iDb.getIntegerWithSQLCmd( sqlCmd )

    ## Retrieve the length of a sequence given its description.
    #
    # @param description of the sequence
    # @return seqLength integer length of the sequence
    #
    def getSeqLengthFromDescription( self, description ):
        sqlCmd = 'SELECT length FROM %s WHERE description="%s"' % ( self._table, description )
        return self._iDb.getIntegerWithSQLCmd( sqlCmd )

    ## Retrieve all the accessions with length in a list of tuples
    #
    # @return lAccessionLengthTuples list of tuples (one per fetched row)
    #
    def getAccessionAndLengthList( self ):
        sqlCmd = 'SELECT accession, length FROM %s' % self._table
        self._iDb.execute( sqlCmd )
        return list( self._iDb.fetchall() )

    ## get subsequence according to given parameters
    #
    # Both coordinates must be strictly positive and not greater than the
    # sequence length. When start > end, the reverse-complement of the range
    # is returned. On invalid coordinates or unknown accession, an error
    # message is printed on stdout and the program exits with status 1.
    #
    # @param accession name of the sequence
    # @param start integer
    # @param end integer
    # @return bioseq.sequence string
    #
    def getSubSequence( self, accession, start, end ):
        # Single-argument print(...) gives the same output with Python 2 and 3.
        if start <= 0 or end <= 0:
            print( "ERROR with coordinates start=%i or end=%i" % ( start, end ) )
            sys.exit(1)

        # Targeted query rather than loading every accession of the table.
        if not self.isAccessionInTable( accession ):
            print( "ERROR: accession '%s' absent from table '%s'" % ( accession, self._table ) )
            sys.exit(1)

        lengthAccession = self.getSeqLengthFromAccession( accession )
        if start > lengthAccession or end > lengthAccession:
            print( "ERROR: coordinates start=%i end=%i out of sequence '%s' range (%i bp)" % ( start, end, accession, lengthAccession ) )
            sys.exit(1)

        # SUBSTRING takes the leftmost position and the number of characters,
        # whatever the orientation of the requested range.
        sqlCmd = "SELECT SUBSTRING(sequence,%i,%i) FROM %s WHERE accession='%s'" % ( min(start,end), abs(end-start)+ 1, self._table, accession )
        self._iDb.execute( sqlCmd )
        res = self._iDb.fetchall()

        bs = Bioseq()
        bs.setSequence( res[0][0] )
        if start > end:
            bs.reverseComplement()
        return bs.sequence

    ## get bioseq from given set list
    #
    # The header is made of the name, id and seqname of the first set, followed
    # by the comma-separated 'start..end' ranges. The sequence is the
    # concatenation of the sub-sequences of each set. Sets are sorted by
    # increasing coordinates, and that order is reversed when the first set is
    # not on the direct strand. An IndexError is raised if the list is empty.
    #
    # @param lSets set list of sets
    # @return bioseq instance
    #
    def getBioseqFromSetList( self, lSets ):
        firstSet = lSets[0]
        headerPrefix = "%s::%i %s " % ( firstSet.name, firstSet.id, firstSet.seqname )
        lSortedSets = SetUtils.getSetListSortedByIncreasingMinThenMax( lSets )
        if not firstSet.isOnDirectStrand():
            lSortedSets.reverse()

        lRanges = []
        lSubSequences = []
        for iSet in lSortedSets:
            lRanges.append( "%i..%i" % ( iSet.getStart(), iSet.getEnd() ) )
            lSubSequences.append( self.getSubSequence( iSet.seqname, iSet.getStart(), iSet.getEnd() ) )
        # join() avoids the repeated string copies of '+=' in a loop.
        return Bioseq( headerPrefix + ",".join( lRanges ), "".join( lSubSequences ) )

    ## Return True if the given accession is present in the table
    #
    # @param name string accession to look for
    # @return boolean
    #
    def isAccessionInTable( self, name ):
        sqlCmd = "SELECT accession FROM %s WHERE accession='%s'" % ( self._table, name )
        self._iDb.execute( sqlCmd )
        return bool( self._iDb.fetchall() )

    ## Save all the sequences of the table (one per distinct accession) in a fasta file.
    #
    # @param outFileName string Fasta file
    #
    def exportInFastaFile( self, outFileName ):
        lAccessions = self.getAccessionsList()
        self.saveAccessionsListInFastaFile( lAccessions, outFileName )

    ## Execute a SQL command and return the first column of every fetched row.
    #
    # @param sqlCmd string SQL command
    # @return lString list with the first field of each row
    #
    def _getStringListWithSQLCmd( self, sqlCmd ):
        self._iDb.execute( sqlCmd )
        return [ row[0] for row in self._iDb.fetchall() ]

    ## Give the formats and the values needed to insert a bioseq.
    #
    # The values are: the first word of the header, the sequence, the whole
    # header and the sequence length. An IndexError is raised if the header
    # contains no word.
    #
    # @param bs bioseq instance
    # @return type2Insert, attr2Insert tuple of formats, tuple of values
    #
    def _getTypeAndAttr2Insert( self, bs ):
        type2Insert = ( "'%s'", "'%s'", "'%s'", "'%i'" )
        attr2Insert = ( bs.header.split()[0], bs.sequence, bs.header, bs.getLength() )
        return type2Insert, attr2Insert

    ## Do nothing here (empty implementation, returns None).
    #
    # @param obj unused
    #
    def _escapeAntislash( self, obj ):
        pass
185