annotate commons/core/sql/TableSeqAdaptator.py @ 10:c081f25e1572

Updated CompareOverlappingSmallRef.py
author m-zytnicki
date Thu, 14 Mar 2013 05:25:40 -0400
parents 769e306b7933
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1 # Copyright INRA (Institut National de la Recherche Agronomique)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
2 # http://www.inra.fr
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
3 # http://urgi.versailles.inra.fr
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
4 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
5 # This software is governed by the CeCILL license under French law and
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
6 # abiding by the rules of distribution of free software. You can use,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
7 # modify and/ or redistribute the software under the terms of the CeCILL
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
8 # license as circulated by CEA, CNRS and INRIA at the following URL
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
9 # "http://www.cecill.info".
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
10 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
11 # As a counterpart to the access to the source code and rights to copy,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
12 # modify and redistribute granted by the license, users are provided only
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
13 # with a limited warranty and the software's author, the holder of the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
14 # economic rights, and the successive licensors have only limited
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
15 # liability.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
16 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
17 # In this respect, the user's attention is drawn to the risks associated
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
18 # with loading, using, modifying and/or developing or reproducing the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
19 # software by the user in light of its specific status of free software,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
20 # that may mean that it is complicated to manipulate, and that also
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
21 # therefore means that it is reserved for developers and experienced
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
22 # professionals having in-depth computer knowledge. Users are therefore
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
23 # encouraged to load and test the software's suitability as regards their
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
24 # requirements in conditions enabling the security of their systems and/or
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
25 # data to be ensured and, more generally, to use and operate it in the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
26 # same conditions as regards security.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
27 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
28 # The fact that you are presently reading this means that you have had
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
29 # knowledge of the CeCILL license and that you accept its terms.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
30
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
31
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
32 import sys
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
33 from commons.core.sql.TableAdaptator import TableAdaptator
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
34 from commons.core.sql.ITableSeqAdaptator import ITableSeqAdaptator
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
35 from commons.core.coord.SetUtils import SetUtils
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
36 from commons.core.seq.Bioseq import Bioseq
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
37
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
38
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
39 ## Adaptator for a Seq table
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
40 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
class TableSeqAdaptator( TableAdaptator, ITableSeqAdaptator ):
    # Data-access helper for a sequence table. The queries below read the
    # columns 'accession', 'sequence', 'description' and 'length' of the table
    # named by self._table, and run through the database handle self._iDb.
    # NOTE(review): both attributes are presumably set by TableAdaptator;
    # they are not initialised in this class.
    # NOTE(review): every query is assembled by string interpolation, so the
    # values passed in must not contain quote characters; switch to
    # parameterised queries if self._iDb.execute() supports them.

    ## Retrieve all the distinct accession names in a list.
    #
    # @return lAccessions list of accessions
    #
    def getAccessionsList( self ):
        sqlCmd = "SELECT DISTINCT accession FROM %s;" % ( self._table )
        return self._getStringListWithSQLCmd( sqlCmd )

    ## Save sequences in a fasta file from a list of accession names.
    #
    # The output file is truncated if it already exists and is always closed,
    # even when fetching or writing one of the sequences fails.
    #
    # @param lAccessions list of accessions
    # @param outFileName string Fasta file
    #
    def saveAccessionsListInFastaFile( self, lAccessions, outFileName ):
        with open( outFileName, "w" ) as outFile:
            for ac in lAccessions:
                bs = self.getBioseqFromHeader( ac )
                bs.write( outFile )

    ## Get a bioseq instance given its header
    #
    # The first two columns of the first matching row are passed to Bioseq
    # as header and sequence. Indexing res[0] fails (IndexError for an empty
    # result sequence) when no row matches the given accession.
    #
    # @param header string name of the sequence ('accession' field in the 'seq' table)
    # @return bioseq instance
    #
    def getBioseqFromHeader( self, header ):
        sqlCmd = "SELECT * FROM %s WHERE accession='%s';" % ( self._table, header )
        self._iDb.execute( sqlCmd )
        res = self._iDb.fetchall()
        firstRow = res[0]
        return Bioseq( firstRow[0], firstRow[1] )

    ## Retrieve the length of a sequence given its name.
    #
    # @param accession name of the sequence
    # @return seqLength integer length of the sequence
    #
    def getSeqLengthFromAccession( self, accession ):
        sqlCmd = 'SELECT length FROM %s WHERE accession="%s"' % ( self._table, accession )
        return self._iDb.getIntegerWithSQLCmd( sqlCmd )

    ## Retrieve the length of a sequence given its description.
    #
    # @param description of the sequence
    # @return seqLength integer length of the sequence
    #
    def getSeqLengthFromDescription( self, description ):
        sqlCmd = 'SELECT length FROM %s WHERE description="%s"' % ( self._table, description )
        return self._iDb.getIntegerWithSQLCmd( sqlCmd )

    ## Retrieve all the accessions with length in a list of tuples
    #
    # @return lAccessionLengthTuples list of rows (accession, length) as returned by the database handle
    #
    def getAccessionAndLengthList(self):
        sqlCmd = 'SELECT accession, length FROM %s' % self._table
        self._iDb.execute( sqlCmd )
        # Copy the fetched rows into a fresh list so the caller owns it.
        return list( self._iDb.fetchall() )

    ## get subsequence according to given parameters
    #
    # Coordinates are 1-based and inclusive. When start > end, the fragment
    # between end and start is extracted and then reverse-complemented.
    # On any invalid input an error message is printed and the process exits
    # with status 1.
    #
    # @param accession name of the sequence
    # @param start integer
    # @param end integer
    # @return bioseq.sequence string
    #
    def getSubSequence( self, accession, start, end ):
        if start <= 0 or end <= 0:
            print( "ERROR with coordinates start=%i or end=%i" % ( start, end ) )
            sys.exit(1)

        # Ask the database for this single accession instead of fetching the
        # whole list of accessions for a membership test; the comparison is
        # then done by the database, like in the queries below.
        if not self.isAccessionInTable( accession ):
            print( "ERROR: accession '%s' absent from table '%s'" % ( accession, self._table ) )
            sys.exit(1)

        lengthAccession = self.getSeqLengthFromAccession( accession )
        if start > lengthAccession or end > lengthAccession:
            print( "ERROR: coordinates start=%i end=%i out of sequence '%s' range (%i bp)" % ( start, end, accession, lengthAccession ) )
            sys.exit(1)

        # SUBSTRING takes (position, length): start from the smaller
        # coordinate and span both ends inclusively.
        sqlCmd = "SELECT SUBSTRING(sequence,%i,%i) FROM %s WHERE accession='%s'" % ( min(start,end), abs(end-start)+ 1, self._table, accession )
        self._iDb.execute( sqlCmd )
        res = self._iDb.fetchall()

        bs = Bioseq()
        bs.setSequence( res[0][0] )
        if start > end:
            bs.reverseComplement()
        return bs.sequence

    ## get bioseq from given set list
    #
    # The header is "<name>::<id> <seqname> <start>..<end>,<start>..<end>,..."
    # built from the first set and from the coordinates of every set; the
    # sequence is the concatenation of the sub-sequences of all sets, taken
    # in increasing (min, max) order, or in the reverse order when the first
    # set is not on the direct strand. lSets must not be empty (lSets[0] is
    # read unconditionally).
    #
    # @param lSets set list of sets
    # @return bioseq instance
    #
    def getBioseqFromSetList( self, lSets ):
        firstSet = lSets[0]
        header = "%s::%i %s " % ( firstSet.name, firstSet.id, firstSet.seqname )
        lSortedSets = SetUtils.getSetListSortedByIncreasingMinThenMax( lSets )
        if not firstSet.isOnDirectStrand():
            lSortedSets.reverse()
        lFragments = []
        for iSet in lSortedSets:
            header += "%i..%i," % ( iSet.getStart(), iSet.getEnd() )
            lFragments.append( self.getSubSequence( iSet.seqname, iSet.getStart(), iSet.getEnd() ) )
        # header[:-1] drops the trailing comma left by the loop above.
        return Bioseq( header[:-1], "".join( lFragments ) )

    ## Return True if the given accession is present in the table
    #
    # @param name string accession to look for
    # @return boolean
    #
    def isAccessionInTable( self, name ):
        sqlCmd = "SELECT accession FROM %s WHERE accession='%s'" % ( self._table, name )
        self._iDb.execute( sqlCmd )
        res = self._iDb.fetchall()
        return bool(res)

    ## Export all the sequences of the table in a fasta file.
    #
    # One record is written per distinct accession found in the table.
    #
    # @param outFileName string Fasta file
    #
    def exportInFastaFile(self, outFileName ):
        lAccessions = self.getAccessionsList()
        self.saveAccessionsListInFastaFile( lAccessions, outFileName )

    ## Execute a SQL command and return the first column of every fetched row.
    #
    # @param sqlCmd string SQL command
    # @return lString list of the first field of each row
    #
    def _getStringListWithSQLCmd( self, sqlCmd ):
        self._iDb.execute( sqlCmd )
        res = self._iDb.fetchall()
        return [ row[0] for row in res ]

    ## Give the format specifiers and the values used to insert a bioseq.
    #
    # The values are, in order: the first whitespace-separated word of the
    # header, the sequence, the full header and the sequence length.
    # NOTE(review): presumably consumed by an insert method of TableAdaptator
    # and mapped to (accession, sequence, description, length) - confirm.
    #
    # @param bs bioseq instance
    # @return type2Insert, attr2Insert tuple of format strings, tuple of values
    #
    def _getTypeAndAttr2Insert(self, bs):
        type2Insert = ( "'%s'", "'%s'", "'%s'", "'%i'" )
        attr2Insert = (bs.header.split()[0], bs.sequence, bs.header, bs.getLength())
        return type2Insert, attr2Insert

    ## Do nothing.
    #
    # NOTE(review): presumably overrides a hook of TableAdaptator so that no
    # backslash escaping is applied to sequences - confirm against the parent.
    #
    def _escapeAntislash(self, obj):
        pass
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
185