36
|
1 #
|
|
2 # Copyright INRA-URGI 2009-2010
|
|
3 #
|
|
4 # This software is governed by the CeCILL license under French law and
|
|
5 # abiding by the rules of distribution of free software. You can use,
|
|
6 # modify and/ or redistribute the software under the terms of the CeCILL
|
|
7 # license as circulated by CEA, CNRS and INRIA at the following URL
|
|
8 # "http://www.cecill.info".
|
|
9 #
|
|
10 # As a counterpart to the access to the source code and rights to copy,
|
|
11 # modify and redistribute granted by the license, users are provided only
|
|
12 # with a limited warranty and the software's author, the holder of the
|
|
13 # economic rights, and the successive licensors have only limited
|
|
14 # liability.
|
|
15 #
|
|
16 # In this respect, the user's attention is drawn to the risks associated
|
|
17 # with loading, using, modifying and/or developing or reproducing the
|
|
18 # software by the user in light of its specific status of free software,
|
|
19 # that may mean that it is complicated to manipulate, and that also
|
|
20 # therefore means that it is reserved for developers and experienced
|
|
21 # professionals having in-depth computer knowledge. Users are therefore
|
|
22 # encouraged to load and test the software's suitability as regards their
|
|
23 # requirements in conditions enabling the security of their systems and/or
|
|
24 # data to be ensured and, more generally, to use and operate it in the
|
|
25 # same conditions as regards security.
|
|
26 #
|
|
27 # The fact that you are presently reading this means that you have had
|
|
28 # knowledge of the CeCILL license and that you accept its terms.
|
|
29 #
|
|
30 import re
|
|
31 import sys
|
|
32
|
|
33 class MySqlTable(object):
|
|
34 """
|
|
35 Store a table of a mySQL database, used for transcripts or exons
|
|
36 Record a a name and a type (int, float, double) for each column
|
|
37 @ivar name: name of the table
|
|
38 @type name: string
|
|
39 @ivar variables: name of the columns
|
|
40 @type variables: list of string
|
|
41 @ivar types: type of the columns
|
|
42 @type types: dict of string
|
|
43 @ivar mySqlConnection: connection to a database
|
|
44 @type mySqlConnection: class L{MySqlConnection<MySqlConnection>}
|
|
45 @ivar nbLines: number of rows
|
|
46 @type nbLines: int
|
|
47 @ivar verbosity: verbosity
|
|
48 @type verbosity: int
|
|
49 """
|
|
50
|
|
51 def __init__(self, connection, name, verbosity = 0):
|
|
52 """
|
|
53 Constructor
|
|
54 Possibly retrieve column names and types if table exists
|
|
55 @param mySqlConnection: connection to a databas
|
|
56 @type mySqlConnection: class L{MySqlConnection<MySqlConnection>}
|
|
57 @param name: name of the table
|
|
58 @type name: string
|
|
59 @param verbosity: verbosity
|
|
60 @type verbosity: int
|
|
61 """
|
|
62 self.name = name
|
|
63 self.variables = []
|
|
64 self.types = {}
|
|
65 self.sizes = {}
|
|
66 self.nbLines = None
|
|
67 self.verbosity = verbosity
|
|
68 self.mySqlConnection = connection
|
|
69 queryTables = self.mySqlConnection.executeQuery("SELECT name FROM sqlite_master WHERE type LIKE 'table' AND name LIKE '%s'" % (self.name))
|
|
70 self.created = not queryTables.isEmpty()
|
|
71 if self.created:
|
|
72 queryFields = self.mySqlConnection.executeQuery("PRAGMA table_info('%s')" % (name))
|
|
73 for field in queryFields.getIterator():
|
|
74 if field[1] != "id":
|
|
75 self.variables.append(field[1])
|
|
76 self.types[field[1]] = field[2]
|
|
77 self.sizes[field[1]] = field[3]
|
|
78
|
|
79
|
|
80 def getName(self):
|
|
81 return self.name
|
|
82
|
|
83
|
|
84 def create(self, variables, types, sizes):
|
|
85 """
|
|
86 Create a table using give column names and types
|
|
87 @param variables: names of the columns
|
|
88 @type variables: list of string
|
|
89 @param types: types of the columns
|
|
90 @type types: dict of string
|
|
91 @param sizes: sizes of the types
|
|
92 @type sizes: dict of int
|
|
93 """
|
|
94 self.variables = variables
|
|
95 self.types = types
|
|
96 self.sizes = sizes
|
|
97 if self.created:
|
|
98 self.remove()
|
|
99 query = "CREATE TABLE '%s' (id INTEGER PRIMARY KEY" % (self.name)
|
|
100 for variable in variables:
|
|
101 query = "%s, %s %s(%d)" % (query, variable, types[variable], sizes[variable])
|
|
102 query += ")"
|
|
103 self.mySqlConnection.executeQuery(query)
|
|
104 self.created = True
|
|
105
|
|
106
|
|
107 def insertMany(self, lines):
|
|
108 """
|
|
109 Insert many lines
|
|
110 @param lines: the list of values
|
|
111 @type lines: list of lists
|
|
112 """
|
|
113 commands = []
|
|
114 for values in lines:
|
|
115 commands.append("INSERT INTO '%s' (%s) VALUES (%s)" % (self.name, ", ".join(self.variables), ", ".join([MySqlTable.formatSql(values[variable], self.types[variable], self.sizes[variable]) for variable in self.variables])))
|
|
116 self.mySqlConnection.executeManyQueries(commands)
|
|
117
|
|
118
|
|
119 def insertManyFormatted(self, lines):
|
|
120 """
|
|
121 Insert many lines
|
|
122 @param lines: the list of values
|
|
123 @type lines: list of lists
|
|
124 """
|
|
125 replacer = ["?"] * len(self.variables)
|
|
126 command = "INSERT INTO '%s' (%s) VALUES (%s)" % (self.name, ", ".join(self.variables), ", ".join(replacer))
|
|
127 values = [[line[variable] for variable in self.variables] for line in lines]
|
|
128 self.mySqlConnection.executeManyFormattedQueries(command, values)
|
|
129
|
|
130
|
|
131 def rename(self, name):
|
|
132 """
|
|
133 Rename the table
|
|
134 @param name: the new name
|
|
135 @type name: string
|
|
136 """
|
|
137 self.mySqlConnection.executeQuery("RENAME TABLE '%s' TO '%s'" % (self.name, name))
|
|
138 self.name = name
|
|
139
|
|
140
|
|
141 def copy(self, table):
|
|
142 """
|
|
143 Copy the given table this one
|
|
144 @param table: the table to be copied
|
|
145 @type table: class L{MySqlTable<MySqlTable>}
|
|
146 """
|
|
147 variables = []
|
|
148 types = {}
|
|
149 sizes = {}
|
|
150 fields = self.mySqlConnection.executeQuery("PRAGMA table_info(%s)" % (table.name))
|
|
151 for field in fields.getIterator():
|
|
152 if field[1] != "id":
|
|
153 variables.append(field[1])
|
|
154 m = re.search(r"(\w+)\((\d+)\)", field[2])
|
|
155 if m == None:
|
|
156 raise Exception("\nFormat %s in table %s is strange." % (field[2], table.name))
|
|
157 types[field[1]] = m.group(1)
|
|
158 sizes[field[1]] = int(m.group(2))
|
|
159 self.create(variables, types, sizes)
|
|
160 self.mySqlConnection.executeQuery("INSERT INTO '%s' SELECT * FROM %s" % (self.name, table.name))
|
|
161
|
|
162
|
|
163 def add(self, table):
|
|
164 """
|
|
165 Add the content of a table to this one
|
|
166 @param table: the table to be added
|
|
167 @type table: class L{MySqlTable<MySqlTable>}
|
|
168 """
|
|
169 self.mySqlConnection.executeQuery("INSERT INTO '%s' SELECT * FROM %s" % (self.name, table.name))
|
|
170 self.created = True
|
|
171
|
|
172
|
|
173 def exists(self):
|
|
174 """
|
|
175 Check if the table exists in mySQL
|
|
176 @return: true if it exits
|
|
177 """
|
|
178 return self.created
|
|
179
|
|
180
|
|
181 def remove(self):
|
|
182 """
|
|
183 Remove this table
|
|
184 """
|
|
185 if self.exists():
|
|
186 query = "DROP TABLE IF EXISTS '%s'" % (self.name)
|
|
187 self.mySqlConnection.executeQuery(query)
|
|
188 self.created = False
|
|
189
|
|
190
|
|
191 def clear(self):
|
|
192 """
|
|
193 Clear the content of this table
|
|
194 """
|
|
195 self.mySqlConnection.executeQuery("DELETE FROM '%s'" % (self.name))
|
|
196
|
|
197
|
|
198 def getNbElements(self):
|
|
199 """
|
|
200 Count the number of rows in the table
|
|
201 """
|
|
202 command = "SELECT COUNT(id) FROM '%s'" % (self.name)
|
|
203 query = self.mySqlConnection.executeQuery(command)
|
|
204 return int(query.getLine()[0])
|
|
205
|
|
206
|
|
207 def formatSql(self, value, type, size):
|
|
208 """
|
|
209 Format a value using MySQL encapsulation
|
|
210 """
|
|
211 if type.find("int") != -1:
|
|
212 return "%d" % value
|
|
213 if type.find("float") != -1:
|
|
214 return "%.10f" % value
|
|
215 if type.find("double") != -1:
|
|
216 return "%.20f" % value
|
|
217 if type.find("varchar") != -1:
|
|
218 if len(value) > size:
|
|
219 return "'%s'" % value[0:size]
|
|
220 return "'%s'" % value
|
|
221 raise Exception("Do not understand type %s" % (type))
|
|
222 formatSql = classmethod(formatSql)
|
|
223
|
|
224
|
|
225 def addLine(self, values):
|
|
226 """
|
|
227 Add a row to this table
|
|
228 @param values: the values of the row
|
|
229 @type values: dict
|
|
230 @return: the id of the added row
|
|
231 """
|
|
232 sqlValues = [values[variable] for variable in self.variables]
|
|
233 command = "INSERT INTO '%s' (%%s) VALUES (%s)" % (self.name, ", ".join(self.variables))
|
|
234 id = self.mySqlConnection.executeFormattedQueryQuery(command, sqlValues, True)
|
|
235 return id
|
|
236 sqlValues = []
|
|
237 for variable in self.variables:
|
|
238 sqlValues.append(self.formatSql(values[variable], self.types[variable], self.sizes[variable]))
|
|
239 command = "INSERT INTO '%s' (%s) VALUES (%s)" % (self.name, ", ".join(self.variables), ", ".join(sqlValues))
|
|
240 id = self.mySqlConnection.executeQuery(command, True)
|
|
241 return id
|
|
242
|
|
243
|
|
244 def retrieveFromId(self, id):
|
|
245 """
|
|
246 Retrieve a row from its id
|
|
247 @param id: the id of the row
|
|
248 @type id: int
|
|
249 @return: the row
|
|
250 """
|
|
251 query = self.mySqlConnection.executeQuery("SELECT * FROM '%s' WHERE id = %d" % (self.name, id))
|
|
252 result = query.getLine()
|
|
253 if result == None:
|
|
254 raise Exception("Error! Id %d is not in the table %s!" % (id, self.name))
|
|
255 return result
|
|
256
|
|
257
|
|
258 def retrieveBulkFromId(self, ids):
|
|
259 """
|
|
260 Retrieve a row from its id
|
|
261 @param id: the ids of the row
|
|
262 @type id: list of int
|
|
263 @return: the row
|
|
264 """
|
|
265 if not ids:
|
|
266 return []
|
|
267 MAXSIZE = 1000
|
|
268 results = []
|
|
269 for batch in range(len(ids) / MAXSIZE + 1):
|
|
270 theseIds = ids[batch * MAXSIZE : (batch+1) * MAXSIZE]
|
|
271 if theseIds:
|
|
272 query = self.mySqlConnection.executeQuery("SELECT * FROM '%s' WHERE id IN (%s)" % (self.name, ", ".join(["%d" % (id) for id in theseIds])))
|
|
273 lines = query.getLines()
|
|
274 if len(lines) != len(theseIds):
|
|
275 raise Exception("Error! Some Ids of (%s) is are missing in the table '%s' (got %d instead of %d)!" % (", ".join(["%d" % (id) for id in theseIds]), self.name, len(lines)), len(theseIds))
|
|
276 results.extend(lines)
|
|
277 return results
|
|
278
|
|
279
|
|
280 def removeFromId(self, id):
|
|
281 """
|
|
282 Remove a row from its id
|
|
283 @param id: the id of the row
|
|
284 @type id: int
|
|
285 """
|
|
286 self.mySqlConnection.executeQuery("DELETE FROM '%s' WHERE id = %d" % (self.name, id))
|
|
287
|
|
288
|
|
289 def getIterator(self):
|
|
290 """
|
|
291 Iterate on the content of table
|
|
292 @return: iterator to the rows of the table
|
|
293 """
|
|
294 if not self.created:
|
|
295 return
|
|
296 MAXSIZE = 1000
|
|
297 query = self.mySqlConnection.executeQuery("SELECT count(id) FROM '%s'" % (self.name))
|
|
298 nbRows = int(query.getLine()[0])
|
|
299 for chunk in range((nbRows / MAXSIZE) + 1):
|
|
300 query = self.mySqlConnection.executeQuery("SELECT * FROM '%s' LIMIT %d, %d" % (self.name, chunk * MAXSIZE, MAXSIZE))
|
|
301 for line in query.getIterator():
|
|
302 yield line
|
|
303
|
|
304
|
|
305 def createIndex(self, indexName, values, unique = False, fullText = False):
|
|
306 """
|
|
307 Add an index on the table
|
|
308 @param indexName: name of the index
|
|
309 @type indexName: string
|
|
310 @param values: values to be indexed
|
|
311 @type values: string
|
|
312 @param unique: if the index is unique
|
|
313 @type unique: boolean
|
|
314 @param fullText: whether full text should be indexed
|
|
315 @type fullText: boolean
|
|
316 """
|
|
317 self.mySqlConnection.executeQuery("CREATE %s%sINDEX '%s' ON '%s' (%s)" % ("UNIQUE " if unique else "", "FULLTEXT " if fullText else "", indexName, self.name, ", ".join(values)))
|
|
318
|
|
319
|
|
320 def setDefaultTagValue(self, field, name, value):
|
|
321 """
|
|
322 Add a tag value
|
|
323 @param name: name of the tag
|
|
324 @type name: string
|
|
325 @param value: value of the tag
|
|
326 @type value: string or int
|
|
327 """
|
|
328 newData = {}
|
|
329 for line in MySqlTable.getIterator(self):
|
|
330 id = line[0]
|
|
331 tags = line[field]
|
|
332 if tags == '':
|
|
333 newTag = "%s=%s" % (name, value)
|
|
334 else:
|
|
335 newTag = "%s;%s=%s" % (tags, name, value)
|
|
336 if name not in [tag.split("=")[0] for tag in tags.split(";")]:
|
|
337 newData[id] = newTag
|
|
338 for id, tag in newData.iteritems():
|
|
339 query = self.mySqlConnection.executeQuery("UPDATE '%s' SET tags = '%s' WHERE id = %i" % (self.name, tag, id))
|
|
340
|
|
341
|
|
342
|
|
343 def show(self):
|
|
344 """
|
|
345 Drop the content of the current table
|
|
346 """
|
|
347 query = self.mySqlConnection.executeQuery("SELECT * FROM '%s'" % (self.name))
|
|
348 print query.getLines()
|
|
349
|