annotate sqlite.py @ 1:86c92a1d55fa draft

Uploaded
author sem4j
date Wed, 25 Sep 2013 01:29:30 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
1 # sqlite.py version:0.3
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
2 # USAGE: python sqlite.py <output_file> <explain_plan> <header> <str_select> <input_file1> <index1> <header1> <input_file2> <index2> <header2> ..
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
3
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
4 import sys, csv, sqlite3, time
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
5
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
6 argvs = sys.argv
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
7
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
8 if ((len(argvs) % 3) != 2):
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
9 print 'Number of argv is incorrect'
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
10 quit()
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
11
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
12 num_table = (len(argvs) - 4) / 3
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
13 print('Number of Tables: ' + str(num_table) + '\n')
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
14
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
15 output_file = argvs[1]
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
16 explain_plan = argvs[2]
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
17 output_header = argvs[3]
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
18 str_select = argvs[4]
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
19
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
20 conn = sqlite3.connect(':memory:')
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
21 conn.row_factory = sqlite3.Row
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
22 conn.text_factory = lambda x: unicode(x, "utf-8", "ignore")
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
23 cur = conn.cursor()
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
24
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
25 for i in range(1, num_table + 1):
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
26 input_file = argvs[3 * i + 2]
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
27 input_header = argvs[3 * i + 3]
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
28 str_index = argvs[3 * i + 4]
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
29
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
30 with open(input_file,'rb') as infile:
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
31 dr = csv.reader(infile, delimiter='\t')
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
32 to_db = []
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
33 row_count = 0
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
34 for row in dr:
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
35 row_count += 1
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
36 values =[]
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
37 col_count = 0
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
38 for col in row:
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
39 col_count += 1
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
40 values.append(col)
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
41 if input_header == '1' and row_count == 1:
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
42 header = values
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
43 print('Header:')
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
44 else:
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
45 to_db.append(values)
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
46
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
47 # PREPARE DDL&DML
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
48 str_table1 = 't' + str(i) + '('
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
49 str_table2 = 't' + str(i) + '('
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
50 str_value = 'VALUES ('
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
51 for j in range(col_count):
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
52 if input_header == '1':
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
53 print(' ' + str(j+1) + ' ' + header[j]);
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
54 col_name = header[j]
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
55 else:
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
56 col_name = 'c' + str(j + 1)
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
57 str_table1 = str_table1 + col_name + ' NUMERIC'
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
58 str_table2 = str_table2 + col_name + ' '
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
59 str_value = str_value + '?'
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
60 if j != col_count - 1:
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
61 str_table1 = str_table1 + ','
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
62 str_table2 = str_table2 + ','
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
63 str_value = str_value + ','
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
64 str_table1 = str_table1 + ')'
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
65 str_table2 = str_table2 + ')'
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
66 str_value = str_value + ')'
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
67 print('')
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
68
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
69 # CREATE TABLE
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
70 str_create = 'CREATE TABLE ' + str_table1 + ';'
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
71 print(str_create)
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
72 time_start = time.time()
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
73 cur.execute(str_create)
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
74 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
75
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
76 # LOAD DATA
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
77 str_insert = 'INSERT INTO ' + str_table2 + ' ' + str_value + ';'
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
78 print(str_insert)
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
79 time_start = time.time()
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
80 cur.executemany(str_insert, to_db)
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
81 conn.commit()
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
82 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
83
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
84 # CREATE INDEX
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
85 array_idx = str_index.split(',')
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
86 for col_idx in array_idx:
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
87 if col_idx != '':
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
88 time_start = time.time()
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
89 if input_header == '1':
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
90 sql_index = 'CREATE INDEX idx_t' + str(i) +'c' + col_idx + ' on t' + str(i) + '(' + header[int(col_idx) - 1] + ');'
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
91 else:
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
92 sql_index = 'CREATE INDEX idx_t' + str(i) +'c' + col_idx + ' on t' + str(i) + '(c' + col_idx + ');'
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
93 cur.execute(sql_index)
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
94 print(sql_index)
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
95 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
96
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
97 # SELECT
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
98 time_start = time.time()
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
99 print(str_select)
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
100 cur.execute(str_select)
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
101 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
102
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
103 # OUTPUT
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
104 out = open(output_file, 'w')
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
105
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
106 # HEADER
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
107 if output_header == '1':
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
108 col_count = 0
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
109 for col in cur.description:
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
110 col_count += 1
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
111 if col_count != len(cur.description):
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
112 out.write(str(col[0]) + '\t')
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
113 else:
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
114 out.write(str(col[0]) + '\n')
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
115
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
116 # CONTENT
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
117 for row in cur:
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
118 col_count = 0
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
119 for col in row:
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
120 col_count += 1
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
121 if col_count != len(row):
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
122 out.write(str(col) + '\t')
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
123 else:
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
124 out.write(str(col) + '\n')
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
125
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
126 out.close()
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
127
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
128 # EXPLAIN PLAN
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
129 if explain_plan == '1':
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
130 #conn.executescript('.explain on')
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
131 cur.execute('explain query plan ' + str_select)
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
132 for row in cur:
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
133 col_count = 0
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
134 for col in row:
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
135 col_count += 1
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
136 if col_count != len(row):
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
137 print(str(col) + '\t')
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
138 else:
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
139 print(str(col) + '\n')
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
140
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
141 cur.close()
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
142 conn.close()
86c92a1d55fa Uploaded
sem4j
parents:
diff changeset
143