1
|
1 # sqlite.py version:0.3
|
|
2 # USAGE: python sqlite.py <output_file> <explain_plan> <header> <str_select> <input_file1> <index1> <header1> <input_file2> <index2> <header2> ..
|
|
3
|
|
4 import sys, csv, sqlite3, time
|
|
5
|
|
6 argvs = sys.argv
|
|
7
|
|
8 if ((len(argvs) % 3) != 2):
|
|
9 print 'Number of argv is incorrect'
|
|
10 quit()
|
|
11
|
|
12 num_table = (len(argvs) - 4) / 3
|
|
13 print('Number of Tables: ' + str(num_table) + '\n')
|
|
14
|
|
15 output_file = argvs[1]
|
|
16 explain_plan = argvs[2]
|
|
17 output_header = argvs[3]
|
|
18 str_select = argvs[4]
|
|
19
|
|
20 conn = sqlite3.connect(':memory:')
|
|
21 conn.row_factory = sqlite3.Row
|
|
22 conn.text_factory = lambda x: unicode(x, "utf-8", "ignore")
|
|
23 cur = conn.cursor()
|
|
24
|
|
25 for i in range(1, num_table + 1):
|
|
26 input_file = argvs[3 * i + 2]
|
|
27 input_header = argvs[3 * i + 3]
|
|
28 str_index = argvs[3 * i + 4]
|
|
29
|
|
30 with open(input_file,'rb') as infile:
|
|
31 dr = csv.reader(infile, delimiter='\t')
|
|
32 to_db = []
|
|
33 row_count = 0
|
|
34 for row in dr:
|
|
35 row_count += 1
|
|
36 values =[]
|
|
37 col_count = 0
|
|
38 for col in row:
|
|
39 col_count += 1
|
|
40 values.append(col)
|
|
41 if input_header == '1' and row_count == 1:
|
|
42 header = values
|
|
43 print('Header:')
|
|
44 else:
|
|
45 to_db.append(values)
|
|
46
|
|
47 # PREPARE DDL&DML
|
|
48 str_table1 = 't' + str(i) + '('
|
|
49 str_table2 = 't' + str(i) + '('
|
|
50 str_value = 'VALUES ('
|
|
51 for j in range(col_count):
|
|
52 if input_header == '1':
|
|
53 print(' ' + str(j+1) + ' ' + header[j]);
|
|
54 col_name = header[j]
|
|
55 else:
|
|
56 col_name = 'c' + str(j + 1)
|
|
57 str_table1 = str_table1 + col_name + ' NUMERIC'
|
|
58 str_table2 = str_table2 + col_name + ' '
|
|
59 str_value = str_value + '?'
|
|
60 if j != col_count - 1:
|
|
61 str_table1 = str_table1 + ','
|
|
62 str_table2 = str_table2 + ','
|
|
63 str_value = str_value + ','
|
|
64 str_table1 = str_table1 + ')'
|
|
65 str_table2 = str_table2 + ')'
|
|
66 str_value = str_value + ')'
|
|
67 print('')
|
|
68
|
|
69 # CREATE TABLE
|
|
70 str_create = 'CREATE TABLE ' + str_table1 + ';'
|
|
71 print(str_create)
|
|
72 time_start = time.time()
|
|
73 cur.execute(str_create)
|
|
74 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
|
|
75
|
|
76 # LOAD DATA
|
|
77 str_insert = 'INSERT INTO ' + str_table2 + ' ' + str_value + ';'
|
|
78 print(str_insert)
|
|
79 time_start = time.time()
|
|
80 cur.executemany(str_insert, to_db)
|
|
81 conn.commit()
|
|
82 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
|
|
83
|
|
84 # CREATE INDEX
|
|
85 array_idx = str_index.split(',')
|
|
86 for col_idx in array_idx:
|
|
87 if col_idx != '':
|
|
88 time_start = time.time()
|
|
89 if input_header == '1':
|
|
90 sql_index = 'CREATE INDEX idx_t' + str(i) +'c' + col_idx + ' on t' + str(i) + '(' + header[int(col_idx) - 1] + ');'
|
|
91 else:
|
|
92 sql_index = 'CREATE INDEX idx_t' + str(i) +'c' + col_idx + ' on t' + str(i) + '(c' + col_idx + ');'
|
|
93 cur.execute(sql_index)
|
|
94 print(sql_index)
|
|
95 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
|
|
96
|
|
97 # SELECT
|
|
98 time_start = time.time()
|
|
99 print(str_select)
|
|
100 cur.execute(str_select)
|
|
101 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
|
|
102
|
|
103 # OUTPUT
|
|
104 out = open(output_file, 'w')
|
|
105
|
|
106 # HEADER
|
|
107 if output_header == '1':
|
|
108 col_count = 0
|
|
109 for col in cur.description:
|
|
110 col_count += 1
|
|
111 if col_count != len(cur.description):
|
|
112 out.write(str(col[0]) + '\t')
|
|
113 else:
|
|
114 out.write(str(col[0]) + '\n')
|
|
115
|
|
116 # CONTENT
|
|
117 for row in cur:
|
|
118 col_count = 0
|
|
119 for col in row:
|
|
120 col_count += 1
|
|
121 if col_count != len(row):
|
|
122 out.write(str(col) + '\t')
|
|
123 else:
|
|
124 out.write(str(col) + '\n')
|
|
125
|
|
126 out.close()
|
|
127
|
|
128 # EXPLAIN PLAN
|
|
129 if explain_plan == '1':
|
|
130 #conn.executescript('.explain on')
|
|
131 cur.execute('explain query plan ' + str_select)
|
|
132 for row in cur:
|
|
133 col_count = 0
|
|
134 for col in row:
|
|
135 col_count += 1
|
|
136 if col_count != len(row):
|
|
137 print(str(col) + '\t')
|
|
138 else:
|
|
139 print(str(col) + '\n')
|
|
140
|
|
141 cur.close()
|
|
142 conn.close()
|
|
143
|