comparison sqlite.py @ 1:86c92a1d55fa draft

Uploaded
author sem4j
date Wed, 25 Sep 2013 01:29:30 -0400
parents
children
comparison
equal deleted inserted replaced
0:9e9da6cf492b 1:86c92a1d55fa
1 # sqlite.py version:0.3
2 # USAGE: python sqlite.py <output_file> <explain_plan> <header> <str_select> <input_file1> <index1> <header1> <input_file2> <index2> <header2> ..
3
4 import sys, csv, sqlite3, time
5
6 argvs = sys.argv
7
8 if ((len(argvs) % 3) != 2):
9 print 'Number of argv is incorrect'
10 quit()
11
12 num_table = (len(argvs) - 4) / 3
13 print('Number of Tables: ' + str(num_table) + '\n')
14
15 output_file = argvs[1]
16 explain_plan = argvs[2]
17 output_header = argvs[3]
18 str_select = argvs[4]
19
20 conn = sqlite3.connect(':memory:')
21 conn.row_factory = sqlite3.Row
22 conn.text_factory = lambda x: unicode(x, "utf-8", "ignore")
23 cur = conn.cursor()
24
25 for i in range(1, num_table + 1):
26 input_file = argvs[3 * i + 2]
27 input_header = argvs[3 * i + 3]
28 str_index = argvs[3 * i + 4]
29
30 with open(input_file,'rb') as infile:
31 dr = csv.reader(infile, delimiter='\t')
32 to_db = []
33 row_count = 0
34 for row in dr:
35 row_count += 1
36 values =[]
37 col_count = 0
38 for col in row:
39 col_count += 1
40 values.append(col)
41 if input_header == '1' and row_count == 1:
42 header = values
43 print('Header:')
44 else:
45 to_db.append(values)
46
47 # PREPARE DDL&DML
48 str_table1 = 't' + str(i) + '('
49 str_table2 = 't' + str(i) + '('
50 str_value = 'VALUES ('
51 for j in range(col_count):
52 if input_header == '1':
53 print(' ' + str(j+1) + ' ' + header[j]);
54 col_name = header[j]
55 else:
56 col_name = 'c' + str(j + 1)
57 str_table1 = str_table1 + col_name + ' NUMERIC'
58 str_table2 = str_table2 + col_name + ' '
59 str_value = str_value + '?'
60 if j != col_count - 1:
61 str_table1 = str_table1 + ','
62 str_table2 = str_table2 + ','
63 str_value = str_value + ','
64 str_table1 = str_table1 + ')'
65 str_table2 = str_table2 + ')'
66 str_value = str_value + ')'
67 print('')
68
69 # CREATE TABLE
70 str_create = 'CREATE TABLE ' + str_table1 + ';'
71 print(str_create)
72 time_start = time.time()
73 cur.execute(str_create)
74 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
75
76 # LOAD DATA
77 str_insert = 'INSERT INTO ' + str_table2 + ' ' + str_value + ';'
78 print(str_insert)
79 time_start = time.time()
80 cur.executemany(str_insert, to_db)
81 conn.commit()
82 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
83
84 # CREATE INDEX
85 array_idx = str_index.split(',')
86 for col_idx in array_idx:
87 if col_idx != '':
88 time_start = time.time()
89 if input_header == '1':
90 sql_index = 'CREATE INDEX idx_t' + str(i) +'c' + col_idx + ' on t' + str(i) + '(' + header[int(col_idx) - 1] + ');'
91 else:
92 sql_index = 'CREATE INDEX idx_t' + str(i) +'c' + col_idx + ' on t' + str(i) + '(c' + col_idx + ');'
93 cur.execute(sql_index)
94 print(sql_index)
95 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
96
97 # SELECT
98 time_start = time.time()
99 print(str_select)
100 cur.execute(str_select)
101 print('Elapsed Time: ' + str(time.time() - time_start) + '\n')
102
103 # OUTPUT
104 out = open(output_file, 'w')
105
106 # HEADER
107 if output_header == '1':
108 col_count = 0
109 for col in cur.description:
110 col_count += 1
111 if col_count != len(cur.description):
112 out.write(str(col[0]) + '\t')
113 else:
114 out.write(str(col[0]) + '\n')
115
116 # CONTENT
117 for row in cur:
118 col_count = 0
119 for col in row:
120 col_count += 1
121 if col_count != len(row):
122 out.write(str(col) + '\t')
123 else:
124 out.write(str(col) + '\n')
125
126 out.close()
127
128 # EXPLAIN PLAN
129 if explain_plan == '1':
130 #conn.executescript('.explain on')
131 cur.execute('explain query plan ' + str_select)
132 for row in cur:
133 col_count = 0
134 for col in row:
135 col_count += 1
136 if col_count != len(row):
137 print(str(col) + '\t')
138 else:
139 print(str(col) + '\n')
140
141 cur.close()
142 conn.close()
143