Mercurial > repos > sem4j > sql_tools
comparison sqlite.py @ 1:86c92a1d55fa draft
Uploaded
author | sem4j |
---|---|
date | Wed, 25 Sep 2013 01:29:30 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:9e9da6cf492b | 1:86c92a1d55fa |
---|---|
1 # sqlite.py version:0.3 | |
2 # USAGE: python sqlite.py <output_file> <explain_plan> <header> <str_select> <input_file1> <index1> <header1> <input_file2> <index2> <header2> .. | |
3 | |
4 import sys, csv, sqlite3, time | |
5 | |
6 argvs = sys.argv | |
7 | |
8 if ((len(argvs) % 3) != 2): | |
9 print 'Number of argv is incorrect' | |
10 quit() | |
11 | |
12 num_table = (len(argvs) - 4) / 3 | |
13 print('Number of Tables: ' + str(num_table) + '\n') | |
14 | |
15 output_file = argvs[1] | |
16 explain_plan = argvs[2] | |
17 output_header = argvs[3] | |
18 str_select = argvs[4] | |
19 | |
20 conn = sqlite3.connect(':memory:') | |
21 conn.row_factory = sqlite3.Row | |
22 conn.text_factory = lambda x: unicode(x, "utf-8", "ignore") | |
23 cur = conn.cursor() | |
24 | |
25 for i in range(1, num_table + 1): | |
26 input_file = argvs[3 * i + 2] | |
27 input_header = argvs[3 * i + 3] | |
28 str_index = argvs[3 * i + 4] | |
29 | |
30 with open(input_file,'rb') as infile: | |
31 dr = csv.reader(infile, delimiter='\t') | |
32 to_db = [] | |
33 row_count = 0 | |
34 for row in dr: | |
35 row_count += 1 | |
36 values =[] | |
37 col_count = 0 | |
38 for col in row: | |
39 col_count += 1 | |
40 values.append(col) | |
41 if input_header == '1' and row_count == 1: | |
42 header = values | |
43 print('Header:') | |
44 else: | |
45 to_db.append(values) | |
46 | |
47 # PREPARE DDL&DML | |
48 str_table1 = 't' + str(i) + '(' | |
49 str_table2 = 't' + str(i) + '(' | |
50 str_value = 'VALUES (' | |
51 for j in range(col_count): | |
52 if input_header == '1': | |
53 print(' ' + str(j+1) + ' ' + header[j]); | |
54 col_name = header[j] | |
55 else: | |
56 col_name = 'c' + str(j + 1) | |
57 str_table1 = str_table1 + col_name + ' NUMERIC' | |
58 str_table2 = str_table2 + col_name + ' ' | |
59 str_value = str_value + '?' | |
60 if j != col_count - 1: | |
61 str_table1 = str_table1 + ',' | |
62 str_table2 = str_table2 + ',' | |
63 str_value = str_value + ',' | |
64 str_table1 = str_table1 + ')' | |
65 str_table2 = str_table2 + ')' | |
66 str_value = str_value + ')' | |
67 print('') | |
68 | |
69 # CREATE TABLE | |
70 str_create = 'CREATE TABLE ' + str_table1 + ';' | |
71 print(str_create) | |
72 time_start = time.time() | |
73 cur.execute(str_create) | |
74 print('Elapsed Time: ' + str(time.time() - time_start) + '\n') | |
75 | |
76 # LOAD DATA | |
77 str_insert = 'INSERT INTO ' + str_table2 + ' ' + str_value + ';' | |
78 print(str_insert) | |
79 time_start = time.time() | |
80 cur.executemany(str_insert, to_db) | |
81 conn.commit() | |
82 print('Elapsed Time: ' + str(time.time() - time_start) + '\n') | |
83 | |
84 # CREATE INDEX | |
85 array_idx = str_index.split(',') | |
86 for col_idx in array_idx: | |
87 if col_idx != '': | |
88 time_start = time.time() | |
89 if input_header == '1': | |
90 sql_index = 'CREATE INDEX idx_t' + str(i) +'c' + col_idx + ' on t' + str(i) + '(' + header[int(col_idx) - 1] + ');' | |
91 else: | |
92 sql_index = 'CREATE INDEX idx_t' + str(i) +'c' + col_idx + ' on t' + str(i) + '(c' + col_idx + ');' | |
93 cur.execute(sql_index) | |
94 print(sql_index) | |
95 print('Elapsed Time: ' + str(time.time() - time_start) + '\n') | |
96 | |
97 # SELECT | |
98 time_start = time.time() | |
99 print(str_select) | |
100 cur.execute(str_select) | |
101 print('Elapsed Time: ' + str(time.time() - time_start) + '\n') | |
102 | |
103 # OUTPUT | |
104 out = open(output_file, 'w') | |
105 | |
106 # HEADER | |
107 if output_header == '1': | |
108 col_count = 0 | |
109 for col in cur.description: | |
110 col_count += 1 | |
111 if col_count != len(cur.description): | |
112 out.write(str(col[0]) + '\t') | |
113 else: | |
114 out.write(str(col[0]) + '\n') | |
115 | |
116 # CONTENT | |
117 for row in cur: | |
118 col_count = 0 | |
119 for col in row: | |
120 col_count += 1 | |
121 if col_count != len(row): | |
122 out.write(str(col) + '\t') | |
123 else: | |
124 out.write(str(col) + '\n') | |
125 | |
126 out.close() | |
127 | |
128 # EXPLAIN PLAN | |
129 if explain_plan == '1': | |
130 #conn.executescript('.explain on') | |
131 cur.execute('explain query plan ' + str_select) | |
132 for row in cur: | |
133 col_count = 0 | |
134 for col in row: | |
135 col_count += 1 | |
136 if col_count != len(row): | |
137 print(str(col) + '\t') | |
138 else: | |
139 print(str(col) + '\n') | |
140 | |
141 cur.close() | |
142 conn.close() | |
143 |