# tab2rdf.py version:0.1
# USAGE: python tab2rdf.py <input_file> <output_file> <output_format> <namespace>
#        <s1_col> <p1_val> <o1_col> <o1_uri/val> <s2_col> <p2_val> <o2_col> <o2_uri/val> ..
# USAGE: python tab2rdf.py <input_file> <output_file> <output_format> multi_namespaces <column1> <namespace1> <column2> <namespace2> ..
#
# NOTE(review): the code below only implements the first usage form. The 4th
# argument is always used verbatim as the single namespace prefix, so the
# "multi_namespaces" form is not handled here. <output_format> is accepted
# but never read; every triple is written as one "<s> <p> <o> ." line.

import csv
import sqlite3
import sys
import time

USAGE = (
    'USAGE: python tab2rdf.py <input_file> <output_file> <output_format> <namespace> '
    '<s1_col> <p1_val> <o1_col> <o1_uri/val> [<s2_col> <p2_val> <o2_col> <o2_uri/val> ..]\n'
)

# Number of fixed arguments (script name, input, output, format, namespace)
# that precede the repeating <s_col> <p_val> <o_col> <o_uri/val> groups.
_FIXED_ARGS = 5
_SPEC_SIZE = 4


def _parse_triple_specs(argvs):
    """Parse the repeating 4-argument triple specifications from *argvs*.

    Each group is ``<s_col> <p_val> <o_col> <o_uri/val>``: 1-based subject
    column, predicate local name, 1-based object column, and an integer flag
    (non-zero means the object is written as a URI, zero as a literal).

    Returns a list of ``(s_index, p_val, o_index, o_is_uri)`` tuples with
    0-based column indexes. Trailing arguments that do not form a complete
    group are ignored. Raises ValueError if a column or flag is not an integer.
    """
    # Floor division: the count must be an int for range(), and only
    # complete groups after the fixed arguments are counted.
    num_triple = (len(argvs) - _FIXED_ARGS) // _SPEC_SIZE
    specs = []
    for i in range(num_triple):
        base = _SPEC_SIZE * i + _FIXED_ARGS
        s_col, p_val, o_col, o_flag = argvs[base:base + _SPEC_SIZE]
        specs.append((int(s_col) - 1, p_val, int(o_col) - 1, bool(int(o_flag))))
    return specs


def _convert(input_file, output_file, ns, specs):
    """Write one triple line per (row, spec) pair from a tab-separated file.

    Subjects and predicates are always written as ``<ns + value>``; objects
    are written as ``<ns + value>`` when the spec's URI flag is set and as a
    double-quoted literal otherwise.

    Raises IndexError if a non-empty row has fewer columns than a spec needs.
    """
    # The csv module requires text mode with newline=''. surrogateescape lets
    # bytes that are not valid UTF-8 pass through to the output unchanged.
    # NOTE(review): literal values are written without escaping '"' or '\\'.
    with open(input_file, newline='', encoding='utf-8',
              errors='surrogateescape') as infile, \
            open(output_file, 'w', encoding='utf-8',
                 errors='surrogateescape') as out:
        for row in csv.reader(infile, delimiter='\t'):
            if not row:
                # csv.reader yields [] for blank lines; nothing to emit.
                continue
            lines = []
            for s_idx, p_val, o_idx, o_is_uri in specs:
                s_val = row[s_idx]
                o_val = row[o_idx]
                if o_is_uri:
                    lines.append('<' + ns + s_val + '> <' + ns + p_val + '> <' + ns + o_val + '> .\n')
                else:
                    lines.append('<' + ns + s_val + '> <' + ns + p_val + '> "' + o_val + '" .\n')
            out.writelines(lines)


def main(argvs=None):
    """Run the converter; *argvs* defaults to ``sys.argv``.

    Returns 0 on success and 1 when the command line is too short or a
    column/flag argument is not an integer (a message goes to stderr).
    """
    if argvs is None:
        argvs = sys.argv
    if len(argvs) < _FIXED_ARGS:
        sys.stderr.write(USAGE)
        return 1

    input_file = argvs[1]
    output_file = argvs[2]
    # argvs[3] is <output_format>; it is accepted but not used.
    ns = argvs[4]

    try:
        specs = _parse_triple_specs(argvs)
    except ValueError as err:
        sys.stderr.write('Invalid column number or URI flag: ' + str(err) + '\n')
        sys.stderr.write(USAGE)
        return 1

    print('Number of Triples for One Column: ' + str(len(specs)) + '\n')
    _convert(input_file, output_file, ns, specs)
    return 0


if __name__ == '__main__':
    sys.exit(main())