annotate tab2rdf.py @ 6:c1f5078f2a46 draft

Uploaded
author sem4j
date Wed, 25 Sep 2013 02:39:20 -0400
parents 75c072490c2d
children 360ae4d3d06c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
75c072490c2d Uploaded
sem4j
parents:
diff changeset
1 # tab2rdf.py version:0.1
75c072490c2d Uploaded
sem4j
parents:
diff changeset
2 # USAGE: python tab2rdf.py <input_file> <output_file> <output_format> <namespace>
75c072490c2d Uploaded
sem4j
parents:
diff changeset
3 # <s1_col> <p1_val> <o1_col> <o1_uri/val> <s2_col> <p2_val> <o2_col> <o2_uri/val> ..
75c072490c2d Uploaded
sem4j
parents:
diff changeset
4 # USAGE: python tab2rdf.py <input_file> <output_file> <output_format> multi_namespaces <column1> <namespace1> <column2> <namespace2> ..
75c072490c2d Uploaded
sem4j
parents:
diff changeset
5
75c072490c2d Uploaded
sem4j
parents:
diff changeset
6 import sys, csv, sqlite3, time
75c072490c2d Uploaded
sem4j
parents:
diff changeset
7
75c072490c2d Uploaded
sem4j
parents:
diff changeset
def _open_tsv(path):
    """Open *path* for reading in the mode this interpreter's csv module expects."""
    if sys.version_info[0] >= 3:
        # Python 3: csv.reader needs a text-mode file opened with newline=''.
        return open(path, 'r', newline='')
    # Python 2: csv.reader needs a binary-mode file.
    return open(path, 'rb')


def main(argvs):
    """Convert a tab-separated file into triple lines, one per row and triple spec.

    ``argvs`` is laid out like ``sys.argv``:

        argvs[1]   path of the tab-separated input file
        argvs[2]   path of the output file (opened with mode 'w')
        argvs[3]   output format (read, but not used by this code)
        argvs[4]   namespace string prepended to every subject, predicate
                   and URI object
        argvs[5:]  groups of four values: <s_col> <p_val> <o_col> <o_uri/val>

    For each group, ``s_col`` and ``o_col`` are 1-based column numbers of the
    input row, ``p_val`` is the predicate name, and the fourth value is an
    integer flag: non-zero writes the object as ``<namespace + value>``, zero
    writes it as a double-quoted literal.

    Raises:
        SystemExit: if fewer than the four fixed arguments are given.
        ValueError: if a column number or the URI flag is not an integer.
        IndexError: if a row has fewer columns than a spec references, or if
            the argument count leaves the last spec group one value short.
    """
    if len(argvs) < 5:
        sys.exit('USAGE: python tab2rdf.py <input_file> <output_file> '
                 '<output_format> <namespace> '
                 '<s1_col> <p1_val> <o1_col> <o1_uri/val> ..')

    # Floor division keeps this an int on Python 3 as well; plain "/" yields a
    # float there and range() would reject it.
    num_triple = (len(argvs) - 4) // 4
    print('Number of Triples for One Column: ' + str(num_triple) + '\n')

    input_file = argvs[1]
    output_file = argvs[2]
    output_format = argvs[3]  # accepted for interface compatibility; unused here
    ns = argvs[4]

    # Parse the triple specs once instead of re-running int() for every row.
    specs = []
    for i in range(num_triple):
        base = 4 * i + 5
        specs.append((
            int(argvs[base]) - 1,      # subject column, converted to 0-based
            argvs[base + 1],           # predicate name
            int(argvs[base + 2]) - 1,  # object column, converted to 0-based
            int(argvs[base + 3]),      # non-zero: object is a URI, zero: literal
        ))

    # Context managers close both files even when a row raises.
    with open(output_file, 'w') as out:
        with _open_tsv(input_file) as infile:
            for row in csv.reader(infile, delimiter='\t'):
                for s_idx, p_val, o_idx, o_is_uri in specs:
                    s_val = row[s_idx]
                    o_val = row[o_idx]
                    if o_is_uri:
                        out.write('<' + ns + s_val + '> <' + ns + p_val + '> <' + ns + o_val + '> .\n')
                    else:
                        # NOTE(review): the literal is written verbatim; quotes or
                        # backslashes in the cell are not escaped. Confirm whether
                        # consumers expect escaping before changing the output.
                        out.write('<' + ns + s_val + '> <' + ns + p_val + '> "' + o_val + '" .\n')


if __name__ == '__main__':
    main(sys.argv)
75c072490c2d Uploaded
sem4j
parents:
diff changeset
40