comparison env/lib/python3.7/site-packages/rdflib/tools/graphisomorphism.py @ 0:26e78fe6e8c4 draft

"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
author shellac
date Sat, 02 May 2020 07:14:21 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:26e78fe6e8c4
1 """
2 A commandline tool for testing if RDF graphs are isomorphic, i.e. equal
3 if BNode labels are ignored.
4 """
5
6 from rdflib.graph import Graph
7 from rdflib import BNode
try:
    from itertools import combinations
    # Touch the name so the import is not reported as unused.
    assert combinations
except ImportError:  # Python == 2.5
    # Pure-Python stand-in adapted from
    # http://docs.python.org/2/library/itertools.html#itertools.combinations
    def combinations(iterable, r):
        """Yield every length-``r`` tuple of items from ``iterable``.

        Tuples are produced in lexicographic order of item positions:

            combinations('ABCD', 2) --> AB AC AD BC BD CD
            combinations(range(4), 3) --> 012 013 023 123

        Nothing is yielded when ``r`` exceeds the number of items.
        """
        items = tuple(iterable)
        size = len(items)
        if r > size:
            return
        positions = list(range(r))
        yield tuple(items[k] for k in positions)
        while True:
            # Scan from the right for a position that has not yet reached
            # its final value (slot p tops out at p + size - r).
            pivot = r - 1
            while pivot >= 0 and positions[pivot] == pivot + size - r:
                pivot -= 1
            if pivot < 0:
                # Every position is at its maximum: all tuples were emitted.
                return
            positions[pivot] += 1
            # Reset everything right of the pivot to consecutive values.
            for follower in range(pivot + 1, r):
                positions[follower] = positions[follower - 1] + 1
            yield tuple(items[k] for k in positions)
33
34
class IsomorphicTestableGraph(Graph):
    """
    Graph whose ``==`` and ``!=`` compare structure while ignoring the
    labels of blank nodes (BNodes).

    Ported from:
    http://www.w3.org/2001/sw/DataAccess/proto-tests/tools/rdfdiff.py
    (Sean B Palmer's RDF Graph Isomorphism Tester)
    """

    def __init__(self, **kargs):
        """Forward all keyword arguments to ``Graph.__init__``."""
        super(IsomorphicTestableGraph, self).__init__(**kargs)
        # Not read by any method of this class.
        self.hash = None

    # Defining __eq__ without __hash__ makes Python 3 set __hash__ to None,
    # which would leave instances unhashable.  Per the note on
    # internal_hash(), the store needs to hash graphs, so the parent's
    # implementation is kept explicitly.  The content-based value is
    # deliberately exposed through internal_hash() instead.
    __hash__ = Graph.__hash__

    def internal_hash(self):
        """
        Return a hash of the whole graph that does not depend on BNode
        labels or on triple iteration order.

        This is defined instead of __hash__ to avoid a circular recursion
        scenario with the Memory store for rdflib which requires a hash
        lookup in order to return a generator of triples
        """
        return hash(tuple(sorted(self.hashtriples())))

    def hashtriples(self):
        """
        Yield one hash per triple, with each BNode replaced by its
        label-independent signature from :meth:`vhash`.
        """
        for triple in self:
            # vhash() of a BNode taken from a triple of this graph is never
            # empty (it covers at least that triple), so this conditional
            # expression selects the same value as the former and/or idiom.
            signature = tuple(
                self.vhash(t) if isinstance(t, BNode) else t
                for t in triple)
            yield hash(signature)

    def vhash(self, term, done=False):
        """
        Return a sorted tuple describing every triple that mentions
        ``term``.

        ``done`` is passed through to :meth:`vhashtriple`; when true, other
        BNodes are not expanded any further.
        """
        return tuple(sorted(self.vhashtriples(term, done)))

    def vhashtriples(self, term, done):
        """Yield the :meth:`vhashtriple` tuple of each triple containing
        ``term``."""
        for t in self:
            if term in t:
                yield tuple(self.vhashtriple(t, term, done))

    def vhashtriple(self, triple, term, done):
        """
        Yield a label-free stand-in for each of the three positions of
        ``triple``:

        * a non-BNode is yielded unchanged;
        * a BNode is yielded as its position index (0, 1 or 2) when it is
          ``term`` itself or when ``done`` is true;
        * any other BNode is yielded as its own :meth:`vhash`, computed
          with ``done=True`` so the expansion stops after one level.
        """
        for p in range(3):
            if not isinstance(triple[p], BNode):
                yield triple[p]
            elif done or (triple[p] == term):
                yield p
            else:
                yield self.vhash(triple[p], done=True)

    def __eq__(self, G):
        """Graph isomorphism testing.

        Returns False when ``G`` is not an IsomorphicTestableGraph or has a
        different number of triples, True when both graphs yield equal
        triples in the same order, and otherwise the result of comparing
        the two :meth:`internal_hash` values.
        """
        if not isinstance(G, IsomorphicTestableGraph):
            return False
        if len(self) != len(G):
            return False
        if list(self) == list(G):
            return True  # @@
        return self.internal_hash() == G.internal_hash()

    def __ne__(self, G):
        """Negative graph isomorphism testing."""
        return not self.__eq__(G)
88
89
def main():
    """Command-line entry point: check that all given RDF graphs are
    pairwise isomorphic.

    Every file named on the command line is parsed with the format chosen
    via ``--format``; with ``-s``/``--stdin`` standard input is parsed as
    an additional graph.  Each pair of graphs is then compared with ``==``.

    Raises:
        AssertionError: if two graphs compare unequal.  The message names
            the two inputs as ``"<name1> != <name2>"``.
    """
    import sys
    from optparse import OptionParser

    usage = '''usage: %prog [options] file1 file2 ... fileN'''
    op = OptionParser(usage=usage)
    op.add_option('-s', '--stdin', action='store_true', default=False,
                  help='Load from STDIN as well')
    op.add_option('--format',
                  default='xml',
                  dest='inputFormat',
                  metavar='RDF_FORMAT',
                  choices=['xml', 'trix', 'n3', 'nt', 'rdfa'],
                  help="The format of the RDF document(s) to compare. " +
                  "One of 'xml','n3','trix', 'nt', " +
                  "or 'rdfa'. The default is %default")

    (options, args) = op.parse_args()

    # Carry each graph together with its display name instead of using the
    # graph as a dict key: this function then never needs to hash a graph,
    # whose __eq__ is an isomorphism test rather than an identity test.
    named_graphs = []
    if options.stdin:
        graph = IsomorphicTestableGraph().parse(
            sys.stdin, format=options.inputFormat)
        named_graphs.append(('(STDIN)', graph))
    for fn in args:
        graph = IsomorphicTestableGraph().parse(
            fn, format=options.inputFormat)
        named_graphs.append((fn, graph))

    # combinations() yields every unordered pair exactly once, so no
    # bookkeeping of already-compared pairs is required.
    for (name1, graph1), (name2, graph2) in combinations(named_graphs, 2):
        # Raise explicitly rather than via ``assert`` so the check still
        # runs when Python is started with -O.
        if not (graph1 == graph2):
            raise AssertionError("%s != %s" % (name1, name2))
125
# Run the comparison only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()