Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/rdflib/plugins/parsers/pyMicrodata/microdata.py @ 1:56ad4e20f292 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
author | guerler |
---|---|
date | Fri, 31 Jul 2020 00:32:28 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
0:d30785e31577 | 1:56ad4e20f292 |
---|---|
1 # -*- coding: utf-8 -*- | |
2 """ | |
3 | |
4 The core of the Microdata->RDF conversion, a more or less verbatim implementation of the | |
5 U{W3C IG Note<http://www.w3.org/TR/microdata-rdf/>}. Because the implementation was also used to check | |
6 the note itself, it tries to be fairly close to the text. | |
7 | |
8 | |
9 @organization: U{World Wide Web Consortium<http://www.w3.org>} | |
10 @author: U{Ivan Herman<http://www.w3.org/People/Ivan/>} | |
11 @license: This software is available for use under the | |
12 U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>} | |
13 """ | |
14 | |
15 """ | |
16 $Id: microdata.py,v 1.4 2012/09/05 16:40:43 ivan Exp $ | |
17 $Date: 2012/09/05 16:40:43 $ | |
18 | |
19 Added a reaction on the RDFaStopParsing exception: if raised while setting up the local execution context, parsing | |
20 is stopped (on the whole subtree) | |
21 """ | |
22 | |
23 import sys | |
24 if sys.version_info[0] >= 3 : | |
25 from urllib.parse import urlsplit, urlunsplit | |
26 else : | |
27 from urllib.parse import urlsplit, urlunsplit | |
28 | |
29 from types import * | |
30 | |
31 import rdflib | |
32 from rdflib import URIRef | |
33 from rdflib import Literal | |
34 from rdflib import BNode | |
35 from rdflib import Namespace | |
36 if rdflib.__version__ >= "3.0.0" : | |
37 from rdflib import Graph | |
38 from rdflib import RDF as ns_rdf | |
39 from rdflib import RDFS as ns_rdfs | |
40 from rdflib import XSD as ns_xsd | |
41 else : | |
42 from rdflib.Graph import Graph | |
43 from rdflib.RDFS import RDFSNS as ns_rdfs | |
44 from rdflib.Literal import _XSD_NS as ns_xsd | |
45 from rdflib.RDF import RDFNS as ns_rdf | |
46 | |
47 ns_owl = Namespace("http://www.w3.org/2002/07/owl#") | |
48 | |
49 from .registry import registry, vocab_names | |
50 from .utils import generate_RDF_collection, get_Literal, get_time_type | |
51 from .utils import get_lang_from_hierarchy, is_absolute_URI, generate_URI, fragment_escape | |
52 | |
53 MD_VOCAB = "http://www.w3.org/ns/md#" | |
54 RDFA_VOCAB = URIRef("http://www.w3.org/ns/rdfa#usesVocabulary") | |
55 | |
56 from . import debug | |
57 | |
58 # Existing predicate schemes | |
class PropertySchemes:
    """
    Names of the predicate (property URI) generation schemes that a registry
    entry may select through its C{"propertyURI"} key.
    """
    # The property URI is built from the vocabulary URI plus the property name
    vocabulary = "vocabulary"
    # The property URI is built from the current type and the chain of enclosing property names
    contextual = "contextual"
62 | |
class ValueMethod:
    """
    Names of the strategies for serialising several values of one property,
    as selected by the C{"multipleValues"} key of a registry entry.
    """
    # One triple per value
    unordered = "unordered"
    # All values are put into a single RDF collection
    list = "list"
66 | |
67 # ---------------------------------------------------------------------------- | |
68 | |
class Evaluation_Context:
    """
    Evaluation context structure. See Section 4.1 of the
    U{W3C IG Note<http://www.w3.org/TR/microdata-rdf/>} for the details.

    @ivar current_type: an absolute URL for the current type, used when an item does not contain an item type
    @ivar memory: mapping from items to RDF subjects
    @type memory: dictionary
    @ivar current_name: an absolute URL for the in-scope name, used for generating URIs for properties of items without an item type
    @ivar current_vocabulary: an absolute URL for the current vocabulary, from the registry
    """
    def __init__(self):
        self.current_type = None
        self.memory = {}
        self.current_name = None
        self.current_vocabulary = None

    def get_memory(self, item):
        """
        Get the memory content (ie, RDF subject) for 'item', or None if not stored yet.

        @param item: an 'item', in microdata terminology
        @type item: DOM Element Node
        @return: None, or an RDF Subject (URIRef or BNode)
        """
        return self.memory.get(item)

    def set_memory(self, item, subject):
        """
        Set the memory content, ie, the subject, for 'item'.

        @param item: an 'item', in microdata terminology
        @type item: DOM Element Node
        @param subject: RDF Subject
        @type subject: URIRef or Blank Node
        """
        self.memory[item] = subject

    def new_copy(self, itype):
        """
        Create a copy of this context with a new current type, as required by the generation algorithm.

        The memory is copied into a fresh dictionary (a shallow copy), so entries added to the
        copy do not show up in this context. The current name and vocabulary are carried over.

        @param itype: an absolute URL for the current type
        @return: a new evaluation context instance
        """
        retval = Evaluation_Context()
        retval.memory = dict(self.memory)
        retval.current_type = itype
        retval.current_name = self.current_name
        retval.current_vocabulary = self.current_vocabulary
        return retval

    def __str__(self):
        """Human readable dump of the context; only used for debugging output."""
        template = (
            "Evaluation context:\n"
            " current type: %s\n"
            " current name: %s\n"
            " current vocabulary: %s\n"
            " memory: %s\n"
            "----\n"
        )
        return template % (self.current_type, self.current_name, self.current_vocabulary, self.memory)
133 | |
class Microdata:
    """
    This class encapsulates methods that are defined by the U{microdata spec<http://dev.w3.org/html5/md/Overview.html>},
    as opposed to the RDF conversion note.

    @ivar document: top of the DOM tree, as returned by the HTML5 parser
    @ivar base: the base URI of the Dom tree, either set from the outside or via a @base element
    """
    def __init__(self, document, base=None):
        """
        @param document: top of the DOM tree, as returned by the HTML5 parser
        @param base: the base URI of the Dom tree; only used if the document has no C{<base>} element with an @href
        """
        self.document = document

        # Set the document base, will be used to generate top level URIs.
        # The first <base> element carrying an @href overrides the value coming from the caller.
        self.base = base
        for set_base in document.getElementsByTagName("base"):
            if set_base.hasAttribute("href"):
                self.base = set_base.getAttribute("href")
                break

    def get_top_level_items(self):
        """
        A top level item is an element that has the @itemscope set, but no @itemprop.

        The tree is walked depth-first; an element is added after the top level items found
        among its descendants (ie, nested top level items precede their ancestor in the result).

        @return: list of items (ie, DOM Nodes)
        """
        items = []

        def collect_items(node):
            for child in node.childNodes:
                if child.nodeType == node.ELEMENT_NODE:
                    collect_items(child)
            if node.hasAttribute("itemscope") and not node.hasAttribute("itemprop"):
                # This is also a top level item
                items.append(node)

        collect_items(self.document)
        return items

    def get_item_properties(self, item):
        """
        Collect the item's properties, ie, all DOM descendent nodes with a non-empty @itemprop until the
        subtree hits another @itemscope. Elements referred to via @itemref are also crawled.

        An element that is reached more than once (eg, through a redundant @itemref) is only considered
        the first time; per the microdata spec this is an error, but it is silently ignored here.

        @param item: current item
        @type item: DOM Node
        @return: array of items, ie, DOM Nodes
        """
        def element_children(node):
            return [child for child in node.childNodes if child.nodeType == child.ELEMENT_NODE]

        results = []
        # Nodes already handled; a set gives constant time lookup (DOM nodes are already used as
        # dictionary keys by the evaluation context, ie, they are hashable)
        seen = {item}

        pending = element_children(item)
        if item.hasAttribute("itemref"):
            for ref_id in item.getAttribute("itemref").strip().split():
                obj = self.getElementById(ref_id)
                if obj is not None:
                    pending.append(obj)

        # The nodes are kept on a stack in reverse order: popping from the end visits them in exactly
        # the order "take the first pending node, put its children in front of the rest"
        stack = pending[::-1]
        while stack:
            current = stack.pop()
            if current in seen:
                continue
            seen.add(current)

            # @itemscope is the barrier...
            if not current.hasAttribute("itemscope"):
                stack.extend(reversed(element_children(current)))

            if current.hasAttribute("itemprop") and current.getAttribute("itemprop").strip() != "":
                results.append(current)

        return results

    def getElementById(self, id):
        """
        This is a method defined for DOM 2 HTML, but the HTML5 parser does not seem to define it. Oh well...

        The descendants of an element are inspected before the element itself; the search stops at the first match.

        @param id: value of an @id attribute to look for
        @return: the first element found whose @id attribute matches C{id}, or None if there is none
        """
        def find(node):
            for child in node.childNodes:
                if child.nodeType == node.ELEMENT_NODE:
                    hit = find(child)
                    if hit is not None:
                        return hit
            if node.hasAttribute("id") and node.getAttribute("id") == id:
                return node
            return None

        return find(self.document)
245 | |
class MicrodataConversion(Microdata):
    """
    Top level class encapsulating the conversion algorithms as described in the W3C note.

    @ivar graph: an RDF graph; an RDFLib Graph
    @type graph: RDFLib Graph
    @ivar document: top of the DOM tree, as returned by the HTML5 parser
    @ivar ns_md: the Namespace for the microdata vocabulary
    @ivar base: the base of the Dom tree, either set from the outside or via a @base element
    """
    def __init__(self, document, graph, base=None, vocab_expansion=False, vocab_cache=True):
        """
        @param graph: an RDF graph; an RDFLib Graph
        @type graph: RDFLib Graph
        @param document: top of the DOM tree, as returned by the HTML5 parser
        @keyword base: the base of the Dom tree, either set from the outside or via a @base element
        @keyword vocab_expansion: whether vocab expansion should be performed or not
        @type vocab_expansion: Boolean
        @keyword vocab_cache: if vocabulary expansion is done, then perform caching of the vocabulary data
        @type vocab_cache: Boolean
        """
        Microdata.__init__(self, document, base)
        self.vocab_expansion = vocab_expansion
        self.vocab_cache = vocab_cache
        self.graph = graph
        self.ns_md = Namespace(MD_VOCAB)
        self.graph.bind("md", MD_VOCAB)
        # Set to True as soon as a type belonging to a registered vocabulary is met
        self.vocabularies_used = False

        # Get the vocabularies defined in the registry bound to proper names, if any...
        for vocab in registry:
            if vocab in vocab_names:
                self.graph.bind(vocab_names[vocab], vocab)
            else:
                hvocab = vocab + '#'
                if hvocab in vocab_names:
                    self.graph.bind(vocab_names[hvocab], hvocab)

        # Add the prefixes defined in the RDFa initial context to improve the outlook of the output.
        # This is deliberately best effort: the pyRdfa package may not be available, in which
        # case the prefixes are simply not bound.
        try:
            try:
                from ..pyRdfa.initialcontext import initial_context
            except Exception:
                from pyRdfa.initialcontext import initial_context
            vocabs = initial_context["http://www.w3.org/2011/rdfa-context/rdfa-1.1"].ns
            for prefix in list(vocabs.keys()):
                uri = vocabs[prefix]
                if uri not in registry:
                    # if it is in the registry, then it may have needed some special microdata massage...
                    self.graph.bind(prefix, uri)
        except Exception:
            pass

    def convert(self):
        """
        Top level entry to convert and generate all the triples. It finds the top level items,
        and generates triples for each of them; additionally, it generates a top level entry point
        to the items from base in the form of an RDF list.
        """
        item_list = [
            self.generate_triples(top_level_item, Evaluation_Context())
            for top_level_item in self.get_top_level_items()
        ]
        item_collection = generate_RDF_collection(self.graph, item_list)
        self.graph.add((URIRef(self.base), self.ns_md["item"], item_collection))

        # If a registered vocabulary was used, this is the time for the vocabulary processing:
        # the basic expansion is always there; the follow-your-nose inclusion of vocabulary is optional.
        if self.vocabularies_used:
            # Best effort: if the pyRdfa package cannot be imported (or the processing fails),
            # the graph is left as it is. Too bad, but life should go on...
            try:
                try:
                    from ..pyRdfa.rdfs.process import MiniOWL, process_rdfa_sem
                    from ..pyRdfa.options import Options
                except Exception:
                    from pyRdfa.rdfs.process import MiniOWL, process_rdfa_sem
                    from pyRdfa.options import Options
                if self.vocab_expansion:
                    # This is the full deal
                    options = Options(vocab_expansion=self.vocab_expansion, vocab_cache=self.vocab_cache)
                    process_rdfa_sem(self.graph, options)
                else:
                    MiniOWL(self.graph).closure()
            except Exception:
                pass

    def generate_triples(self, item, context):
        """
        Generate the triples for a specific item. See the W3C Note for the details.

        @param item: the DOM Node for the specific item
        @type item: DOM Node
        @param context: an instance of an evaluation context
        @type context: L{Evaluation_Context}
        @return: a URIRef or a BNode for the (RDF) subject
        """
        # Step 1,2: if the subject has to be set, store it in memory
        subject = context.get_memory(item)
        if subject is None:
            # nop, there is no subject set. If there is a valid @itemid, that carries it
            if item.hasAttribute("itemid") and is_absolute_URI(item.getAttribute("itemid")):
                subject = URIRef(item.getAttribute("itemid").strip())
            else:
                subject = BNode()
            context.set_memory(item, subject)

        # Step 3: set the type triples if any
        types = []
        if item.hasAttribute("itemtype"):
            types = item.getAttribute("itemtype").strip().split()
            for t in types:
                if is_absolute_URI(t):
                    self.graph.add((subject, ns_rdf["type"], URIRef(t)))

        # Step 4, 5 and 6 to set the typing variable
        itype = None
        if types:
            if is_absolute_URI(types[0]):
                itype = types[0]
                context.current_name = None
            elif context.current_type is not None:
                itype = context.current_type

        # Step 7, 8, 9: Check the registry for possible keys and set the vocab
        vocab = None
        if itype is not None:
            for key in registry:
                if itype.startswith(key):
                    # There is a predefined vocabulary for this type...
                    vocab = key
                    # Step 7: Issue an rdfa usesVocabulary triple
                    self.graph.add((URIRef(self.base), RDFA_VOCAB, URIRef(vocab)))
                    self.vocabularies_used = True
                    break
            # The registry has not set the vocabulary; has to be extracted from the type
            if vocab is None:
                parsed = urlsplit(itype)
                if parsed.fragment != "":
                    vocab = urlunsplit((parsed.scheme, parsed.netloc, parsed.path, parsed.query, "")) + '#'
                elif parsed.path == "" and parsed.query == "":
                    vocab = itype
                    if vocab[-1] != '/':
                        vocab += '/'
                else:
                    vocab = itype.rsplit('/', 1)[0] + '/'

        # Step 9: update vocab in the context
        if vocab is not None:
            context.current_vocabulary = vocab
        elif item.hasAttribute("itemtype"):
            context.current_vocabulary = None

        # Step 10: set up a property list; this will be used to generate triples later.
        # each entry in the dictionary is an array of RDF objects
        property_list = {}

        # Step 11: Get the item properties and run a cycle on those
        for prop in self.get_item_properties(item):
            for name in prop.getAttribute("itemprop").strip().split():
                # 11.1.1. set a new context
                new_context = context.new_copy(itype)
                # 11.1.2, generate the URI for the property name, that will be the predicate
                # Also update the context
                predicate = self.generate_predicate_URI(name, new_context)
                new_context.current_name = predicate
                # 11.1.3, generate the property value.
                # Note that a recursion may occur there if a new item is hit (in which case
                # the return value is a RDF resource chaining to a subject)
                value = self.get_property_value(prop, new_context)
                # 11.1.5, store all the values
                property_list.setdefault(predicate, []).append(value)

        # step 12: generate the triples
        for predicate, values in property_list.items():
            self.generate_property_values(subject, URIRef(predicate), values, context)

        # Step 13: return the subject to the caller
        return subject

    def generate_predicate_URI(self, name, context):
        """
        Generate a full URI for a predicate, using the type, the vocabulary, etc.

        For details of this entry, see Section 4.4 of the W3C Note. As a side effect, if the registry
        defines C{subPropertyOf} or C{equivalentProperty} mappings for the property, the corresponding
        triples are added to the graph.

        @param name: name of the property, ie, what appears in @itemprop
        @param context: an instance of an evaluation context
        @type context: L{Evaluation_Context}
        @return: the URI of the predicate, as a string
        """
        if debug:
            print(("name: %s, %s" % (name, context)))

        # Step 1: absolute URI-s are fine, take them as they are
        if is_absolute_URI(name):
            return name

        # Step 2: if there is neither a type nor a vocabulary, the name is just used as a fragment on the base
        if context.current_type is None and context.current_vocabulary is None:
            # avoid a doubled '#' if the base already ends with one
            b = self.base[:-1] if self.base.endswith('#') else self.base
            return b + '#' + fragment_escape(name)

        # Step 3: set the scheme
        try:
            if context.current_vocabulary in registry and "propertyURI" in registry[context.current_vocabulary]:
                scheme = registry[context.current_vocabulary]["propertyURI"]
            else:
                scheme = PropertySchemes.vocabulary
        except Exception:
            # This is when the structure of the registry is broken
            scheme = PropertySchemes.vocabulary

        name = fragment_escape(name)
        if scheme == PropertySchemes.contextual:
            # Step 5.1
            s = context.current_name
            if s is not None and s.startswith("http://www.w3.org/ns/md?type="):
                # Step 5.2
                expandedURI = s + '.' + name
            else:
                # Step 5.3
                expandedURI = "http://www.w3.org/ns/md?type=" + fragment_escape(context.current_type) + "&prop=" + name
        else:
            # Step 4
            if context.current_vocabulary.endswith(('#', '/')):
                expandedURI = context.current_vocabulary + name
            else:
                expandedURI = context.current_vocabulary + '#' + name

        # see if there are subproperty/equivalentproperty relations
        try:
            vocab_mapping = registry[context.current_vocabulary]["properties"][name]
            expandedURIRef = URIRef(expandedURI)
        except Exception:
            # no harm done, no extra vocabulary term
            return expandedURI

        # if we got that far, we may have some mappings; each of them may be a single URI or a list of URIs
        for relation, namespace in (("subPropertyOf", ns_rdfs), ("equivalentProperty", ns_owl)):
            try:
                targets = vocab_mapping[relation]
                if targets is not None:
                    if not isinstance(targets, list):
                        targets = [targets]
                    for target in targets:
                        self.graph.add((expandedURIRef, namespace[relation], URIRef(target)))
            except Exception:
                # Ok, no such relation for this property
                pass

        return expandedURI

    def get_property_value(self, node, context):
        """
        Generate an RDF object, ie, the value of a property. Note that if this element contains
        an @itemscope, then a recursive call to L{MicrodataConversion.generate_triples} is done and the
        return value of that method (ie, the subject for the corresponding item) is returned as an
        object.

        Otherwise, either URIRefs are created for <a>, <img>, etc, elements, or a Literal; the latter
        gets a time-related type for the <time> element, and a numeric type for <meter> and <data>
        if their @value can be read as an integer or a float.

        @param node: the DOM Node for which the property values should be generated
        @type node: DOM Node
        @param context: an instance of an evaluation context
        @type context: L{Evaluation_Context}
        @return: an RDF resource (URIRef, BNode, or Literal)
        """
        # Elements whose value is a URI, with the attribute that carries that URI
        URI_attrs = {
            "audio": "src",
            "embed": "src",
            "iframe": "src",
            "img": "src",
            "source": "src",
            "track": "src",
            "video": "src",
            "data": "src",
            "a": "href",
            "area": "href",
            "link": "href",
            "object": "data",
        }
        lang = get_lang_from_hierarchy(self.document, node)

        if node.hasAttribute("itemscope"):
            # THIS IS A RECURSION ENTRY POINT!
            return self.generate_triples(node, context)

        if node.tagName in URI_attrs and node.hasAttribute(URI_attrs[node.tagName]):
            return URIRef(generate_URI(self.base, node.getAttribute(URI_attrs[node.tagName]).strip()))

        if node.tagName == "meta" and node.hasAttribute("content"):
            if lang:
                return Literal(node.getAttribute("content"), lang=lang)
            return Literal(node.getAttribute("content"))

        if node.tagName == "meter" or node.tagName == "data":
            if not node.hasAttribute("value"):
                return Literal("")
            val = node.getAttribute("value")
            # check whether the attribute value can be defined as an integer or a float;
            # the literal always keeps the original lexical form
            dt = None
            for convert, type_name in ((int, "integer"), (float, "float")):
                try:
                    convert(val)
                except (ValueError, TypeError):
                    continue
                dt = ns_xsd[type_name]
                break
            if dt:
                return Literal(val, datatype=dt)
            # Sigh, this is not a valid value, but let it go through as a plain literal nevertheless
            return Literal(val)

        if node.tagName == "time" and node.hasAttribute("datetime"):
            litval = node.getAttribute("datetime")
            dtype = get_time_type(litval)
            if dtype:
                return Literal(litval, datatype=dtype)
            return Literal(litval)

        if lang:
            return Literal(get_Literal(node), lang=lang)
        return Literal(get_Literal(node))

    def generate_property_values(self, subject, predicate, objects, context):
        """
        Generate the property values for a specific subject and predicate. The registry entry of the
        predicate's vocabulary specifies (via C{multipleValues}, possibly overridden per property)
        whether the objects should be added in an RDF list or each triple individually; the default
        is individual triples.

        @param subject: RDF subject
        @type subject: RDFLib Node (URIRef or blank node)
        @param predicate: RDF predicate
        @type predicate: RDFLib URIRef
        @param objects: RDF objects
        @type objects: list of RDFLib nodes (URIRefs, Blank Nodes, or literals)
        @param context: evaluation context (currently not used by this method)
        @type context: L{Evaluation_Context}
        """
        # generate triples with a list, or a bunch of triples, depending on the registry
        # The biggest complication is to find the method...
        method = ValueMethod.unordered

        # Plain string version of the predicate, used to cut off the vocabulary prefix
        pred_key = "%s" % predicate
        for key in registry:
            if not predicate.startswith(key):
                continue
            # This is the part of the registry corresponding to the predicate's vocabulary
            registry_object = registry[key]
            try:
                if "multipleValues" in registry_object:
                    method = registry_object["multipleValues"]
                # The generic definition can be overwritten for a specific property. The simplest
                # is to rely on a 'try' with the right structure...
                try:
                    method = registry_object["properties"][pred_key[len(key):]]["multipleValues"]
                except Exception:
                    pass
            except Exception:
                pass

        if method == ValueMethod.unordered:
            for obj in objects:
                self.graph.add((subject, predicate, obj))
        else:
            self.graph.add((subject, predicate, generate_RDF_collection(self.graph, objects)))
658 | |
659 | |
660 | |
661 | |
662 | |
663 |