Mercurial > repos > guerler > hhblits
comparison lib/python3.8/site-packages/pip/_vendor/html5lib/html5parser.py @ 0:9e54283cc701 draft
"planemo upload commit d12c32a45bcd441307e632fca6d9af7d60289d44"
| author | guerler |
|---|---|
| date | Mon, 27 Jul 2020 03:47:31 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:9e54283cc701 |
|---|---|
| 1 from __future__ import absolute_import, division, unicode_literals | |
| 2 from pip._vendor.six import with_metaclass, viewkeys | |
| 3 | |
| 4 import types | |
| 5 from collections import OrderedDict | |
| 6 | |
| 7 from . import _inputstream | |
| 8 from . import _tokenizer | |
| 9 | |
| 10 from . import treebuilders | |
| 11 from .treebuilders.base import Marker | |
| 12 | |
| 13 from . import _utils | |
| 14 from .constants import ( | |
| 15 spaceCharacters, asciiUpper2Lower, | |
| 16 specialElements, headingElements, cdataElements, rcdataElements, | |
| 17 tokenTypes, tagTokenTypes, | |
| 18 namespaces, | |
| 19 htmlIntegrationPointElements, mathmlTextIntegrationPointElements, | |
| 20 adjustForeignAttributes as adjustForeignAttributesMap, | |
| 21 adjustMathMLAttributes, adjustSVGAttributes, | |
| 22 E, | |
| 23 _ReparseException | |
| 24 ) | |
| 25 | |
| 26 | |
def parse(doc, treebuilder="etree", namespaceHTMLElements=True, **kwargs):
    """Build a complete document tree from HTML source

    :arg doc: the document to parse as a string or file-like object

    :arg treebuilder: the treebuilder to use when parsing

    :arg namespaceHTMLElements: whether or not to namespace HTML elements

    Any extra keyword arguments are handed on unchanged to
    ``HTMLParser.parse``.

    :returns: parsed tree

    Example:

    >>> from html5lib.html5parser import parse
    >>> parse('<html><body><p>This is a doc</p></body></html>')
    <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>

    """
    builder_class = treebuilders.getTreeBuilder(treebuilder)
    parser = HTMLParser(builder_class,
                        namespaceHTMLElements=namespaceHTMLElements)
    return parser.parse(doc, **kwargs)
| 48 | |
| 49 | |
def parseFragment(doc, container="div", treebuilder="etree", namespaceHTMLElements=True, **kwargs):
    """Parse an HTML fragment as a string or file-like object into a tree

    :arg doc: the fragment to parse as a string or file-like object

    :arg container: the container context to parse the fragment in

    :arg treebuilder: the treebuilder to use when parsing

    :arg namespaceHTMLElements: whether or not to namespace HTML elements

    Any extra keyword arguments are handed on unchanged to
    ``HTMLParser.parseFragment``.

    :returns: parsed tree

    Example:

    >>> from html5lib.html5parser import parseFragment
    >>> parseFragment('<b>this is a fragment</b>')
    <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>

    """
    tb = treebuilders.getTreeBuilder(treebuilder)
    p = HTMLParser(tb, namespaceHTMLElements=namespaceHTMLElements)
    return p.parseFragment(doc, container=container, **kwargs)
| 73 | |
| 74 | |
def method_decorator_metaclass(function):
    """Return a metaclass that applies ``function`` to every plain function
    found in the namespace of a class being created.

    Entries that are not ``types.FunctionType`` instances are left as they
    are.
    """
    class Decorated(type):
        def __new__(meta, classname, bases, classDict):
            # Collect the keys first, then rewrite them in the same order
            # they appear in the namespace.
            function_keys = [key for key, member in classDict.items()
                             if isinstance(member, types.FunctionType)]
            for key in function_keys:
                classDict[key] = function(classDict[key])
            return type.__new__(meta, classname, bases, classDict)

    return Decorated
| 85 | |
| 86 | |
class HTMLParser(object):
    """HTML parser

    Generates a tree structure from a stream of (possibly malformed) HTML.

    """

    def __init__(self, tree=None, strict=False, namespaceHTMLElements=True, debug=False):
        """
        :arg tree: a treebuilder class controlling the type of tree that will be
            returned. Built in treebuilders can be accessed through
            html5lib.treebuilders.getTreeBuilder(treeType)

        :arg strict: raise an exception when a parse error is encountered

        :arg namespaceHTMLElements: whether or not to namespace HTML elements

        :arg debug: whether or not to enable debug mode which logs things

        Example:

        >>> from html5lib import treebuilders
        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()                     # generates parser with etree builder
        >>> lxml_builder = treebuilders.getTreeBuilder('lxml')
        >>> parser = HTMLParser(lxml_builder, strict=True)  # lxml builder, strict parsing

        """

        # Raise an exception on the first error encountered
        self.strict = strict

        if tree is None:
            tree = treebuilders.getTreeBuilder("etree")
        # ``tree`` is called here, so it must be a treebuilder class (or other
        # callable), not the name of one.
        self.tree = tree(namespaceHTMLElements)
        self.errors = []

        # One instance of every phase class, keyed by phase name.
        self.phases = {name: cls(self, self.tree)
                       for name, cls in getPhases(debug).items()}

    def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):
        """Tokenize ``stream`` and run the tree construction loop over it.

        :arg stream: passed straight to ``_tokenizer.HTMLTokenizer``

        :arg innerHTML: True when parsing a fragment rather than a document

        :arg container: name of the fragment's context element; ``None`` is
            treated as ``"div"`` when ``innerHTML`` is true

        :arg scripting: stored on the parser and consulted by the phases

        Remaining keyword arguments go to the tokenizer. If the main loop
        raises ``_ReparseException`` the parser is reset and the loop is run
        once more.
        """
        if innerHTML and container is None:
            # parseFragment documents None as "use the default"; without this
            # reset() would fail on ``None.lower()``.
            container = "div"

        self.innerHTMLMode = innerHTML
        self.container = container
        self.scripting = scripting
        self.tokenizer = _tokenizer.HTMLTokenizer(stream, parser=self, **kwargs)
        self.reset()

        try:
            self.mainLoop()
        except _ReparseException:
            self.reset()
            self.mainLoop()

    def reset(self):
        """Return the parser and its tree to their starting state.

        In fragment mode this also picks the tokenizer state from the
        container name, inserts the root html element and selects the
        insertion mode; otherwise the parser starts in the "initial" phase.
        """
        self.tree.reset()
        self.firstStartTag = False
        self.errors = []
        self.log = []  # only used with debug mode
        # "quirks" / "limited quirks" / "no quirks"
        self.compatMode = "no quirks"

        if self.innerHTMLMode:
            self.innerHTML = self.container.lower()

            # NOTE(review): cdataElements selects the RCDATA state and
            # rcdataElements the RAWTEXT state; the set names look crossed
            # relative to the states -- confirm against .constants before
            # renaming anything.
            if self.innerHTML in cdataElements:
                self.tokenizer.state = self.tokenizer.rcdataState
            elif self.innerHTML in rcdataElements:
                self.tokenizer.state = self.tokenizer.rawtextState
            elif self.innerHTML == 'plaintext':
                self.tokenizer.state = self.tokenizer.plaintextState
            else:
                # state already is data state
                # self.tokenizer.state = self.tokenizer.dataState
                pass
            self.phase = self.phases["beforeHtml"]
            self.phase.insertHtmlElement()
            self.resetInsertionMode()
        else:
            self.innerHTML = False  # pylint:disable=redefined-variable-type
            self.phase = self.phases["initial"]

        self.lastPhase = None

        self.beforeRCDataPhase = None

        self.framesetOK = True

    @property
    def documentEncoding(self):
        """Name of the character encoding that was used to decode the input stream, or
        :obj:`None` if that is not determined yet

        """
        if not hasattr(self, 'tokenizer'):
            return None
        return self.tokenizer.stream.charEncoding[0].name

    def isHTMLIntegrationPoint(self, element):
        """Return whether ``element`` is an HTML integration point.

        A MathML ``annotation-xml`` element qualifies only when its
        ``encoding`` attribute, ASCII-lowercased, is ``text/html`` or
        ``application/xhtml+xml``; every other element is looked up by
        (namespace, name) in ``htmlIntegrationPointElements``.
        """
        if (element.name == "annotation-xml" and
                element.namespace == namespaces["mathml"]):
            return ("encoding" in element.attributes and
                    element.attributes["encoding"].translate(
                        asciiUpper2Lower) in
                    ("text/html", "application/xhtml+xml"))
        else:
            return (element.namespace, element.name) in htmlIntegrationPointElements

    def isMathMLTextIntegrationPoint(self, element):
        """Return whether ``element``'s (namespace, name) pair is listed in
        ``mathmlTextIntegrationPointElements``."""
        return (element.namespace, element.name) in mathmlTextIntegrationPointElements

    def mainLoop(self):
        """Feed every token to the appropriate phase, then process EOF.

        A phase method may return a token, in which case that token is
        processed again (normally after the phase has switched); returning
        ``None`` ends processing of the current token.
        """
        CharactersToken = tokenTypes["Characters"]
        SpaceCharactersToken = tokenTypes["SpaceCharacters"]
        StartTagToken = tokenTypes["StartTag"]
        EndTagToken = tokenTypes["EndTag"]
        CommentToken = tokenTypes["Comment"]
        DoctypeToken = tokenTypes["Doctype"]
        ParseErrorToken = tokenTypes["ParseError"]

        # Built once rather than for every token.
        mglyphOrMalignmark = frozenset(["mglyph", "malignmark"])

        for token in self.normalizedTokens():
            prev_token = None
            new_token = token
            while new_token is not None:
                prev_token = new_token
                currentNode = self.tree.openElements[-1] if self.tree.openElements else None
                currentNodeNamespace = currentNode.namespace if currentNode else None
                currentNodeName = currentNode.name if currentNode else None

                token_type = new_token["type"]

                if token_type == ParseErrorToken:
                    self.parseError(new_token["data"], new_token.get("datavars", {}))
                    new_token = None
                else:
                    # Use the current phase unless the current node is foreign
                    # content that is not acting as an integration point.
                    # NOTE(review): the name checks below read ``token`` (the
                    # token pulled from the tokenizer), not ``new_token`` --
                    # confirm phases only ever hand back that same token.
                    if (len(self.tree.openElements) == 0 or
                            currentNodeNamespace == self.tree.defaultNamespace or
                            (self.isMathMLTextIntegrationPoint(currentNode) and
                             ((token_type == StartTagToken and
                               token["name"] not in mglyphOrMalignmark) or
                              token_type in (CharactersToken, SpaceCharactersToken))) or
                            (currentNodeNamespace == namespaces["mathml"] and
                             currentNodeName == "annotation-xml" and
                             token_type == StartTagToken and
                             token["name"] == "svg") or
                            (self.isHTMLIntegrationPoint(currentNode) and
                             token_type in (StartTagToken, CharactersToken, SpaceCharactersToken))):
                        phase = self.phase
                    else:
                        phase = self.phases["inForeignContent"]

                    if token_type == CharactersToken:
                        new_token = phase.processCharacters(new_token)
                    elif token_type == SpaceCharactersToken:
                        new_token = phase.processSpaceCharacters(new_token)
                    elif token_type == StartTagToken:
                        new_token = phase.processStartTag(new_token)
                    elif token_type == EndTagToken:
                        new_token = phase.processEndTag(new_token)
                    elif token_type == CommentToken:
                        new_token = phase.processComment(new_token)
                    elif token_type == DoctypeToken:
                        new_token = phase.processDoctype(new_token)

            # A "/>" that no phase acknowledged was written on a non-void
            # element.
            if (token_type == StartTagToken and prev_token["selfClosing"] and
                    not prev_token["selfClosingAcknowledged"]):
                self.parseError("non-void-element-with-trailing-solidus",
                                {"name": prev_token["name"]})

        # When the loop finishes it's EOF
        reprocess = True
        phases = []
        while reprocess:
            phases.append(self.phase)
            reprocess = self.phase.processEOF()
            if reprocess:
                # Guard against bouncing between phases forever.
                assert self.phase not in phases

    def normalizedTokens(self):
        """Yield each tokenizer token after passing it through
        ``normalizeToken``."""
        for token in self.tokenizer:
            yield self.normalizeToken(token)

    def parse(self, stream, *args, **kwargs):
        """Parse a HTML document into a well-formed tree

        :arg stream: a file-like object or string containing the HTML to be parsed

            The optional encoding parameter must be a string that indicates
            the encoding.  If specified, that encoding will be used,
            regardless of any BOM or later declaration (such as in a meta
            element).

        :arg scripting: treat noscript elements as if JavaScript was turned on

        :returns: parsed tree

        Example:

        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()
        >>> parser.parse('<html><body><p>This is a doc</p></body></html>')
        <Element u'{http://www.w3.org/1999/xhtml}html' at 0x7feac4909db0>

        """
        self._parse(stream, False, None, *args, **kwargs)
        return self.tree.getDocument()

    def parseFragment(self, stream, *args, **kwargs):
        """Parse a HTML fragment into a well-formed tree fragment

        :arg container: name of the element we're setting the innerHTML
            property; if omitted or set to None, defaults to 'div'

        :arg stream: a file-like object or string containing the HTML to be parsed

            The optional encoding parameter must be a string that indicates
            the encoding.  If specified, that encoding will be used,
            regardless of any BOM or later declaration (such as in a meta
            element)

        :arg scripting: treat noscript elements as if JavaScript was turned on

        :returns: parsed tree

        Example:

        >>> from html5lib.html5parser import HTMLParser
        >>> parser = HTMLParser()
        >>> parser.parseFragment('<b>this is a fragment</b>')
        <Element u'DOCUMENT_FRAGMENT' at 0x7feac484b090>

        """
        self._parse(stream, True, *args, **kwargs)
        return self.tree.getFragment()

    def parseError(self, errorcode="XXX-undefined-error", datavars=None):
        """Record a parse error and, in strict mode, raise ``ParseError``.

        :arg errorcode: key identifying the error; looked up in ``E`` when
            building the strict-mode message

        :arg datavars: mapping interpolated into the message; ``None`` means
            an empty dict

        The error is appended to ``self.errors`` as
        ``(stream position, errorcode, datavars)``.
        """
        # XXX The idea is to make errorcode mandatory.
        if datavars is None:
            datavars = {}
        self.errors.append((self.tokenizer.stream.position(), errorcode, datavars))
        if self.strict:
            raise ParseError(E[errorcode] % datavars)

    def normalizeToken(self, token):
        """Turn a start tag's attribute sequence into an ``OrderedDict``.

        When an attribute name occurs more than once the first occurrence
        wins. Tokens of every other type are returned untouched.
        """
        # HTML5 specific normalizations to the token stream
        if token["type"] == tokenTypes["StartTag"]:
            raw = token["data"]
            token["data"] = OrderedDict(raw)
            if len(raw) > len(token["data"]):
                # we had some duplicated attribute, fix so first wins
                token["data"].update(raw[::-1])

        return token

    def adjustMathMLAttributes(self, token):
        """Apply the MathML attribute replacements to ``token``."""
        # Inside this body the name resolves to the module-level mapping
        # imported from .constants, not to this method.
        adjust_attributes(token, adjustMathMLAttributes)

    def adjustSVGAttributes(self, token):
        """Apply the SVG attribute replacements to ``token``."""
        # As above: this is the module-level mapping, not the method.
        adjust_attributes(token, adjustSVGAttributes)

    def adjustForeignAttributes(self, token):
        """Apply the foreign (namespaced) attribute replacements to ``token``."""
        adjust_attributes(token, adjustForeignAttributesMap)

    def reparseTokenNormal(self, token):
        # NOTE(review): nothing in this class assigns ``self.parser``, so this
        # looks like it would raise AttributeError if ever called; left as is
        # because the intended behaviour is not evident -- confirm whether it
        # is dead code.
        # pylint:disable=unused-argument
        self.parser.phase()

    def resetInsertionMode(self):
        """Choose ``self.phase`` from the stack of open elements.

        Walks the stack from the most recently opened element downwards.
        Elements outside the default namespace are skipped; the bottom
        element is treated as the fragment's container (``self.innerHTML``).
        """
        # The name of this method is mostly historical. (It's also used in the
        # specification.)
        last = False
        newModes = {
            "select": "inSelect",
            "td": "inCell",
            "th": "inCell",
            "tr": "inRow",
            "tbody": "inTableBody",
            "thead": "inTableBody",
            "tfoot": "inTableBody",
            "caption": "inCaption",
            "colgroup": "inColumnGroup",
            "table": "inTable",
            "head": "inBody",
            "body": "inBody",
            "frameset": "inFrameset",
            "html": "beforeHead"
        }
        for node in self.tree.openElements[::-1]:
            nodeName = node.name
            new_phase = None
            if node == self.tree.openElements[0]:
                assert self.innerHTML
                last = True
                nodeName = self.innerHTML
            # Check for conditions that should only happen in the innerHTML
            # case
            if nodeName in ("select", "colgroup", "head", "html"):
                assert self.innerHTML

            if not last and node.namespace != self.tree.defaultNamespace:
                continue

            if nodeName in newModes:
                new_phase = self.phases[newModes[nodeName]]
                break
            elif last:
                new_phase = self.phases["inBody"]
                break

        self.phase = new_phase

    def parseRCDataRawtext(self, token, contentType):
        """Insert ``token``'s element and switch to the "text" phase.

        :arg token: the start tag token to insert

        :arg contentType: ``"RAWTEXT"`` or ``"RCDATA"``; selects the
            tokenizer state

        The phase in force beforehand is kept in ``self.originalPhase``.
        """
        # Generic RCDATA/RAWTEXT Parsing algorithm
        assert contentType in ("RAWTEXT", "RCDATA")

        self.tree.insertElement(token)

        if contentType == "RAWTEXT":
            self.tokenizer.state = self.tokenizer.rawtextState
        else:
            self.tokenizer.state = self.tokenizer.rcdataState

        self.originalPhase = self.phase

        self.phase = self.phases["text"]
| 410 | |
| 411 | |
| 412 @_utils.memoize | |
| 413 def getPhases(debug): | |
def log(function):
    """Logger that records which phase processes each token"""
    # Defined inside getPhases(); used there as the per-method decorator when
    # debug mode is on.
    # Reverse lookup: token type value -> token type name.
    type_names = {value: key for key, value in tokenTypes.items()}

    def wrapped(self, *args, **kwargs):
        # Only "process*" methods called with a token leave a log entry;
        # everything else is a plain pass-through.
        if function.__name__.startswith("process") and len(args) > 0:
            token = args[0]
            info = {"type": type_names[token['type']]}
            if token['type'] in tagTokenTypes:
                info["name"] = token['name']

            self.parser.log.append((self.parser.tokenizer.state.__name__,
                                    self.parser.phase.__class__.__name__,
                                    self.__class__.__name__,
                                    function.__name__,
                                    info))
        return function(self, *args, **kwargs)
    return wrapped
| 438 | |
def getMetaclass(use_metaclass, metaclass_func):
    """Return a metaclass that decorates methods with ``metaclass_func`` when
    ``use_metaclass`` is true, and plain ``type`` otherwise."""
    if not use_metaclass:
        return type
    return method_decorator_metaclass(metaclass_func)
| 444 | |
| 445 # pylint:disable=unused-argument | |
class Phase(with_metaclass(getMetaclass(debug, log))):
    """Base class for helper object that implements each phase of processing

    Defined inside getPhases(); ``debug`` and ``log`` come from that
    enclosing scope. Subclasses supply ``startTagHandler`` and
    ``endTagHandler`` for the default tag dispatch below.
    """

    def __init__(self, parser, tree):
        self.parser = parser
        self.tree = tree

    def processEOF(self):
        """Handle end of input; every concrete phase must override this."""
        raise NotImplementedError

    def processComment(self, token):
        """Attach the comment to the current (innermost open) element."""
        # For most phases the following is correct. Where it's not it will be
        # overridden.
        currentElement = self.tree.openElements[-1]
        self.tree.insertComment(token, currentElement)

    def processDoctype(self, token):
        """Report a doctype that turned up where none is expected."""
        self.parser.parseError("unexpected-doctype")

    def processCharacters(self, token):
        """Insert the token's text into the tree."""
        self.tree.insertText(token["data"])

    def processSpaceCharacters(self, token):
        """Insert the token's whitespace into the tree."""
        self.tree.insertText(token["data"])

    def processStartTag(self, token):
        """Dispatch on the tag name through ``startTagHandler``."""
        handler = self.startTagHandler[token["name"]]
        return handler(token)

    def startTagHtml(self, token):
        """Copy attributes the root element does not have yet from a later
        <html> start tag onto it."""
        if token["name"] == "html" and not self.parser.firstStartTag:
            self.parser.parseError("non-html-root")
        # XXX Need a check here to see if the first start tag token emitted is
        # this token... If it's not, invoke self.parser.parseError().
        for attrName, attrValue in token["data"].items():
            rootAttributes = self.tree.openElements[0].attributes
            if attrName not in rootAttributes:
                rootAttributes[attrName] = attrValue
        self.parser.firstStartTag = False

    def processEndTag(self, token):
        """Dispatch on the tag name through ``endTagHandler``."""
        handler = self.endTagHandler[token["name"]]
        return handler(token)
| 486 | |
class InitialPhase(Phase):
    # Phase the parser starts in for whole documents. A doctype chooses the
    # parser's compatMode; any other token except whitespace and comments
    # forces "quirks". Either way processing moves on to "beforeHtml".

    def processSpaceCharacters(self, token):
        # Whitespace before the doctype is dropped.
        pass

    def processComment(self, token):
        # Comments here belong to the document itself, not to an element.
        self.tree.insertComment(token, self.tree.document)

    def processDoctype(self, token):
        # Insert the doctype, set compatMode from its name/public id/system
        # id, then switch to "beforeHtml".
        name = token["name"]
        publicId = token["publicId"]
        systemId = token["systemId"]
        correct = token["correct"]

        # ``and`` binds tighter than ``or``: the last clause reads
        # "(systemId is not None and systemId != 'about:legacy-compat')".
        if (name != "html" or publicId is not None or
                systemId is not None and systemId != "about:legacy-compat"):
            self.parser.parseError("unknown-doctype")

        if publicId is None:
            publicId = ""

        self.tree.insertDoctype(token)

        # Lower-case (ASCII only) so the comparisons below are
        # case-insensitive.
        if publicId != "":
            publicId = publicId.translate(asciiUpper2Lower)

        # Again ``and`` binds tighter than ``or``: the 4.01
        # frameset/transitional prefixes count only together with
        # "systemId is None"; the final system-id comparison stands alone.
        if (not correct or token["name"] != "html" or
                publicId.startswith(
                    ("+//silmaril//dtd html pro v0r11 19970101//",
                     "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
                     "-//as//dtd html 3.0 aswedit + extensions//",
                     "-//ietf//dtd html 2.0 level 1//",
                     "-//ietf//dtd html 2.0 level 2//",
                     "-//ietf//dtd html 2.0 strict level 1//",
                     "-//ietf//dtd html 2.0 strict level 2//",
                     "-//ietf//dtd html 2.0 strict//",
                     "-//ietf//dtd html 2.0//",
                     "-//ietf//dtd html 2.1e//",
                     "-//ietf//dtd html 3.0//",
                     "-//ietf//dtd html 3.2 final//",
                     "-//ietf//dtd html 3.2//",
                     "-//ietf//dtd html 3//",
                     "-//ietf//dtd html level 0//",
                     "-//ietf//dtd html level 1//",
                     "-//ietf//dtd html level 2//",
                     "-//ietf//dtd html level 3//",
                     "-//ietf//dtd html strict level 0//",
                     "-//ietf//dtd html strict level 1//",
                     "-//ietf//dtd html strict level 2//",
                     "-//ietf//dtd html strict level 3//",
                     "-//ietf//dtd html strict//",
                     "-//ietf//dtd html//",
                     "-//metrius//dtd metrius presentational//",
                     "-//microsoft//dtd internet explorer 2.0 html strict//",
                     "-//microsoft//dtd internet explorer 2.0 html//",
                     "-//microsoft//dtd internet explorer 2.0 tables//",
                     "-//microsoft//dtd internet explorer 3.0 html strict//",
                     "-//microsoft//dtd internet explorer 3.0 html//",
                     "-//microsoft//dtd internet explorer 3.0 tables//",
                     "-//netscape comm. corp.//dtd html//",
                     "-//netscape comm. corp.//dtd strict html//",
                     "-//o'reilly and associates//dtd html 2.0//",
                     "-//o'reilly and associates//dtd html extended 1.0//",
                     "-//o'reilly and associates//dtd html extended relaxed 1.0//",
                     "-//softquad software//dtd hotmetal pro 6.0::19990601::extensions to html 4.0//",
                     "-//softquad//dtd hotmetal pro 4.0::19971010::extensions to html 4.0//",
                     "-//spyglass//dtd html 2.0 extended//",
                     "-//sq//dtd html 2.0 hotmetal + extensions//",
                     "-//sun microsystems corp.//dtd hotjava html//",
                     "-//sun microsystems corp.//dtd hotjava strict html//",
                     "-//w3c//dtd html 3 1995-03-24//",
                     "-//w3c//dtd html 3.2 draft//",
                     "-//w3c//dtd html 3.2 final//",
                     "-//w3c//dtd html 3.2//",
                     "-//w3c//dtd html 3.2s draft//",
                     "-//w3c//dtd html 4.0 frameset//",
                     "-//w3c//dtd html 4.0 transitional//",
                     "-//w3c//dtd html experimental 19960712//",
                     "-//w3c//dtd html experimental 970421//",
                     "-//w3c//dtd w3 html//",
                     "-//w3o//dtd w3 html 3.0//",
                     "-//webtechs//dtd mozilla html 2.0//",
                     "-//webtechs//dtd mozilla html//")) or
                publicId in ("-//w3o//dtd w3 html strict 3.0//en//",
                             "-/w3c/dtd html 4.0 transitional/en",
                             "html") or
                publicId.startswith(
                    ("-//w3c//dtd html 4.01 frameset//",
                     "-//w3c//dtd html 4.01 transitional//")) and
                systemId is None or
                systemId and systemId.lower() == "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
            self.parser.compatMode = "quirks"
        elif (publicId.startswith(
                ("-//w3c//dtd xhtml 1.0 frameset//",
                 "-//w3c//dtd xhtml 1.0 transitional//")) or
              publicId.startswith(
                  ("-//w3c//dtd html 4.01 frameset//",
                   "-//w3c//dtd html 4.01 transitional//")) and
              systemId is not None):
            self.parser.compatMode = "limited quirks"

        self.parser.phase = self.parser.phases["beforeHtml"]

    def anythingElse(self):
        # No doctype was seen: fall back to quirks mode and move on.
        self.parser.compatMode = "quirks"
        self.parser.phase = self.parser.phases["beforeHtml"]

    def processCharacters(self, token):
        self.parser.parseError("expected-doctype-but-got-chars")
        self.anythingElse()
        # Returning the token makes the main loop process it again in the
        # new phase.
        return token

    def processStartTag(self, token):
        self.parser.parseError("expected-doctype-but-got-start-tag",
                               {"name": token["name"]})
        self.anythingElse()
        return token

    def processEndTag(self, token):
        self.parser.parseError("expected-doctype-but-got-end-tag",
                               {"name": token["name"]})
        self.anythingElse()
        return token

    def processEOF(self):
        self.parser.parseError("expected-doctype-but-got-eof")
        self.anythingElse()
        # True asks the main loop to run EOF processing again in the new
        # phase.
        return True
| 614 | |
class BeforeHtmlPhase(Phase):
    """Phase that creates the root html element as soon as anything other
    than whitespace or a comment arrives, then moves to "beforeHead"."""

    # helper methods
    def insertHtmlElement(self):
        """Insert an implied <html> root and switch to "beforeHead"."""
        rootToken = impliedTagToken("html", "StartTag")
        self.tree.insertRoot(rootToken)
        self.parser.phase = self.parser.phases["beforeHead"]

    # other
    def processEOF(self):
        self.insertHtmlElement()
        return True

    def processComment(self, token):
        # No element exists yet, so the comment hangs off the document.
        self.tree.insertComment(token, self.tree.document)

    def processSpaceCharacters(self, token):
        # Whitespace before the root element is ignored.
        pass

    def processCharacters(self, token):
        self.insertHtmlElement()
        return token

    def processStartTag(self, token):
        isHtmlTag = token["name"] == "html"
        if isHtmlTag:
            self.parser.firstStartTag = True
        self.insertHtmlElement()
        return token

    def processEndTag(self, token):
        # Only these end tags imply the root element; any other is an error
        # and is dropped.
        if token["name"] in ("head", "body", "html", "br"):
            self.insertHtmlElement()
            return token
        self.parser.parseError("unexpected-end-tag-before-html",
                               {"name": token["name"]})
| 649 | |
class BeforeHeadPhase(Phase):
    """Phase between the root element and <head>: anything other than
    whitespace, an <html> start tag or an unrelated end tag opens a head
    element (implied if necessary)."""

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startHandlers = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("head", self.startTagHead)
        ])
        startHandlers.default = self.startTagOther
        self.startTagHandler = startHandlers

        endHandlers = _utils.MethodDispatcher([
            (("head", "body", "html", "br"), self.endTagImplyHead)
        ])
        endHandlers.default = self.endTagOther
        self.endTagHandler = endHandlers

    def processEOF(self):
        impliedHead = impliedTagToken("head", "StartTag")
        self.startTagHead(impliedHead)
        return True

    def processSpaceCharacters(self, token):
        # Whitespace before <head> is ignored.
        pass

    def processCharacters(self, token):
        impliedHead = impliedTagToken("head", "StartTag")
        self.startTagHead(impliedHead)
        return token

    def startTagHtml(self, token):
        # Delegated to the "inBody" phase.
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagHead(self, token):
        """Insert the head element, remember it as the tree's head pointer
        and switch to "inHead"."""
        self.tree.insertElement(token)
        self.tree.headPointer = self.tree.openElements[-1]
        self.parser.phase = self.parser.phases["inHead"]

    def startTagOther(self, token):
        impliedHead = impliedTagToken("head", "StartTag")
        self.startTagHead(impliedHead)
        return token

    def endTagImplyHead(self, token):
        impliedHead = impliedTagToken("head", "StartTag")
        self.startTagHead(impliedHead)
        return token

    def endTagOther(self, token):
        self.parser.parseError("end-tag-after-implied-root",
                               {"name": token["name"]})
| 695 | |
class InHeadPhase(Phase):
    """Phase active while the head element is open."""

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startHandlers = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("title", self.startTagTitle),
            (("noframes", "style"), self.startTagNoFramesStyle),
            ("noscript", self.startTagNoscript),
            ("script", self.startTagScript),
            (("base", "basefont", "bgsound", "command", "link"),
             self.startTagBaseLinkCommand),
            ("meta", self.startTagMeta),
            ("head", self.startTagHead)
        ])
        startHandlers.default = self.startTagOther
        self.startTagHandler = startHandlers

        endHandlers = _utils.MethodDispatcher([
            ("head", self.endTagHead),
            (("br", "html", "body"), self.endTagHtmlBodyBr)
        ])
        endHandlers.default = self.endTagOther
        self.endTagHandler = endHandlers

    # the real thing
    def processEOF(self):
        self.anythingElse()
        return True

    def processCharacters(self, token):
        self.anythingElse()
        return token

    def startTagHtml(self, token):
        # Delegated to the "inBody" phase.
        inBody = self.parser.phases["inBody"]
        return inBody.processStartTag(token)

    def startTagHead(self, token):
        self.parser.parseError("two-heads-are-not-better-than-one")

    def startTagBaseLinkCommand(self, token):
        # Inserted and immediately popped again; a trailing "/" on the tag
        # is acknowledged.
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagMeta(self, token):
        """Insert the meta element and, while the stream's encoding is still
        tentative, switch to the encoding the element declares."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

        attributes = token["data"]
        stream = self.parser.tokenizer.stream
        if stream.charEncoding[1] != "tentative":
            return

        if "charset" in attributes:
            stream.changeEncoding(attributes["charset"])
        elif ("content" in attributes and
              "http-equiv" in attributes and
              attributes["http-equiv"].lower() == "content-type"):
            # Encoding it as UTF-8 here is a hack, as really we should pass
            # the abstract Unicode string, and just use the
            # ContentAttrParser on that, but using UTF-8 allows all chars
            # to be encoded and as a ASCII-superset works.
            contentBytes = attributes["content"].encode("utf-8")
            data = _inputstream.EncodingBytes(contentBytes)
            contentParser = _inputstream.ContentAttrParser(data)
            codec = contentParser.parse()
            stream.changeEncoding(codec)

    def startTagTitle(self, token):
        self.parser.parseRCDataRawtext(token, "RCDATA")

    def startTagNoFramesStyle(self, token):
        # Need to decide whether to implement the scripting-disabled case
        self.parser.parseRCDataRawtext(token, "RAWTEXT")

    def startTagNoscript(self, token):
        if not self.parser.scripting:
            self.tree.insertElement(token)
            self.parser.phase = self.parser.phases["inHeadNoscript"]
        else:
            self.parser.parseRCDataRawtext(token, "RAWTEXT")

    def startTagScript(self, token):
        """Insert the script element and tokenize what follows as script
        data in the "text" phase."""
        self.tree.insertElement(token)
        tokenizer = self.parser.tokenizer
        tokenizer.state = tokenizer.scriptDataState
        self.parser.originalPhase = self.parser.phase
        self.parser.phase = self.parser.phases["text"]

    def startTagOther(self, token):
        self.anythingElse()
        return token

    def endTagHead(self, token):
        """Pop the head element and switch to "afterHead"."""
        popped = self.parser.tree.openElements.pop()
        assert popped.name == "head", "Expected head got %s" % popped.name
        self.parser.phase = self.parser.phases["afterHead"]

    def endTagHtmlBodyBr(self, token):
        self.anythingElse()
        return token

    def endTagOther(self, token):
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def anythingElse(self):
        # Behave as if </head> had been seen.
        self.endTagHead(impliedTagToken("head"))
| 798 | |
class InHeadNoscriptPhase(Phase):
    """Phase registered as "inHeadNoscript" in the parser's phase table.

    Tokens are either delegated to the "inHead" / "inBody" phases, ignored
    with a parse error, or cause the noscript element to be closed
    implicitly before the token is handed back to the caller.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        start_table = [
            ("html", self.startTagHtml),
            (("basefont", "bgsound", "link", "meta", "noframes", "style"),
             self.startTagBaseLinkCommand),
            (("head", "noscript"), self.startTagHeadNoscript),
        ]
        self.startTagHandler = _utils.MethodDispatcher(start_table)
        self.startTagHandler.default = self.startTagOther

        end_table = [
            ("noscript", self.endTagNoscript),
            ("br", self.endTagBr),
        ]
        self.endTagHandler = _utils.MethodDispatcher(end_table)
        self.endTagHandler.default = self.endTagOther

    def processEOF(self):
        """Report EOF inside noscript, close it, and return True."""
        parser = self.parser
        parser.parseError("eof-in-head-noscript")
        self.anythingElse()
        return True

    def processComment(self, token):
        """Delegate comments to the "inHead" phase."""
        in_head = self.parser.phases["inHead"]
        return in_head.processComment(token)

    def processCharacters(self, token):
        """Report the characters as an error, close noscript, return token."""
        parser = self.parser
        parser.parseError("char-in-head-noscript")
        self.anythingElse()
        return token

    def processSpaceCharacters(self, token):
        """Delegate whitespace to the "inHead" phase."""
        in_head = self.parser.phases["inHead"]
        return in_head.processSpaceCharacters(token)

    def startTagHtml(self, token):
        """Delegate an html start tag to the "inBody" phase."""
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagBaseLinkCommand(self, token):
        """Delegate head-level start tags to the "inHead" phase."""
        in_head = self.parser.phases["inHead"]
        return in_head.processStartTag(token)

    def startTagHeadNoscript(self, token):
        """Report a nested head/noscript start tag; the token is dropped."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag", details)

    def startTagOther(self, token):
        """Report the tag, close noscript implicitly, and return the token."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-inhead-noscript-tag", details)
        self.anythingElse()
        return token

    def endTagNoscript(self, token):
        """Pop the noscript element and go back to the "inHead" phase."""
        popped = self.parser.tree.openElements.pop()
        assert popped.name == "noscript", "Expected noscript got %s" % popped.name
        self.parser.phase = self.parser.phases["inHead"]

    def endTagBr(self, token):
        """Report the br end tag, close noscript, and return the token."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-inhead-noscript-tag", details)
        self.anythingElse()
        return token

    def endTagOther(self, token):
        """Report any other end tag as unexpected; the token is dropped."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag", details)

    def anythingElse(self):
        """Act as if a noscript end tag had been seen."""
        # Caller must raise parse error first!
        implied_end = impliedTagToken("noscript")
        self.endTagNoscript(implied_end)
| 862 | |
class AfterHeadPhase(Phase):
    """Phase used between the end of the head and the start of the body.

    Body and frameset start tags move the parser on explicitly; most other
    tokens insert an implied body element first and are then handed back
    to the caller.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        start_table = [
            ("html", self.startTagHtml),
            ("body", self.startTagBody),
            ("frameset", self.startTagFrameset),
            (("base", "basefont", "bgsound", "link", "meta", "noframes",
              "script", "style", "title"),
             self.startTagFromHead),
            ("head", self.startTagHead),
        ]
        self.startTagHandler = _utils.MethodDispatcher(start_table)
        self.startTagHandler.default = self.startTagOther

        end_table = [
            (("body", "html", "br"), self.endTagHtmlBodyBr),
        ]
        self.endTagHandler = _utils.MethodDispatcher(end_table)
        self.endTagHandler.default = self.endTagOther

    def processEOF(self):
        """Insert the implied body and return True."""
        self.anythingElse()
        return True

    def processCharacters(self, token):
        """Insert the implied body and return the token unchanged."""
        self.anythingElse()
        return token

    def startTagHtml(self, token):
        """Delegate an html start tag to the "inBody" phase."""
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagBody(self, token):
        """Insert the explicit body element and enter the "inBody" phase."""
        parser = self.parser
        parser.framesetOK = False
        self.tree.insertElement(token)
        parser.phase = parser.phases["inBody"]

    def startTagFrameset(self, token):
        """Insert the frameset element and enter the "inFrameset" phase."""
        self.tree.insertElement(token)
        parser = self.parser
        parser.phase = parser.phases["inFrameset"]

    def startTagFromHead(self, token):
        """Process a head-only element that appeared after the head.

        The head pointer is pushed back onto the open-element stack, the
        token is processed by the "inHead" phase, and then the last node
        named "head" on the stack (if any) is removed again.
        """
        self.parser.parseError("unexpected-start-tag-out-of-my-head",
                               {"name": token["name"]})
        open_elements = self.tree.openElements
        open_elements.append(self.tree.headPointer)
        self.parser.phases["inHead"].processStartTag(token)
        # Search a reversed copy of the stack for the most recent head node.
        head_node = next(
            (node for node in self.tree.openElements[::-1]
             if node.name == "head"),
            None)
        if head_node is not None:
            self.tree.openElements.remove(head_node)

    def startTagHead(self, token):
        """Report a stray head start tag; the token is dropped."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag", details)

    def startTagOther(self, token):
        """Insert the implied body and return the token unchanged."""
        self.anythingElse()
        return token

    def endTagHtmlBodyBr(self, token):
        """Insert the implied body and return the end tag unchanged."""
        self.anythingElse()
        return token

    def endTagOther(self, token):
        """Report any other end tag as unexpected; the token is dropped."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag", details)

    def anythingElse(self):
        """Insert an implied body start tag and enter the "inBody" phase."""
        implied_body = impliedTagToken("body", "StartTag")
        self.tree.insertElement(implied_body)
        parser = self.parser
        parser.phase = parser.phases["inBody"]
        parser.framesetOK = True
| 929 | |
| 930 class InBodyPhase(Phase): | |
| 931 # http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody | |
| 932 # the really-really-really-very crazy mode | |
def __init__(self, parser, tree):
    """Set up whitespace handling and the start/end tag dispatch tables."""
    Phase.__init__(self, parser, tree)

    # Set this to the default handler
    self.processSpaceCharacters = self.processSpaceCharactersNonPre

    # Tag name (or group of names) -> handler for start tags.
    start_table = [
        ("html", self.startTagHtml),
        (("base", "basefont", "bgsound", "command", "link", "meta",
          "script", "style", "title"),
         self.startTagProcessInHead),
        ("body", self.startTagBody),
        ("frameset", self.startTagFrameset),
        (("address", "article", "aside", "blockquote", "center", "details",
          "dir", "div", "dl", "fieldset", "figcaption", "figure",
          "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
          "section", "summary", "ul"),
         self.startTagCloseP),
        (headingElements, self.startTagHeading),
        (("pre", "listing"), self.startTagPreListing),
        ("form", self.startTagForm),
        (("li", "dd", "dt"), self.startTagListItem),
        ("plaintext", self.startTagPlaintext),
        ("a", self.startTagA),
        (("b", "big", "code", "em", "font", "i", "s", "small", "strike",
          "strong", "tt", "u"),
         self.startTagFormatting),
        ("nobr", self.startTagNobr),
        ("button", self.startTagButton),
        (("applet", "marquee", "object"), self.startTagAppletMarqueeObject),
        ("xmp", self.startTagXmp),
        ("table", self.startTagTable),
        (("area", "br", "embed", "img", "keygen", "wbr"),
         self.startTagVoidFormatting),
        (("param", "source", "track"), self.startTagParamSource),
        ("input", self.startTagInput),
        ("hr", self.startTagHr),
        ("image", self.startTagImage),
        ("isindex", self.startTagIsIndex),
        ("textarea", self.startTagTextarea),
        ("iframe", self.startTagIFrame),
        ("noscript", self.startTagNoscript),
        (("noembed", "noframes"), self.startTagRawtext),
        ("select", self.startTagSelect),
        (("rp", "rt"), self.startTagRpRt),
        (("option", "optgroup"), self.startTagOpt),
        ("math", self.startTagMath),
        ("svg", self.startTagSvg),
        (("caption", "col", "colgroup", "frame", "head",
          "tbody", "td", "tfoot", "th", "thead",
          "tr"),
         self.startTagMisplaced),
    ]
    self.startTagHandler = _utils.MethodDispatcher(start_table)
    self.startTagHandler.default = self.startTagOther

    # Tag name (or group of names) -> handler for end tags.
    end_table = [
        ("body", self.endTagBody),
        ("html", self.endTagHtml),
        (("address", "article", "aside", "blockquote", "button", "center",
          "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption",
          "figure", "footer", "header", "hgroup", "listing", "main", "menu",
          "nav", "ol", "pre", "section", "summary", "ul"),
         self.endTagBlock),
        ("form", self.endTagForm),
        ("p", self.endTagP),
        (("dd", "dt", "li"), self.endTagListItem),
        (headingElements, self.endTagHeading),
        (("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
          "strike", "strong", "tt", "u"),
         self.endTagFormatting),
        (("applet", "marquee", "object"), self.endTagAppletMarqueeObject),
        ("br", self.endTagBr),
    ]
    self.endTagHandler = _utils.MethodDispatcher(end_table)
    self.endTagHandler.default = self.endTagOther
| 1003 | |
def isMatchingFormattingElement(self, node1, node2):
    """Return whether two nodes share name, namespace and attributes."""
    if not (node1.name == node2.name):
        return False
    if not (node1.namespace == node2.namespace):
        return False
    return node1.attributes == node2.attributes
| 1008 | |
| 1009 # helper | |
def addFormattingElement(self, token):
    """Insert the element and record it in the active formatting list.

    Entries after the last Marker that match the new element (same name,
    namespace and attributes) are collected; when there are already
    three, the earliest of them is removed before the new one is appended.
    """
    tree = self.tree
    tree.insertElement(token)
    new_element = tree.openElements[-1]

    duplicates = []
    # Walk a reversed copy so the scan stops at the most recent Marker.
    for candidate in tree.activeFormattingElements[::-1]:
        if candidate is Marker:
            break
        if self.isMatchingFormattingElement(candidate, new_element):
            duplicates.append(candidate)

    duplicate_count = len(duplicates)
    assert duplicate_count <= 3
    if duplicate_count == 3:
        # The last one collected is the earliest entry in the list.
        tree.activeFormattingElements.remove(duplicates[-1])
    tree.activeFormattingElements.append(new_element)
| 1025 | |
| 1026 # the real deal | |
def processEOF(self):
    """Report a parse error if any open element may not stay open at EOF.

    At most one error is reported; nothing is returned.
    """
    may_stay_open = frozenset(("dd", "dt", "li", "p", "tbody", "td",
                               "tfoot", "th", "thead", "tr", "body",
                               "html"))
    open_reversed = self.tree.openElements[::-1]
    if any(node.name not in may_stay_open for node in open_reversed):
        self.parser.parseError("expected-closing-tag-but-got-eof")
    # Stop parsing
| 1036 | |
def processSpaceCharactersDropNewline(self, token):
    """Insert whitespace, dropping one leading newline where required.

    Installed as the whitespace handler right after a pre, listing or
    textarea start tag. It restores the normal handler immediately, and
    strips a leading newline only when the current node is one of those
    elements and has no content yet.
    """
    text = token["data"]
    self.processSpaceCharacters = self.processSpaceCharactersNonPre
    if text.startswith("\n"):
        current = self.tree.openElements[-1]
        if (current.name in ("pre", "listing", "textarea") and
                not current.hasContent()):
            text = text[1:]
    if text:
        self.tree.reconstructActiveFormattingElements()
        self.tree.insertText(text)
| 1049 | |
def processCharacters(self, token):
    """Insert character data into the tree.

    A lone NUL character token is dropped. Any other data is inserted
    after reconstructing the active formatting elements, and the first
    non-space character seen clears ``parser.framesetOK``.
    """
    data = token["data"]
    if data == "\u0000":
        # The tokenizer should always emit null on its own
        return
    self.tree.reconstructActiveFormattingElements()
    self.tree.insertText(data)
    # Only scan while framesetOK is still set, and use a generator so the
    # scan stops at the first non-space character instead of building a
    # full list first.
    if (self.parser.framesetOK and
            any(char not in spaceCharacters for char in data)):
        self.parser.framesetOK = False
| 1061 | |
def processSpaceCharactersNonPre(self, token):
    """Default whitespace handler: insert the whitespace unchanged."""
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    tree.insertText(token["data"])
| 1065 | |
def startTagProcessInHead(self, token):
    """Delegate the start tag to the "inHead" phase and return its result."""
    in_head = self.parser.phases["inHead"]
    return in_head.processStartTag(token)
| 1068 | |
def startTagBody(self, token):
    """Handle a stray body start tag (always a parse error).

    When the second open element is a body, attributes from the token
    that the existing body lacks are copied onto it and framesetOK is
    cleared. Otherwise the parser is asserted to be in innerHTML mode.
    """
    self.parser.parseError("unexpected-start-tag", {"name": "body"})
    open_elements = self.tree.openElements
    if len(open_elements) == 1 or open_elements[1].name != "body":
        assert self.parser.innerHTML
        return
    self.parser.framesetOK = False
    body = open_elements[1]
    for attr_name, attr_value in token["data"].items():
        if attr_name not in body.attributes:
            body.attributes[attr_name] = attr_value
| 1079 | |
def startTagFrameset(self, token):
    """Handle a frameset start tag seen in the body (always a parse error).

    If a body is the second open element and framesetOK is still set,
    the body is detached from its parent, the stack is popped back to
    the html element, the frameset is inserted, and the parser moves to
    the "inFrameset" phase. Otherwise the token is ignored.
    """
    self.parser.parseError("unexpected-start-tag", {"name": "frameset"})
    open_elements = self.tree.openElements
    if len(open_elements) == 1 or open_elements[1].name != "body":
        assert self.parser.innerHTML
        return
    if not self.parser.framesetOK:
        return
    body = open_elements[1]
    if body.parent:
        body.parent.removeChild(body)
    while open_elements[-1].name != "html":
        open_elements.pop()
    self.tree.insertElement(token)
    self.parser.phase = self.parser.phases["inFrameset"]
| 1093 | |
def startTagCloseP(self, token):
    """Close any p element in button scope, then insert the element."""
    tree = self.tree
    if tree.elementInScope("p", variant="button"):
        self.endTagP(impliedTagToken("p"))
    tree.insertElement(token)
| 1098 | |
def startTagPreListing(self, token):
    """Insert a pre/listing element and arm leading-newline dropping."""
    tree = self.tree
    if tree.elementInScope("p", variant="button"):
        self.endTagP(impliedTagToken("p"))
    tree.insertElement(token)
    self.parser.framesetOK = False
    # The next whitespace token may need its leading newline removed.
    self.processSpaceCharacters = self.processSpaceCharactersDropNewline
| 1105 | |
def startTagForm(self, token):
    """Insert a form element unless a form pointer is already set.

    A form start tag while ``tree.formPointer`` is set is a parse error
    and the token is ignored. Otherwise any p in button scope is closed,
    the form is inserted, and it becomes the new form pointer.
    """
    tree = self.tree
    if tree.formPointer:
        self.parser.parseError("unexpected-start-tag", {"name": "form"})
        return
    if tree.elementInScope("p", variant="button"):
        self.endTagP(impliedTagToken("p"))
    tree.insertElement(token)
    tree.formPointer = tree.openElements[-1]
| 1114 | |
def startTagListItem(self, token):
    """Insert an li/dd/dt element, closing an open sibling item first.

    The open-element stack is walked from the current node upwards. The
    first node whose name is in the stop list gets an implied end tag;
    the walk also stops at any special element other than address, div
    or p. A p in button scope is then closed before the insertion.
    """
    self.parser.framesetOK = False

    definition_items = ["dt", "dd"]
    stop_names_by_tag = {"li": ["li"],
                         "dt": definition_items,
                         "dd": definition_items}
    stop_names = stop_names_by_tag[token["name"]]
    for open_node in reversed(self.tree.openElements):
        if open_node.name in stop_names:
            implied_end = impliedTagToken(open_node.name, "EndTag")
            self.parser.phase.processEndTag(implied_end)
            break
        is_special = open_node.nameTuple in specialElements
        if is_special and open_node.name not in ("address", "div", "p"):
            break

    if self.tree.elementInScope("p", variant="button"):
        self.parser.phase.processEndTag(impliedTagToken("p", "EndTag"))

    self.tree.insertElement(token)
| 1136 | |
def startTagPlaintext(self, token):
    """Insert the element and put the tokenizer in its plaintext state."""
    tree = self.tree
    if tree.elementInScope("p", variant="button"):
        self.endTagP(impliedTagToken("p"))
    tree.insertElement(token)
    tokenizer = self.parser.tokenizer
    tokenizer.state = tokenizer.plaintextState
| 1142 | |
def startTagHeading(self, token):
    """Insert a heading element, popping a heading that is still current.

    A p in button scope is closed first. If the current node is itself a
    heading, that is a parse error and the node is popped.
    """
    tree = self.tree
    if tree.elementInScope("p", variant="button"):
        self.endTagP(impliedTagToken("p"))
    current = tree.openElements[-1]
    if current.name in headingElements:
        self.parser.parseError("unexpected-start-tag", {"name": token["name"]})
        tree.openElements.pop()
    tree.insertElement(token)
| 1150 | |
def startTagA(self, token):
    """Insert an a element, first closing any a that is still active.

    If the active formatting list already holds an a element, that is a
    parse error: an implied end tag is run through endTagFormatting and
    the old element is removed from both the open-element stack and the
    active formatting list if it is still in either.
    """
    tree = self.tree
    active_anchor = tree.elementInActiveFormattingElements("a")
    if active_anchor:
        self.parser.parseError("unexpected-start-tag-implies-end-tag",
                               {"startName": "a", "endName": "a"})
        self.endTagFormatting(impliedTagToken("a"))
        for stack in (tree.openElements, tree.activeFormattingElements):
            if active_anchor in stack:
                stack.remove(active_anchor)
    tree.reconstructActiveFormattingElements()
    self.addFormattingElement(token)
| 1163 | |
def startTagFormatting(self, token):
    """Reconstruct active formatting, then add the new formatting element."""
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    self.addFormattingElement(token)
| 1167 | |
def startTagNobr(self, token):
    """Add a nobr formatting element, closing one already in scope.

    A nobr already in scope is a parse error; it is closed through an
    implied end tag and the active formatting elements are reconstructed
    again before the new element is added.
    """
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    if tree.elementInScope("nobr"):
        self.parser.parseError("unexpected-start-tag-implies-end-tag",
                               {"startName": "nobr", "endName": "nobr"})
        self.processEndTag(impliedTagToken("nobr"))
        # XXX Need tests that trigger the following
        tree.reconstructActiveFormattingElements()
    self.addFormattingElement(token)
| 1177 | |
def startTagButton(self, token):
    """Insert a button, or close an open one and return the token.

    When a button is already in scope this is a parse error: an implied
    button end tag is processed and the token is returned unchanged.
    Otherwise the element is inserted, framesetOK is cleared, and
    nothing is returned.
    """
    tree = self.tree
    if tree.elementInScope("button"):
        self.parser.parseError("unexpected-start-tag-implies-end-tag",
                               {"startName": "button", "endName": "button"})
        self.processEndTag(impliedTagToken("button"))
        return token
    tree.reconstructActiveFormattingElements()
    tree.insertElement(token)
    self.parser.framesetOK = False
| 1188 | |
def startTagAppletMarqueeObject(self, token):
    """Insert the element and push a Marker on the active formatting list."""
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    tree.insertElement(token)
    tree.activeFormattingElements.append(Marker)
    self.parser.framesetOK = False
| 1194 | |
def startTagXmp(self, token):
    """Close a p in button scope, then parse the content as RAWTEXT."""
    tree = self.tree
    if tree.elementInScope("p", variant="button"):
        self.endTagP(impliedTagToken("p"))
    tree.reconstructActiveFormattingElements()
    parser = self.parser
    parser.framesetOK = False
    parser.parseRCDataRawtext(token, "RAWTEXT")
| 1201 | |
def startTagTable(self, token):
    """Insert a table element and enter the "inTable" phase.

    Outside quirks mode, a p element in button scope is closed first.
    """
    parser = self.parser
    if (parser.compatMode != "quirks" and
            self.tree.elementInScope("p", variant="button")):
        self.processEndTag(impliedTagToken("p"))
    self.tree.insertElement(token)
    parser.framesetOK = False
    parser.phase = parser.phases["inTable"]
| 1209 | |
def startTagVoidFormatting(self, token):
    """Insert an element and pop it straight off the open-element stack.

    The token's self-closing flag is acknowledged and framesetOK is
    cleared.
    """
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    tree.insertElement(token)
    tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
    self.parser.framesetOK = False
| 1216 | |
def startTagInput(self, token):
    """Insert an input element; a hidden input leaves framesetOK unchanged."""
    saved_frameset_ok = self.parser.framesetOK
    self.startTagVoidFormatting(token)
    attrs = token["data"]
    if "type" in attrs:
        input_type = attrs["type"].translate(asciiUpper2Lower)
        if input_type == "hidden":
            # input type=hidden doesn't change framesetOK
            self.parser.framesetOK = saved_frameset_ok
| 1224 | |
def startTagParamSource(self, token):
    """Insert the element, pop it immediately, and acknowledge self-closing."""
    tree = self.tree
    tree.insertElement(token)
    tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
| 1229 | |
def startTagHr(self, token):
    """Close a p in button scope, then insert and immediately pop an hr."""
    tree = self.tree
    if tree.elementInScope("p", variant="button"):
        self.endTagP(impliedTagToken("p"))
    tree.insertElement(token)
    tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
    self.parser.framesetOK = False
| 1237 | |
def startTagImage(self, token):
    """Report the image tag as an error and reprocess it as an img tag.

    The replacement token keeps the original attributes and self-closing
    flag.
    """
    # No really...
    self.parser.parseError("unexpected-start-tag-treated-as",
                           {"originalName": "image", "newName": "img"})
    img_token = impliedTagToken("img", "StartTag",
                                attributes=token["data"],
                                selfClosing=token["selfClosing"])
    self.processStartTag(img_token)
| 1245 | |
def startTagIsIndex(self, token):
    """Expand the deprecated isindex tag into a form/hr/label/input group.

    Always reports a "deprecated-tag" parse error and does nothing more
    when a form pointer is already set. Otherwise it synthesises, in
    order: a form start tag (carrying the action attribute if present),
    an hr, a label start tag, the prompt text, an input named "isindex"
    carrying the remaining attributes, the label end tag, a second hr,
    and the form end tag.
    """
    self.parser.parseError("deprecated-tag", {"name": "isindex"})
    if self.tree.formPointer:
        return

    source_attrs = token["data"]
    form_attrs = {}
    if "action" in source_attrs:
        form_attrs["action"] = source_attrs["action"]
    self.processStartTag(impliedTagToken("form", "StartTag",
                                         attributes=form_attrs))
    self.processStartTag(impliedTagToken("hr", "StartTag"))
    self.processStartTag(impliedTagToken("label", "StartTag"))

    # XXX Localization ...
    if "prompt" in token["data"]:
        prompt = token["data"]["prompt"]
    else:
        prompt = "This is a searchable index. Enter search keywords: "
    self.processCharacters(
        {"type": tokenTypes["Characters"], "data": prompt})

    # The input receives every attribute except action and prompt.
    input_attrs = token["data"].copy()
    for consumed in ("action", "prompt"):
        if consumed in input_attrs:
            del input_attrs[consumed]
    input_attrs["name"] = "isindex"
    self.processStartTag(impliedTagToken("input", "StartTag",
                                         attributes=input_attrs,
                                         selfClosing=token["selfClosing"]))
    self.processEndTag(impliedTagToken("label"))
    self.processStartTag(impliedTagToken("hr", "StartTag"))
    self.processEndTag(impliedTagToken("form"))
| 1276 | |
def startTagTextarea(self, token):
    """Insert a textarea and switch the tokenizer to its RCDATA state.

    The newline-dropping whitespace handler is installed and framesetOK
    is cleared.
    """
    self.tree.insertElement(token)
    parser = self.parser
    tokenizer = parser.tokenizer
    tokenizer.state = tokenizer.rcdataState
    self.processSpaceCharacters = self.processSpaceCharactersDropNewline
    parser.framesetOK = False
| 1282 | |
def startTagIFrame(self, token):
    """Clear framesetOK, then treat the element as RAWTEXT."""
    parser = self.parser
    parser.framesetOK = False
    self.startTagRawtext(token)
| 1286 | |
def startTagNoscript(self, token):
    """Treat noscript as RAWTEXT with scripting on, else as any other tag."""
    if self.parser.scripting:
        handler = self.startTagRawtext
    else:
        handler = self.startTagOther
    handler(token)
| 1292 | |
def startTagRawtext(self, token):
    """Parse the element's content as RAWTEXT.

    Used for iframe, noembed, noframes, and noscript (if scripting is
    enabled).
    """
    content_type = "RAWTEXT"
    self.parser.parseRCDataRawtext(token, content_type)
| 1296 | |
def startTagOpt(self, token):
    """Insert an option/optgroup, closing a still-current option first."""
    current = self.tree.openElements[-1]
    if current.name == "option":
        implied_end = impliedTagToken("option")
        self.parser.phase.processEndTag(implied_end)
    self.tree.reconstructActiveFormattingElements()
    self.parser.tree.insertElement(token)
| 1302 | |
def startTagSelect(self, token):
    """Insert a select element and pick the matching select phase.

    If the parser's current phase is one of the table-related phases the
    next phase is "inSelectInTable"; otherwise it is "inSelect".
    """
    self.tree.reconstructActiveFormattingElements()
    self.tree.insertElement(token)
    parser = self.parser
    parser.framesetOK = False
    table_phase_names = ("inTable", "inCaption", "inColumnGroup",
                         "inTableBody", "inRow", "inCell")
    table_phases = tuple(parser.phases[name] for name in table_phase_names)
    if parser.phase in table_phases:
        next_phase = "inSelectInTable"
    else:
        next_phase = "inSelect"
    parser.phase = parser.phases[next_phase]
| 1316 | |
def startTagRpRt(self, token):
    """Insert an rp/rt element, closing implied end tags inside a ruby.

    When a ruby element is in scope, implied end tags are generated; if
    the current node is then not the ruby element, a parse error is
    reported with the parser's default error code.
    """
    tree = self.tree
    if tree.elementInScope("ruby"):
        tree.generateImpliedEndTags()
        current = tree.openElements[-1]
        if current.name != "ruby":
            self.parser.parseError()
    tree.insertElement(token)
| 1323 | |
def startTagMath(self, token):
    """Insert a math element in the MathML namespace.

    MathML and foreign attributes are adjusted first. A self-closing
    token is popped again immediately and its flag acknowledged.
    """
    tree = self.tree
    parser = self.parser
    tree.reconstructActiveFormattingElements()
    parser.adjustMathMLAttributes(token)
    parser.adjustForeignAttributes(token)
    token["namespace"] = namespaces["mathml"]
    tree.insertElement(token)
    # Need to get the parse error right for the case where the token
    # has a namespace not equal to the xmlns attribute
    if not token["selfClosing"]:
        return
    tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
| 1335 | |
def startTagSvg(self, token):
    """Insert an svg element in the SVG namespace.

    SVG and foreign attributes are adjusted first. A self-closing token
    is popped again immediately and its flag acknowledged.
    """
    tree = self.tree
    parser = self.parser
    tree.reconstructActiveFormattingElements()
    parser.adjustSVGAttributes(token)
    parser.adjustForeignAttributes(token)
    token["namespace"] = namespaces["svg"]
    tree.insertElement(token)
    # Need to get the parse error right for the case where the token
    # has a namespace not equal to the xmlns attribute
    if not token["selfClosing"]:
        return
    tree.openElements.pop()
    token["selfClosingAcknowledged"] = True
| 1347 | |
def startTagMisplaced(self, token):
    """Ignore a start tag that belongs to a different insertion mode.

    Registered in this phase's start-tag dispatcher for "caption",
    "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th",
    "thead" and "tr". The token is dropped after a parse error.
    """
    details = {"name": token["name"]}
    self.parser.parseError("unexpected-start-tag-ignored", details)
| 1356 | |
def startTagOther(self, token):
    """Default start-tag handler: reconstruct formatting, then insert."""
    tree = self.tree
    tree.reconstructActiveFormattingElements()
    tree.insertElement(token)
| 1360 | |
def endTagP(self, token):
    """Close the nearest p element in button scope.

    With no p in button scope, an implied p start tag is processed, a
    parse error is reported, and the method calls itself with an implied
    end tag to close the new element. Otherwise implied end tags (except
    p) are generated and the stack is popped up to and including the p.
    """
    tree = self.tree
    if not tree.elementInScope("p", variant="button"):
        self.startTagCloseP(impliedTagToken("p", "StartTag"))
        self.parser.parseError("unexpected-end-tag", {"name": "p"})
        self.endTagP(impliedTagToken("p", "EndTag"))
        return
    tree.generateImpliedEndTags("p")
    if tree.openElements[-1].name != "p":
        self.parser.parseError("unexpected-end-tag", {"name": "p"})
    # Pop until the p element itself has been removed.
    while tree.openElements.pop().name != "p":
        pass
| 1373 | |
def endTagBody(self, token):
    """Handle a body end tag by moving to the "afterBody" phase.

    If no body is in scope, a parse error is reported and the token is
    ignored. If the current node is not the body, the open elements
    from index 2 onwards are checked and one parse error is reported for
    the first element that is not allowed to remain open.
    """
    if not self.tree.elementInScope("body"):
        self.parser.parseError()
        return
    if self.tree.openElements[-1].name != "body":
        # Built once here rather than on every loop iteration.
        may_stay_open = frozenset(("dd", "dt", "li", "optgroup",
                                   "option", "p", "rp", "rt",
                                   "tbody", "td", "tfoot",
                                   "th", "thead", "tr", "body",
                                   "html"))
        for node in self.tree.openElements[2:]:
            if node.name not in may_stay_open:
                # Not sure this is the correct name for the parse error
                self.parser.parseError(
                    "expected-one-end-tag-but-got-another",
                    {"gotName": "body", "expectedName": node.name})
                break
    self.parser.phase = self.parser.phases["afterBody"]
| 1391 | |
def endTagHtml(self, token):
    """Treat an html end tag as an implied body end tag.

    Returns the token when a body is in scope and None otherwise.
    """
    # We repeat the test for the body end tag token being ignored here
    if not self.tree.elementInScope("body"):
        return None
    self.endTagBody(impliedTagToken("body"))
    return token
| 1397 | |
def endTagBlock(self, token):
    """Close the named block element if it is in scope.

    A parse error is reported whenever the current node does not match
    the end tag, whether or not the element is in scope. Elements are
    only popped when it is in scope.
    """
    tag_name = token["name"]
    # Put us back in the right whitespace handling mode
    if tag_name == "pre":
        self.processSpaceCharacters = self.processSpaceCharactersNonPre
    tree = self.tree
    in_scope = tree.elementInScope(tag_name)
    if in_scope:
        tree.generateImpliedEndTags()
    if tree.openElements[-1].name != tag_name:
        self.parser.parseError("end-tag-too-early", {"name": tag_name})
    if in_scope:
        # Pop up to and including the matching element.
        while tree.openElements.pop().name != tag_name:
            pass
| 1411 | |
def endTagForm(self, token):
    """Clear the form pointer and remove that form from the open stack.

    With no form pointer, or a form that is not in scope, a parse error
    is reported and nothing is removed. Otherwise implied end tags are
    generated, a parse error is reported if the form is not the current
    node, and the form is removed from the stack wherever it sits.
    """
    tree = self.tree
    form_node = tree.formPointer
    tree.formPointer = None
    if form_node is None or not tree.elementInScope(form_node):
        self.parser.parseError("unexpected-end-tag",
                               {"name": "form"})
        return
    tree.generateImpliedEndTags()
    if tree.openElements[-1] != form_node:
        self.parser.parseError("end-tag-too-early-ignored",
                               {"name": "form"})
    tree.openElements.remove(form_node)
| 1424 | |
def endTagListItem(self, token):
    """Close the nearest li/dd/dt element that is in scope.

    The li tag is looked up with the "list" scope variant; dd and dt use
    the default scope. An end tag with no match in scope is a parse
    error and is ignored.
    """
    tag_name = token["name"]
    scope_variant = "list" if tag_name == "li" else None
    tree = self.tree
    if not tree.elementInScope(tag_name, variant=scope_variant):
        self.parser.parseError("unexpected-end-tag", {"name": tag_name})
        return
    tree.generateImpliedEndTags(exclude=tag_name)
    if tree.openElements[-1].name != tag_name:
        self.parser.parseError(
            "end-tag-too-early",
            {"name": tag_name})
    # Pop up to and including the matching element.
    while tree.openElements.pop().name != tag_name:
        pass
| 1441 | |
def endTagHeading(self, token):
    """Close the nearest heading element that is in scope.

    Implied end tags are generated if any heading is in scope. A parse
    error is reported when the current node does not match the end tag.
    If a heading is in scope, elements are popped until a heading of any
    level has been removed.
    """
    tree = self.tree
    if any(tree.elementInScope(heading) for heading in headingElements):
        tree.generateImpliedEndTags()
    if tree.openElements[-1].name != token["name"]:
        self.parser.parseError("end-tag-too-early", {"name": token["name"]})

    if any(tree.elementInScope(heading) for heading in headingElements):
        popped = tree.openElements.pop()
        while popped.name not in headingElements:
            popped = tree.openElements.pop()
| 1456 | |
| 1457 def endTagFormatting(self, token): | |
| 1458 """The much-feared adoption agency algorithm""" | |
| 1459 # http://svn.whatwg.org/webapps/complete.html#adoptionAgency revision 7867 | |
| 1460 # XXX Better parseError messages appreciated. | |
| 1461 | |
| 1462 # Step 1 | |
| 1463 outerLoopCounter = 0 | |
| 1464 | |
| 1465 # Step 2 | |
| 1466 while outerLoopCounter < 8: | |
| 1467 | |
| 1468 # Step 3 | |
| 1469 outerLoopCounter += 1 | |
| 1470 | |
| 1471 # Step 4: | |
| 1472 | |
| 1473 # Let the formatting element be the last element in | |
| 1474 # the list of active formatting elements that: | |
| 1475 # - is between the end of the list and the last scope | |
| 1476 # marker in the list, if any, or the start of the list | |
| 1477 # otherwise, and | |
| 1478 # - has the same tag name as the token. | |
| 1479 formattingElement = self.tree.elementInActiveFormattingElements( | |
| 1480 token["name"]) | |
| 1481 if (not formattingElement or | |
| 1482 (formattingElement in self.tree.openElements and | |
| 1483 not self.tree.elementInScope(formattingElement.name))): | |
| 1484 # If there is no such node, then abort these steps | |
| 1485 # and instead act as described in the "any other | |
| 1486 # end tag" entry below. | |
| 1487 self.endTagOther(token) | |
| 1488 return | |
| 1489 | |
| 1490 # Otherwise, if there is such a node, but that node is | |
| 1491 # not in the stack of open elements, then this is a | |
| 1492 # parse error; remove the element from the list, and | |
| 1493 # abort these steps. | |
| 1494 elif formattingElement not in self.tree.openElements: | |
| 1495 self.parser.parseError("adoption-agency-1.2", {"name": token["name"]}) | |
| 1496 self.tree.activeFormattingElements.remove(formattingElement) | |
| 1497 return | |
| 1498 | |
| 1499 # Otherwise, if there is such a node, and that node is | |
| 1500 # also in the stack of open elements, but the element | |
| 1501 # is not in scope, then this is a parse error; ignore | |
| 1502 # the token, and abort these steps. | |
| 1503 elif not self.tree.elementInScope(formattingElement.name): | |
| 1504 self.parser.parseError("adoption-agency-4.4", {"name": token["name"]}) | |
| 1505 return | |
| 1506 | |
| 1507 # Otherwise, there is a formatting element and that | |
| 1508 # element is in the stack and is in scope. If the | |
| 1509 # element is not the current node, this is a parse | |
| 1510 # error. In any case, proceed with the algorithm as | |
| 1511 # written in the following steps. | |
| 1512 else: | |
| 1513 if formattingElement != self.tree.openElements[-1]: | |
| 1514 self.parser.parseError("adoption-agency-1.3", {"name": token["name"]}) | |
| 1515 | |
| 1516 # Step 5: | |
| 1517 | |
| 1518 # Let the furthest block be the topmost node in the | |
| 1519 # stack of open elements that is lower in the stack | |
| 1520 # than the formatting element, and is an element in | |
| 1521 # the special category. There might not be one. | |
| 1522 afeIndex = self.tree.openElements.index(formattingElement) | |
| 1523 furthestBlock = None | |
| 1524 for element in self.tree.openElements[afeIndex:]: | |
| 1525 if element.nameTuple in specialElements: | |
| 1526 furthestBlock = element | |
| 1527 break | |
| 1528 | |
| 1529 # Step 6: | |
| 1530 | |
| 1531 # If there is no furthest block, then the UA must | |
| 1532 # first pop all the nodes from the bottom of the stack | |
| 1533 # of open elements, from the current node up to and | |
| 1534 # including the formatting element, then remove the | |
| 1535 # formatting element from the list of active | |
| 1536 # formatting elements, and finally abort these steps. | |
| 1537 if furthestBlock is None: | |
| 1538 element = self.tree.openElements.pop() | |
| 1539 while element != formattingElement: | |
| 1540 element = self.tree.openElements.pop() | |
| 1541 self.tree.activeFormattingElements.remove(element) | |
| 1542 return | |
| 1543 | |
| 1544 # Step 7 | |
| 1545 commonAncestor = self.tree.openElements[afeIndex - 1] | |
| 1546 | |
| 1547 # Step 8: | |
| 1548 # The bookmark is supposed to help us identify where to reinsert | |
| 1549 # nodes in step 15. We have to ensure that we reinsert nodes after | |
| 1550 # the node before the active formatting element. Note the bookmark | |
| 1551 # can move in step 9.7 | |
| 1552 bookmark = self.tree.activeFormattingElements.index(formattingElement) | |
| 1553 | |
| 1554 # Step 9 | |
| 1555 lastNode = node = furthestBlock | |
| 1556 innerLoopCounter = 0 | |
| 1557 | |
| 1558 index = self.tree.openElements.index(node) | |
| 1559 while innerLoopCounter < 3: | |
| 1560 innerLoopCounter += 1 | |
| 1561 # Node is element before node in open elements | |
| 1562 index -= 1 | |
| 1563 node = self.tree.openElements[index] | |
| 1564 if node not in self.tree.activeFormattingElements: | |
| 1565 self.tree.openElements.remove(node) | |
| 1566 continue | |
| 1567 # Step 9.6 | |
| 1568 if node == formattingElement: | |
| 1569 break | |
| 1570 # Step 9.7 | |
| 1571 if lastNode == furthestBlock: | |
| 1572 bookmark = self.tree.activeFormattingElements.index(node) + 1 | |
| 1573 # Step 9.8 | |
| 1574 clone = node.cloneNode() | |
| 1575 # Replace node with clone | |
| 1576 self.tree.activeFormattingElements[ | |
| 1577 self.tree.activeFormattingElements.index(node)] = clone | |
| 1578 self.tree.openElements[ | |
| 1579 self.tree.openElements.index(node)] = clone | |
| 1580 node = clone | |
| 1581 # Step 9.9 | |
| 1582 # Remove lastNode from its parents, if any | |
| 1583 if lastNode.parent: | |
| 1584 lastNode.parent.removeChild(lastNode) | |
| 1585 node.appendChild(lastNode) | |
| 1586 # Step 9.10 | |
| 1587 lastNode = node | |
| 1588 | |
| 1589 # Step 10 | |
| 1590 # Foster parent lastNode if commonAncestor is a | |
| 1591 # table, tbody, tfoot, thead, or tr we need to foster | |
| 1592 # parent the lastNode | |
| 1593 if lastNode.parent: | |
| 1594 lastNode.parent.removeChild(lastNode) | |
| 1595 | |
| 1596 if commonAncestor.name in frozenset(("table", "tbody", "tfoot", "thead", "tr")): | |
| 1597 parent, insertBefore = self.tree.getTableMisnestedNodePosition() | |
| 1598 parent.insertBefore(lastNode, insertBefore) | |
| 1599 else: | |
| 1600 commonAncestor.appendChild(lastNode) | |
| 1601 | |
| 1602 # Step 11 | |
| 1603 clone = formattingElement.cloneNode() | |
| 1604 | |
| 1605 # Step 12 | |
| 1606 furthestBlock.reparentChildren(clone) | |
| 1607 | |
| 1608 # Step 13 | |
| 1609 furthestBlock.appendChild(clone) | |
| 1610 | |
| 1611 # Step 14 | |
| 1612 self.tree.activeFormattingElements.remove(formattingElement) | |
| 1613 self.tree.activeFormattingElements.insert(bookmark, clone) | |
| 1614 | |
| 1615 # Step 15 | |
| 1616 self.tree.openElements.remove(formattingElement) | |
| 1617 self.tree.openElements.insert( | |
| 1618 self.tree.openElements.index(furthestBlock) + 1, clone) | |
| 1619 | |
def endTagAppletMarqueeObject(self, token):
    """Close the element named by ``token`` if it is in scope.

    Implied end tags are generated only when the element is in scope; a
    parse error is reported whenever the current node's name differs
    from the token's name.  When the element is in scope the stack of
    open elements is popped up to and including it, and the list of
    active formatting elements is cleared.

    :arg token: end tag token; only ``token["name"]`` is read
    """
    tagName = token["name"]
    tree = self.tree

    if tree.elementInScope(tagName):
        tree.generateImpliedEndTags()
    if tree.openElements[-1].name != tagName:
        self.parser.parseError("end-tag-too-early", {"name": tagName})

    # Scope is checked a second time, after implied end tags were handled.
    if not tree.elementInScope(tagName):
        return
    while tree.openElements.pop().name != tagName:
        pass
    tree.clearActiveFormattingElements()
| 1631 | |
def endTagBr(self, token):
    """Report ``</br>`` as a parse error and insert a ``br`` element instead.

    The active formatting elements are reconstructed, a synthetic ``br``
    start tag is inserted, and the new element is immediately popped
    from the stack of open elements.

    :arg token: the end tag token (not inspected)
    """
    errorData = {"originalName": "br", "newName": "br element"}
    self.parser.parseError("unexpected-end-tag-treated-as", errorData)

    tree = self.tree
    tree.reconstructActiveFormattingElements()
    syntheticBr = impliedTagToken("br", "StartTag")
    tree.insertElement(syntheticBr)
    # The inserted element does not stay open.
    tree.openElements.pop()
| 1638 | |
def endTagOther(self, token):
    """Generic end-tag handling: close the nearest open element of that name.

    The stack of open elements is walked from the current node upwards
    (over a snapshot, since the stack is mutated on a match).  On a name
    match, implied end tags other than ``token["name"]`` are generated,
    a parse error is reported if the current node is then not the
    matching name, and the stack is popped through the matched node.
    If an element in ``specialElements`` is met first, the token is
    reported as a parse error and ignored.

    :arg token: end tag token; only ``token["name"]`` is read
    """
    wanted = token["name"]
    for candidate in self.tree.openElements[::-1]:
        if candidate.name != wanted:
            if candidate.nameTuple in specialElements:
                self.parser.parseError("unexpected-end-tag", {"name": wanted})
                break
            continue

        self.tree.generateImpliedEndTags(exclude=wanted)
        if self.tree.openElements[-1].name != wanted:
            self.parser.parseError("unexpected-end-tag", {"name": wanted})
        # Pop everything above the matched node, and the node itself.
        while self.tree.openElements.pop() != candidate:
            pass
        break
| 1652 | |
class TextPhase(Phase):
    """Phase that feeds character data straight into the current node.

    Every end tag (and EOF) pops the current node and switches the
    parser back to ``parser.originalPhase``.  Start tags are not
    expected while this phase is active.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        # No start tag has a dedicated handler; all go to the default.
        startDispatcher = _utils.MethodDispatcher([])
        startDispatcher.default = self.startTagOther
        self.startTagHandler = startDispatcher

        endDispatcher = _utils.MethodDispatcher([
            ("script", self.endTagScript)])
        endDispatcher.default = self.endTagOther
        self.endTagHandler = endDispatcher

    def processCharacters(self, token):
        """Append the token's character data to the tree."""
        self.tree.insertText(token["data"])

    def processEOF(self):
        """Report the unclosed element, pop it and leave this phase.

        :returns: ``True``
        """
        unclosed = self.tree.openElements[-1]
        self.parser.parseError("expected-named-closing-tag-but-got-eof",
                               {"name": unclosed.name})
        self.tree.openElements.pop()
        self.parser.phase = self.parser.originalPhase
        return True

    def startTagOther(self, token):
        """Start tags must never reach this phase; fail loudly if one does."""
        assert False, "Tried to process start tag %s in RCDATA/RAWTEXT mode" % token['name']

    def endTagScript(self, token):
        """Pop the current node (asserted to be ``script``) and leave this phase."""
        closed = self.tree.openElements.pop()
        assert closed.name == "script"
        self.parser.phase = self.parser.originalPhase
        # Everything else the spec describes here only matters when
        # document.write works.

    def endTagOther(self, token):
        """Pop the current node and return to the original phase."""
        self.tree.openElements.pop()
        self.parser.phase = self.parser.originalPhase
| 1685 | |
class InTablePhase(Phase):
    """Token handling for the "in table" insertion mode.

    http://www.whatwg.org/specs/web-apps/current-work/#in-table
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startDispatcher = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("caption", self.startTagCaption),
            ("colgroup", self.startTagColgroup),
            ("col", self.startTagCol),
            (("tbody", "tfoot", "thead"), self.startTagRowGroup),
            (("td", "th", "tr"), self.startTagImplyTbody),
            ("table", self.startTagTable),
            (("style", "script"), self.startTagStyleScript),
            ("input", self.startTagInput),
            ("form", self.startTagForm)
        ])
        startDispatcher.default = self.startTagOther
        self.startTagHandler = startDispatcher

        endDispatcher = _utils.MethodDispatcher([
            ("table", self.endTagTable),
            (("body", "caption", "col", "colgroup", "html", "tbody", "td",
              "tfoot", "th", "thead", "tr"), self.endTagIgnore)
        ])
        endDispatcher.default = self.endTagOther
        self.endTagHandler = endDispatcher

    # ---- helper methods -------------------------------------------------

    def clearStackToTableContext(self):
        """Pop open elements until the current node is ``table`` or ``html``.

        This is the spec's "clear the stack back to a table context".
        Ending on ``html`` is the innerHTML case.
        """
        stack = self.tree.openElements
        while stack[-1].name not in ("table", "html"):
            # No parse error is reported for the elements dropped here.
            stack.pop()

    # ---- processing methods ---------------------------------------------

    def processEOF(self):
        """Report EOF inside a table; parsing stops afterwards."""
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML
        else:
            self.parser.parseError("eof-in-table")
        # Stop parsing

    def processSpaceCharacters(self, token):
        """Switch to the "inTableText" phase and let it buffer the token."""
        textPhase = self.parser.phases["inTableText"]
        previousPhase = self.parser.phase
        self.parser.phase = textPhase
        textPhase.originalPhase = previousPhase
        textPhase.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Switch to the "inTableText" phase and let it buffer the token."""
        textPhase = self.parser.phases["inTableText"]
        previousPhase = self.parser.phase
        self.parser.phase = textPhase
        textPhase.originalPhase = previousPhase
        textPhase.processCharacters(token)

    def insertText(self, token):
        """Insert character data via the "inBody" rules with table insertion on.

        Callers only get here when the data holds at least one
        non-whitespace character.
        """
        tree = self.tree
        tree.insertFromTable = True
        self.parser.phases["inBody"].processCharacters(token)
        tree.insertFromTable = False

    def startTagCaption(self, token):
        """Open a caption: clear to table context, push a marker, insert."""
        self.clearStackToTableContext()
        self.tree.activeFormattingElements.append(Marker)
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inCaption"]

    def startTagColgroup(self, token):
        """Insert a colgroup and switch to the "inColumnGroup" phase."""
        self.clearStackToTableContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inColumnGroup"]

    def startTagCol(self, token):
        """Imply a ``<colgroup>`` and hand the token back for reprocessing."""
        impliedColgroup = impliedTagToken("colgroup", "StartTag")
        self.startTagColgroup(impliedColgroup)
        return token

    def startTagRowGroup(self, token):
        """Insert the row group element and switch to "inTableBody"."""
        self.clearStackToTableContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inTableBody"]

    def startTagImplyTbody(self, token):
        """Imply a ``<tbody>`` and hand the token back for reprocessing."""
        impliedTbody = impliedTagToken("tbody", "StartTag")
        self.startTagRowGroup(impliedTbody)
        return token

    def startTagTable(self, token):
        """Treat a nested ``<table>`` start tag as an implied ``</table>``.

        :returns: the token for reprocessing unless parsing innerHTML
        """
        self.parser.parseError("unexpected-start-tag-implies-end-tag",
                               {"startName": "table", "endName": "table"})
        self.parser.phase.processEndTag(impliedTagToken("table"))
        if self.parser.innerHTML:
            return None
        return token

    def startTagStyleScript(self, token):
        """Delegate to the "inHead" phase."""
        return self.parser.phases["inHead"].processStartTag(token)

    def startTagInput(self, token):
        """Insert ``<input type=hidden>`` in place; anything else is "other"."""
        attributes = token["data"]
        isHidden = ("type" in attributes and
                    attributes["type"].translate(asciiUpper2Lower) == "hidden")
        if not isHidden:
            self.startTagOther(token)
            return
        self.parser.parseError("unexpected-hidden-input-in-table")
        self.tree.insertElement(token)
        # XXX associate with form
        self.tree.openElements.pop()

    def startTagForm(self, token):
        """Insert a ``form`` only when no form pointer is set, then pop it."""
        self.parser.parseError("unexpected-form-in-table")
        tree = self.tree
        if tree.formPointer is not None:
            return
        tree.insertElement(token)
        tree.formPointer = tree.openElements[-1]
        tree.openElements.pop()

    def startTagOther(self, token):
        """Process via "inBody" rules with table insertion switched on."""
        self.parser.parseError("unexpected-start-tag-implies-table-voodoo", {"name": token["name"]})
        tree = self.tree
        tree.insertFromTable = True
        self.parser.phases["inBody"].processStartTag(token)
        tree.insertFromTable = False

    def endTagTable(self, token):
        """Close the table that is in table scope and reset the insertion mode."""
        tree = self.tree
        if not tree.elementInScope("table", variant="table"):
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return

        tree.generateImpliedEndTags()
        currentName = tree.openElements[-1].name
        if currentName != "table":
            self.parser.parseError("end-tag-too-early-named",
                                   {"gotName": "table",
                                    "expectedName": currentName})
        while tree.openElements[-1].name != "table":
            tree.openElements.pop()
        tree.openElements.pop()
        self.parser.resetInsertionMode()

    def endTagIgnore(self, token):
        """Report the end tag as unexpected and otherwise ignore it."""
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def endTagOther(self, token):
        """Process via "inBody" rules with table insertion switched on."""
        self.parser.parseError("unexpected-end-tag-implies-table-voodoo", {"name": token["name"]})
        tree = self.tree
        tree.insertFromTable = True
        self.parser.phases["inBody"].processEndTag(token)
        tree.insertFromTable = False
| 1830 | |
class InTableTextPhase(Phase):
    """Buffers character tokens seen inside a table until another token arrives.

    ``originalPhase`` is set by whoever switches into this phase, and is
    restored as soon as a non-character token (or EOF) shows up.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)
        self.originalPhase = None
        self.characterTokens = []

    def flushCharacters(self):
        """Emit the buffered text and empty the buffer.

        Text with any non-space character is routed through the
        "inTable" phase's ``insertText``; non-empty whitespace-only text
        is inserted directly into the tree.
        """
        text = "".join(pending["data"] for pending in self.characterTokens)
        if any(char not in spaceCharacters for char in text):
            merged = {"type": tokenTypes["Characters"], "data": text}
            self.parser.phases["inTable"].insertText(merged)
        elif text:
            self.tree.insertText(text)
        self.characterTokens = []

    def processComment(self, token):
        """Flush, restore the original phase, and return the token."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return token

    def processEOF(self):
        """Flush, restore the original phase, and return ``True``."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return True

    def processCharacters(self, token):
        """Buffer the token, dropping a lone U+0000 character."""
        if token["data"] != "\u0000":
            self.characterTokens.append(token)

    def processSpaceCharacters(self, token):
        """Buffer the whitespace token."""
        # pretty sure we should never reach here
        self.characterTokens.append(token)

    def processStartTag(self, token):
        """Flush, restore the original phase, and return the token."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return token

    def processEndTag(self, token):
        """Flush, restore the original phase, and return the token."""
        self.flushCharacters()
        self.parser.phase = self.originalPhase
        return token
| 1875 | |
class InCaptionPhase(Phase):
    """Token handling for the "in caption" insertion mode.

    http://www.whatwg.org/specs/web-apps/current-work/#in-caption
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startDispatcher = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
              "thead", "tr"), self.startTagTableElement)
        ])
        startDispatcher.default = self.startTagOther
        self.startTagHandler = startDispatcher

        endDispatcher = _utils.MethodDispatcher([
            ("caption", self.endTagCaption),
            ("table", self.endTagTable),
            (("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
              "thead", "tr"), self.endTagIgnore)
        ])
        endDispatcher.default = self.endTagOther
        self.endTagHandler = endDispatcher

    def ignoreEndTagCaption(self):
        """Return True when no ``caption`` is in table scope."""
        captionOpen = self.tree.elementInScope("caption", variant="table")
        return not captionOpen

    def processEOF(self):
        """Delegate EOF to the "inBody" phase (its result is not returned)."""
        self.parser.phases["inBody"].processEOF()

    def processCharacters(self, token):
        """Delegate to the "inBody" phase."""
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processCharacters(token)

    def startTagTableElement(self, token):
        """Imply ``</caption>``; return the token unless that was ignored."""
        self.parser.parseError()
        # XXX The "would it be ignored" check is duplicated here on purpose:
        # it has to be evaluated before the implied end tag is processed.
        wasIgnored = self.ignoreEndTagCaption()
        self.parser.phase.processEndTag(impliedTagToken("caption"))
        if wasIgnored:
            return None
        return token

    def startTagOther(self, token):
        """Delegate to the "inBody" phase."""
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processStartTag(token)

    def endTagCaption(self, token):
        """Close the caption in table scope and go back to "inTable"."""
        if self.ignoreEndTagCaption():
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return

        # AT this code is quite similar to endTagTable in "InTable"
        tree = self.tree
        tree.generateImpliedEndTags()
        currentName = tree.openElements[-1].name
        if currentName != "caption":
            self.parser.parseError("expected-one-end-tag-but-got-another",
                                   {"gotName": "caption",
                                    "expectedName": currentName})
        while tree.openElements[-1].name != "caption":
            tree.openElements.pop()
        tree.openElements.pop()
        tree.clearActiveFormattingElements()
        self.parser.phase = self.parser.phases["inTable"]

    def endTagTable(self, token):
        """Imply ``</caption>``; return the token unless that was ignored."""
        self.parser.parseError()
        wasIgnored = self.ignoreEndTagCaption()
        self.parser.phase.processEndTag(impliedTagToken("caption"))
        if wasIgnored:
            return None
        return token

    def endTagIgnore(self, token):
        """Report the end tag as unexpected and otherwise ignore it."""
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def endTagOther(self, token):
        """Delegate to the "inBody" phase."""
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processEndTag(token)
| 1946 | |
class InColumnGroupPhase(Phase):
    """Token handling for the "in column group" insertion mode.

    http://www.whatwg.org/specs/web-apps/current-work/#in-column
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startDispatcher = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("col", self.startTagCol)
        ])
        startDispatcher.default = self.startTagOther
        self.startTagHandler = startDispatcher

        endDispatcher = _utils.MethodDispatcher([
            ("colgroup", self.endTagColgroup),
            ("col", self.endTagCol)
        ])
        endDispatcher.default = self.endTagOther
        self.endTagHandler = endDispatcher

    def ignoreEndTagColgroup(self):
        """Return True when the current node is ``html``."""
        currentNode = self.tree.openElements[-1]
        return currentNode.name == "html"

    def processEOF(self):
        """Imply ``</colgroup>`` at EOF.

        :returns: ``True`` when the implied end tag was acted upon,
            otherwise ``None``
        """
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML
            return None
        wasIgnored = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        if wasIgnored:
            return None
        return True

    def processCharacters(self, token):
        """Imply ``</colgroup>``; return the token unless that was ignored."""
        wasIgnored = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        if wasIgnored:
            return None
        return token

    def startTagCol(self, token):
        """Insert ``col``, pop it at once and acknowledge self-closing."""
        self.tree.insertElement(token)
        self.tree.openElements.pop()
        token["selfClosingAcknowledged"] = True

    def startTagOther(self, token):
        """Imply ``</colgroup>``; return the token unless that was ignored."""
        wasIgnored = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        if wasIgnored:
            return None
        return token

    def endTagColgroup(self, token):
        """Pop the current node and return to "inTable", unless ignored."""
        if not self.ignoreEndTagColgroup():
            self.tree.openElements.pop()
            self.parser.phase = self.parser.phases["inTable"]
            return
        # innerHTML case
        assert self.parser.innerHTML
        self.parser.parseError()

    def endTagCol(self, token):
        """``</col>`` is always a parse error."""
        self.parser.parseError("no-end-tag", {"name": "col"})

    def endTagOther(self, token):
        """Imply ``</colgroup>``; return the token unless that was ignored."""
        wasIgnored = self.ignoreEndTagColgroup()
        self.endTagColgroup(impliedTagToken("colgroup"))
        if wasIgnored:
            return None
        return token
| 2012 | |
class InTableBodyPhase(Phase):
    """Token handling for the "in table body" insertion mode.

    http://www.whatwg.org/specs/web-apps/current-work/#in-table0
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startDispatcher = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("tr", self.startTagTr),
            (("td", "th"), self.startTagTableCell),
            (("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
             self.startTagTableOther)
        ])
        startDispatcher.default = self.startTagOther
        self.startTagHandler = startDispatcher

        endDispatcher = _utils.MethodDispatcher([
            (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
            ("table", self.endTagTable),
            (("body", "caption", "col", "colgroup", "html", "td", "th",
              "tr"), self.endTagIgnore)
        ])
        endDispatcher.default = self.endTagOther
        self.endTagHandler = endDispatcher

    # ---- helper methods -------------------------------------------------

    def clearStackToTableBodyContext(self):
        """Pop until the current node is tbody, tfoot, thead or html."""
        stack = self.tree.openElements
        while stack[-1].name not in ("tbody", "tfoot", "thead", "html"):
            # No parse error is reported for the elements dropped here.
            stack.pop()
        if stack[-1].name == "html":
            assert self.parser.innerHTML

    # ---- the rest -------------------------------------------------------

    def processEOF(self):
        """Delegate EOF to the "inTable" phase (its result is not returned)."""
        self.parser.phases["inTable"].processEOF()

    def processSpaceCharacters(self, token):
        """Delegate to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Delegate to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processCharacters(token)

    def startTagTr(self, token):
        """Insert the row and switch to the "inRow" phase."""
        self.clearStackToTableBodyContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inRow"]

    def startTagTableCell(self, token):
        """A cell outside a row: imply ``<tr>`` and reprocess the token."""
        self.parser.parseError("unexpected-cell-in-table-body",
                               {"name": token["name"]})
        self.startTagTr(impliedTagToken("tr", "StartTag"))
        return token

    def startTagTableOther(self, token):
        """Close the open row group, then hand the token back."""
        # XXX AT Any ideas on how to share this with endTagTable?
        rowGroupOpen = any(
            self.tree.elementInScope(groupName, variant="table")
            for groupName in ("tbody", "thead", "tfoot"))
        if not rowGroupOpen:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None
        self.clearStackToTableBodyContext()
        currentName = self.tree.openElements[-1].name
        self.endTagTableRowGroup(impliedTagToken(currentName))
        return token

    def startTagOther(self, token):
        """Delegate to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processStartTag(token)

    def endTagTableRowGroup(self, token):
        """Close the named row group if it is in table scope."""
        groupName = token["name"]
        if not self.tree.elementInScope(groupName, variant="table"):
            self.parser.parseError("unexpected-end-tag-in-table-body",
                                   {"name": groupName})
            return
        self.clearStackToTableBodyContext()
        self.tree.openElements.pop()
        self.parser.phase = self.parser.phases["inTable"]

    def endTagTable(self, token):
        """Close the open row group, then hand ``</table>`` back."""
        rowGroupOpen = any(
            self.tree.elementInScope(groupName, variant="table")
            for groupName in ("tbody", "thead", "tfoot"))
        if not rowGroupOpen:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None
        self.clearStackToTableBodyContext()
        currentName = self.tree.openElements[-1].name
        self.endTagTableRowGroup(impliedTagToken(currentName))
        return token

    def endTagIgnore(self, token):
        """Report the end tag as unexpected and otherwise ignore it."""
        self.parser.parseError("unexpected-end-tag-in-table-body",
                               {"name": token["name"]})

    def endTagOther(self, token):
        """Delegate to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processEndTag(token)
| 2110 | |
class InRowPhase(Phase):
    """Token handling for the "in row" insertion mode.

    http://www.whatwg.org/specs/web-apps/current-work/#in-row
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startDispatcher = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            (("td", "th"), self.startTagTableCell),
            (("caption", "col", "colgroup", "tbody", "tfoot", "thead",
              "tr"), self.startTagTableOther)
        ])
        startDispatcher.default = self.startTagOther
        self.startTagHandler = startDispatcher

        endDispatcher = _utils.MethodDispatcher([
            ("tr", self.endTagTr),
            ("table", self.endTagTable),
            (("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
            (("body", "caption", "col", "colgroup", "html", "td", "th"),
             self.endTagIgnore)
        ])
        endDispatcher.default = self.endTagOther
        self.endTagHandler = endDispatcher

    # ---- helper methods (XXX unify this with other table helper methods) --

    def clearStackToTableRowContext(self):
        """Pop until the current node is ``tr`` or ``html``.

        Unlike the table and table-body variants, each element dropped
        here is reported as a parse error.
        """
        stack = self.tree.openElements
        while stack[-1].name not in ("tr", "html"):
            self.parser.parseError("unexpected-implied-end-tag-in-table-row",
                                   {"name": stack[-1].name})
            stack.pop()

    def ignoreEndTagTr(self):
        """Return True when no ``tr`` is in table scope."""
        rowOpen = self.tree.elementInScope("tr", variant="table")
        return not rowOpen

    # ---- the rest -------------------------------------------------------

    def processEOF(self):
        """Delegate EOF to the "inTable" phase (its result is not returned)."""
        self.parser.phases["inTable"].processEOF()

    def processSpaceCharacters(self, token):
        """Delegate to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Delegate to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processCharacters(token)

    def startTagTableCell(self, token):
        """Insert the cell, switch to "inCell" and push a formatting marker."""
        self.clearStackToTableRowContext()
        self.tree.insertElement(token)
        self.parser.phase = self.parser.phases["inCell"]
        self.tree.activeFormattingElements.append(Marker)

    def startTagTableOther(self, token):
        """Imply ``</tr>``; return the token unless that was ignored."""
        wasIgnored = self.ignoreEndTagTr()
        self.endTagTr(impliedTagToken("tr"))
        # XXX how are we sure it's always ignored in the innerHTML case?
        if wasIgnored:
            return None
        return token

    def startTagOther(self, token):
        """Delegate to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processStartTag(token)

    def endTagTr(self, token):
        """Close the current row and return to "inTableBody"."""
        if self.ignoreEndTagTr():
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return
        self.clearStackToTableRowContext()
        self.tree.openElements.pop()
        self.parser.phase = self.parser.phases["inTableBody"]

    def endTagTable(self, token):
        """Imply ``</tr>``; reprocess the token unless that was ignored."""
        wasIgnored = self.ignoreEndTagTr()
        self.endTagTr(impliedTagToken("tr"))
        # XXX how are we sure it's always ignored in the innerHTML case?
        if wasIgnored:
            return None
        return token

    def endTagTableRowGroup(self, token):
        """Imply ``</tr>`` when the named row group is in table scope."""
        if not self.tree.elementInScope(token["name"], variant="table"):
            self.parser.parseError()
            return None
        self.endTagTr(impliedTagToken("tr"))
        return token

    def endTagIgnore(self, token):
        """Report the end tag as unexpected and otherwise ignore it."""
        self.parser.parseError("unexpected-end-tag-in-table-row",
                               {"name": token["name"]})

    def endTagOther(self, token):
        """Delegate to the "inTable" phase."""
        tablePhase = self.parser.phases["inTable"]
        return tablePhase.processEndTag(token)
| 2199 | |
class InCellPhase(Phase):
    """Token handling for the "in cell" insertion mode.

    http://www.whatwg.org/specs/web-apps/current-work/#in-cell
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        startDispatcher = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            (("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
              "thead", "tr"), self.startTagTableOther)
        ])
        startDispatcher.default = self.startTagOther
        self.startTagHandler = startDispatcher

        endDispatcher = _utils.MethodDispatcher([
            (("td", "th"), self.endTagTableCell),
            (("body", "caption", "col", "colgroup", "html"), self.endTagIgnore),
            (("table", "tbody", "tfoot", "thead", "tr"), self.endTagImply)
        ])
        endDispatcher.default = self.endTagOther
        self.endTagHandler = endDispatcher

    # ---- helper ---------------------------------------------------------

    def closeCell(self):
        """Close whichever of ``td`` / ``th`` is in table scope (td first)."""
        for cellName in ("td", "th"):
            if self.tree.elementInScope(cellName, variant="table"):
                self.endTagTableCell(impliedTagToken(cellName))
                break

    # ---- the rest -------------------------------------------------------

    def processEOF(self):
        """Delegate EOF to the "inBody" phase (its result is not returned)."""
        self.parser.phases["inBody"].processEOF()

    def processCharacters(self, token):
        """Delegate to the "inBody" phase."""
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processCharacters(token)

    def startTagTableOther(self, token):
        """Close the open cell, then hand the token back for reprocessing."""
        cellOpen = (self.tree.elementInScope("td", variant="table") or
                    self.tree.elementInScope("th", variant="table"))
        if not cellOpen:
            # innerHTML case
            assert self.parser.innerHTML
            self.parser.parseError()
            return None
        self.closeCell()
        return token

    def startTagOther(self, token):
        """Delegate to the "inBody" phase."""
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processStartTag(token)

    def endTagTableCell(self, token):
        """Close the named cell and return to the "inRow" phase.

        If the cell is not in table scope the token is reported and
        ignored.  Otherwise the stack is popped through the cell (with a
        parse error when the cell was not the current node) and the
        active formatting elements are cleared.
        """
        cellName = token["name"]
        tree = self.tree
        if not tree.elementInScope(cellName, variant="table"):
            self.parser.parseError("unexpected-end-tag", {"name": cellName})
            return

        tree.generateImpliedEndTags(cellName)
        if tree.openElements[-1].name == cellName:
            tree.openElements.pop()
        else:
            self.parser.parseError("unexpected-cell-end-tag",
                                   {"name": cellName})
            # Drop everything above the cell, then the cell itself.
            while tree.openElements.pop().name != cellName:
                pass
        tree.clearActiveFormattingElements()
        self.parser.phase = self.parser.phases["inRow"]

    def endTagIgnore(self, token):
        """Report the end tag as unexpected and otherwise ignore it."""
        self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

    def endTagImply(self, token):
        """Close the cell when the named element is in table scope."""
        if not self.tree.elementInScope(token["name"], variant="table"):
            # sometimes innerHTML case
            self.parser.parseError()
            return None
        self.closeCell()
        return token

    def endTagOther(self, token):
        """Delegate to the "inBody" phase."""
        bodyPhase = self.parser.phases["inBody"]
        return bodyPhase.processEndTag(token)
| 2275 | |
| 2276 class InSelectPhase(Phase): | |
def __init__(self, parser, tree):
    """Set up the start- and end-tag dispatch tables for this phase."""
    Phase.__init__(self, parser, tree)

    startDispatcher = _utils.MethodDispatcher([
        ("html", self.startTagHtml),
        ("option", self.startTagOption),
        ("optgroup", self.startTagOptgroup),
        ("select", self.startTagSelect),
        (("input", "keygen", "textarea"), self.startTagInput),
        ("script", self.startTagScript)
    ])
    startDispatcher.default = self.startTagOther
    self.startTagHandler = startDispatcher

    endDispatcher = _utils.MethodDispatcher([
        ("option", self.endTagOption),
        ("optgroup", self.endTagOptgroup),
        ("select", self.endTagSelect)
    ])
    endDispatcher.default = self.endTagOther
    self.endTagHandler = endDispatcher
| 2296 | |
| 2297 # http://www.whatwg.org/specs/web-apps/current-work/#in-select | |
def processEOF(self):
    """Report EOF inside a select unless the current node is ``html``.

    A current node of ``html`` is only expected when parsing innerHTML,
    which is asserted.
    """
    if self.tree.openElements[-1].name == "html":
        assert self.parser.innerHTML
    else:
        self.parser.parseError("eof-in-select")
| 2303 | |
def processCharacters(self, token):
    """Insert the token's text, silently dropping a lone U+0000."""
    text = token["data"]
    if text != "\u0000":
        self.tree.insertText(text)
| 2308 | |
def startTagOption(self, token):
    """Insert an ``option``, first closing an ``option`` that is current."""
    stack = self.tree.openElements
    # An open <option> as the current node gets an implied </option>.
    if stack[-1].name == "option":
        stack.pop()
    self.tree.insertElement(token)
| 2314 | |
| 2315 def startTagOptgroup(self, token): | |
| 2316 if self.tree.openElements[-1].name == "option": | |
| 2317 self.tree.openElements.pop() | |
| 2318 if self.tree.openElements[-1].name == "optgroup": | |
| 2319 self.tree.openElements.pop() | |
| 2320 self.tree.insertElement(token) | |
| 2321 | |
| 2322 def startTagSelect(self, token): | |
| 2323 self.parser.parseError("unexpected-select-in-select") | |
| 2324 self.endTagSelect(impliedTagToken("select")) | |
| 2325 | |
| 2326 def startTagInput(self, token): | |
| 2327 self.parser.parseError("unexpected-input-in-select") | |
| 2328 if self.tree.elementInScope("select", variant="select"): | |
| 2329 self.endTagSelect(impliedTagToken("select")) | |
| 2330 return token | |
| 2331 else: | |
| 2332 assert self.parser.innerHTML | |
| 2333 | |
| 2334 def startTagScript(self, token): | |
| 2335 return self.parser.phases["inHead"].processStartTag(token) | |
| 2336 | |
| 2337 def startTagOther(self, token): | |
| 2338 self.parser.parseError("unexpected-start-tag-in-select", | |
| 2339 {"name": token["name"]}) | |
| 2340 | |
| 2341 def endTagOption(self, token): | |
| 2342 if self.tree.openElements[-1].name == "option": | |
| 2343 self.tree.openElements.pop() | |
| 2344 else: | |
| 2345 self.parser.parseError("unexpected-end-tag-in-select", | |
| 2346 {"name": "option"}) | |
| 2347 | |
| 2348 def endTagOptgroup(self, token): | |
| 2349 # </optgroup> implicitly closes <option> | |
| 2350 if (self.tree.openElements[-1].name == "option" and | |
| 2351 self.tree.openElements[-2].name == "optgroup"): | |
| 2352 self.tree.openElements.pop() | |
| 2353 # It also closes </optgroup> | |
| 2354 if self.tree.openElements[-1].name == "optgroup": | |
| 2355 self.tree.openElements.pop() | |
| 2356 # But nothing else | |
| 2357 else: | |
| 2358 self.parser.parseError("unexpected-end-tag-in-select", | |
| 2359 {"name": "optgroup"}) | |
| 2360 | |
| 2361 def endTagSelect(self, token): | |
| 2362 if self.tree.elementInScope("select", variant="select"): | |
| 2363 node = self.tree.openElements.pop() | |
| 2364 while node.name != "select": | |
| 2365 node = self.tree.openElements.pop() | |
| 2366 self.parser.resetInsertionMode() | |
| 2367 else: | |
| 2368 # innerHTML case | |
| 2369 assert self.parser.innerHTML | |
| 2370 self.parser.parseError() | |
| 2371 | |
| 2372 def endTagOther(self, token): | |
| 2373 self.parser.parseError("unexpected-end-tag-in-select", | |
| 2374 {"name": token["name"]}) | |
| 2375 | |
class InSelectInTablePhase(Phase):
    """Phase registered as "inSelectInTable".

    Table-structure tags are reported as errors and close the select via
    an implied ``</select>``; every other token is handed to the
    "inSelect" phase.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        table_tags = ("caption", "table", "tbody", "tfoot", "thead",
                      "tr", "td", "th")

        start_dispatch = _utils.MethodDispatcher([
            (table_tags, self.startTagTable),
        ])
        start_dispatch.default = self.startTagOther
        self.startTagHandler = start_dispatch

        end_dispatch = _utils.MethodDispatcher([
            (table_tags, self.endTagTable),
        ])
        end_dispatch.default = self.endTagOther
        self.endTagHandler = end_dispatch

    def processEOF(self):
        """Let the "inSelect" phase handle EOF (its result is not returned)."""
        in_select = self.parser.phases["inSelect"]
        in_select.processEOF()

    def processCharacters(self, token):
        """Let the "inSelect" phase handle character tokens."""
        in_select = self.parser.phases["inSelect"]
        return in_select.processCharacters(token)

    def startTagTable(self, token):
        """Report the table start tag, imply ``</select>``, then reprocess."""
        details = {"name": token["name"]}
        self.parser.parseError(
            "unexpected-table-element-start-tag-in-select-in-table", details)
        self.endTagOther(impliedTagToken("select"))
        return token

    def startTagOther(self, token):
        """Let the "inSelect" phase handle every other start tag."""
        in_select = self.parser.phases["inSelect"]
        return in_select.processStartTag(token)

    def endTagTable(self, token):
        """Report the table end tag; close the select only if it is in table scope.

        :returns: ``token`` for reprocessing when the select was closed,
            otherwise ``None``.
        """
        details = {"name": token["name"]}
        self.parser.parseError(
            "unexpected-table-element-end-tag-in-select-in-table", details)
        if not self.tree.elementInScope(token["name"], variant="table"):
            return None
        self.endTagOther(impliedTagToken("select"))
        return token

    def endTagOther(self, token):
        """Let the "inSelect" phase handle every other end tag."""
        in_select = self.parser.phases["inSelect"]
        return in_select.processEndTag(token)
| 2414 | |
class InForeignContentPhase(Phase):
    """Phase registered as "inForeignContent" (MathML / SVG subtrees)."""

    # Start tags that are reported as errors in foreign content and make
    # processStartTag pop foreign elements before handing the token back.
    breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
                                  "center", "code", "dd", "div", "dl", "dt",
                                  "em", "embed", "h1", "h2", "h3",
                                  "h4", "h5", "h6", "head", "hr", "i", "img",
                                  "li", "listing", "menu", "meta", "nobr",
                                  "ol", "p", "pre", "ruby", "s", "small",
                                  "span", "strong", "strike", "sub", "sup",
                                  "table", "tt", "u", "ul", "var"])

    # Lower-case SVG tag name -> mixed-case spelling.  Kept at class level
    # so the table is built once rather than on every adjustSVGTagNames call.
    svgTagNameReplacements = {"altglyph": "altGlyph",
                              "altglyphdef": "altGlyphDef",
                              "altglyphitem": "altGlyphItem",
                              "animatecolor": "animateColor",
                              "animatemotion": "animateMotion",
                              "animatetransform": "animateTransform",
                              "clippath": "clipPath",
                              "feblend": "feBlend",
                              "fecolormatrix": "feColorMatrix",
                              "fecomponenttransfer": "feComponentTransfer",
                              "fecomposite": "feComposite",
                              "feconvolvematrix": "feConvolveMatrix",
                              "fediffuselighting": "feDiffuseLighting",
                              "fedisplacementmap": "feDisplacementMap",
                              "fedistantlight": "feDistantLight",
                              "feflood": "feFlood",
                              "fefunca": "feFuncA",
                              "fefuncb": "feFuncB",
                              "fefuncg": "feFuncG",
                              "fefuncr": "feFuncR",
                              "fegaussianblur": "feGaussianBlur",
                              "feimage": "feImage",
                              "femerge": "feMerge",
                              "femergenode": "feMergeNode",
                              "femorphology": "feMorphology",
                              "feoffset": "feOffset",
                              "fepointlight": "fePointLight",
                              "fespecularlighting": "feSpecularLighting",
                              "fespotlight": "feSpotLight",
                              "fetile": "feTile",
                              "feturbulence": "feTurbulence",
                              "foreignobject": "foreignObject",
                              "glyphref": "glyphRef",
                              "lineargradient": "linearGradient",
                              "radialgradient": "radialGradient",
                              "textpath": "textPath"}

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

    def adjustSVGTagNames(self, token):
        """Rewrite ``token["name"]`` in place to its mixed-case SVG spelling.

        Names that are not in ``svgTagNameReplacements`` are left untouched.
        """
        replacements = self.svgTagNameReplacements
        if token["name"] in replacements:
            token["name"] = replacements[token["name"]]

    def processCharacters(self, token):
        """Handle character data, then defer to ``Phase.processCharacters``.

        A token that is exactly U+0000 becomes U+FFFD.  Otherwise, any
        character outside ``spaceCharacters`` clears ``parser.framesetOK``.
        """
        if token["data"] == "\u0000":
            token["data"] = "\uFFFD"
        elif (self.parser.framesetOK and
              any(char not in spaceCharacters for char in token["data"])):
            self.parser.framesetOK = False
        Phase.processCharacters(self, token)

    def processStartTag(self, token):
        """Insert a foreign element, or break out of foreign content.

        :returns: ``token`` (for reprocessing) when the tag is a breakout
            element or a ``font`` tag carrying ``color``/``face``/``size``;
            otherwise ``None`` after the element has been inserted.
        """
        currentNode = self.tree.openElements[-1]
        if (token["name"] in self.breakoutElements or
            (token["name"] == "font" and
             set(token["data"].keys()) & set(["color", "face", "size"]))):
            self.parser.parseError("unexpected-html-element-in-foreign-content",
                                   {"name": token["name"]})
            # Pop until the current node is in the default namespace or is
            # an HTML / MathML-text integration point.
            while (self.tree.openElements[-1].namespace !=
                   self.tree.defaultNamespace and
                   not self.parser.isHTMLIntegrationPoint(self.tree.openElements[-1]) and
                   not self.parser.isMathMLTextIntegrationPoint(self.tree.openElements[-1])):
                self.tree.openElements.pop()
            return token

        else:
            if currentNode.namespace == namespaces["mathml"]:
                self.parser.adjustMathMLAttributes(token)
            elif currentNode.namespace == namespaces["svg"]:
                self.adjustSVGTagNames(token)
                self.parser.adjustSVGAttributes(token)
            self.parser.adjustForeignAttributes(token)
            # The new element takes the namespace of the current node.
            token["namespace"] = currentNode.namespace
            self.tree.insertElement(token)
            if token["selfClosing"]:
                self.tree.openElements.pop()
                token["selfClosingAcknowledged"] = True

    def processEndTag(self, token):
        """Close the matching foreign element or defer to the current phase.

        Walks the open-element stack from the top.  A node whose
        lower-cased name equals ``token["name"]`` is popped together with
        everything above it.  If a node in the default namespace is reached
        first, the token is passed to ``parser.phase.processEndTag``.

        :returns: ``None`` when a matching element was closed, otherwise
            whatever the current phase's ``processEndTag`` returns.
        """
        nodeIndex = len(self.tree.openElements) - 1
        node = self.tree.openElements[-1]
        if node.name.translate(asciiUpper2Lower) != token["name"]:
            self.parser.parseError("unexpected-end-tag", {"name": token["name"]})

        while True:
            if node.name.translate(asciiUpper2Lower) == token["name"]:
                # XXX this isn't in the spec but it seems necessary
                if self.parser.phase == self.parser.phases["inTableText"]:
                    self.parser.phase.flushCharacters()
                    self.parser.phase = self.parser.phase.originalPhase
                while self.tree.openElements.pop() != node:
                    assert self.tree.openElements
                new_token = None
                break
            nodeIndex -= 1

            node = self.tree.openElements[nodeIndex]
            if node.namespace != self.tree.defaultNamespace:
                continue
            else:
                new_token = self.parser.phase.processEndTag(token)
                break
        return new_token
| 2529 | |
class AfterBodyPhase(Phase):
    """Phase registered as "afterBody".

    Characters and unexpected tags are reported and reprocessed in the
    "inBody" phase; ``</html>`` moves the parser on to "afterAfterBody".
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        start_dispatch = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
        ])
        start_dispatch.default = self.startTagOther
        self.startTagHandler = start_dispatch

        end_dispatch = _utils.MethodDispatcher([
            ("html", self.endTagHtml),
        ])
        end_dispatch.default = self.endTagOther
        self.endTagHandler = end_dispatch

    def processEOF(self):
        """Do nothing: parsing simply stops here."""
        return None

    def processComment(self, token):
        """Attach the comment to the first open element."""
        # This is needed because data is to be appended to the <html> element
        # here and not to whatever is currently open.
        root = self.tree.openElements[0]
        self.tree.insertComment(token, root)

    def processCharacters(self, token):
        """Report the characters, switch to "inBody" and reprocess them."""
        parser = self.parser
        parser.parseError("unexpected-char-after-body")
        parser.phase = parser.phases["inBody"]
        return token

    def startTagHtml(self, token):
        """Let the "inBody" phase handle ``<html>``."""
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagOther(self, token):
        """Report the start tag, switch to "inBody" and reprocess it."""
        parser = self.parser
        parser.parseError("unexpected-start-tag-after-body",
                          {"name": token["name"]})
        parser.phase = parser.phases["inBody"]
        return token

    def endTagHtml(self, name):
        """Handle ``</html>``: an error for innerHTML, else go to "afterAfterBody"."""
        parser = self.parser
        if not parser.innerHTML:
            parser.phase = parser.phases["afterAfterBody"]
            return
        parser.parseError("unexpected-end-tag-after-body-innerhtml")

    def endTagOther(self, token):
        """Report the end tag, switch to "inBody" and reprocess it."""
        parser = self.parser
        parser.parseError("unexpected-end-tag-after-body",
                          {"name": token["name"]})
        parser.phase = parser.phases["inBody"]
        return token
| 2576 | |
class InFramesetPhase(Phase):
    """Phase registered as "inFrameset".

    http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        start_dispatch = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("frameset", self.startTagFrameset),
            ("frame", self.startTagFrame),
            ("noframes", self.startTagNoframes),
        ])
        start_dispatch.default = self.startTagOther
        self.startTagHandler = start_dispatch

        end_dispatch = _utils.MethodDispatcher([
            ("frameset", self.endTagFrameset),
        ])
        end_dispatch.default = self.endTagOther
        self.endTagHandler = end_dispatch

    def processEOF(self):
        """Record "eof-in-frameset" unless the current node is ``html``."""
        if self.tree.openElements[-1].name == "html":
            assert self.parser.innerHTML
        else:
            self.parser.parseError("eof-in-frameset")

    def processCharacters(self, token):
        """Report character data; the token is dropped."""
        self.parser.parseError("unexpected-char-in-frameset")

    def startTagFrameset(self, token):
        """Insert a nested ``<frameset>`` element."""
        self.tree.insertElement(token)

    def startTagFrame(self, token):
        """Insert ``<frame>`` and immediately pop it off the open elements."""
        tree = self.tree
        tree.insertElement(token)
        tree.openElements.pop()

    def startTagNoframes(self, token):
        """Let the "inBody" phase handle ``<noframes>``."""
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagOther(self, token):
        """Report any other start tag; the token is dropped."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-in-frameset", details)

    def endTagFrameset(self, token):
        """Close the current frameset and possibly move to "afterFrameset"."""
        if self.tree.openElements[-1].name == "html":
            # innerHTML case
            self.parser.parseError("unexpected-frameset-in-frameset-innerhtml")
        else:
            self.tree.openElements.pop()

        if self.parser.innerHTML:
            return
        # If we're not in innerHTML mode and the current node is not a
        # "frameset" element (anymore) then switch.
        if self.tree.openElements[-1].name != "frameset":
            self.parser.phase = self.parser.phases["afterFrameset"]

    def endTagOther(self, token):
        """Report any other end tag; the token is dropped."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-in-frameset", details)
| 2633 | |
class AfterFramesetPhase(Phase):
    """Phase registered as "afterFrameset".

    http://www.whatwg.org/specs/web-apps/current-work/#after3
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        start_dispatch = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("noframes", self.startTagNoframes),
        ])
        start_dispatch.default = self.startTagOther
        self.startTagHandler = start_dispatch

        end_dispatch = _utils.MethodDispatcher([
            ("html", self.endTagHtml),
        ])
        end_dispatch.default = self.endTagOther
        self.endTagHandler = end_dispatch

    def processEOF(self):
        """Do nothing: parsing simply stops here."""
        return None

    def processCharacters(self, token):
        """Report character data; the token is dropped."""
        self.parser.parseError("unexpected-char-after-frameset")

    def startTagNoframes(self, token):
        """Let the "inHead" phase handle ``<noframes>``."""
        in_head = self.parser.phases["inHead"]
        return in_head.processStartTag(token)

    def startTagOther(self, token):
        """Report any other start tag; the token is dropped."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-start-tag-after-frameset", details)

    def endTagHtml(self, token):
        """Move the parser to the "afterAfterFrameset" phase."""
        parser = self.parser
        parser.phase = parser.phases["afterAfterFrameset"]

    def endTagOther(self, token):
        """Report any other end tag; the token is dropped."""
        details = {"name": token["name"]}
        self.parser.parseError("unexpected-end-tag-after-frameset", details)
| 2670 | |
class AfterAfterBodyPhase(Phase):
    """Phase registered as "afterAfterBody".

    Comments go on the document; whitespace and ``<html>`` are delegated to
    "inBody"; anything else is reported and reprocessed in "inBody".
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        start_dispatch = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
        ])
        start_dispatch.default = self.startTagOther
        self.startTagHandler = start_dispatch

    def processEOF(self):
        """Do nothing at end of file."""
        return None

    def processComment(self, token):
        """Attach the comment to the document node."""
        document = self.tree.document
        self.tree.insertComment(token, document)

    def processSpaceCharacters(self, token):
        """Let the "inBody" phase handle whitespace."""
        in_body = self.parser.phases["inBody"]
        return in_body.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Report the characters, switch to "inBody" and reprocess them."""
        parser = self.parser
        parser.parseError("expected-eof-but-got-char")
        parser.phase = parser.phases["inBody"]
        return token

    def startTagHtml(self, token):
        """Let the "inBody" phase handle ``<html>``."""
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagOther(self, token):
        """Report the start tag, switch to "inBody" and reprocess it."""
        parser = self.parser
        parser.parseError("expected-eof-but-got-start-tag",
                          {"name": token["name"]})
        parser.phase = parser.phases["inBody"]
        return token

    def processEndTag(self, token):
        """Report the end tag, switch to "inBody" and reprocess it."""
        parser = self.parser
        parser.parseError("expected-eof-but-got-end-tag",
                          {"name": token["name"]})
        parser.phase = parser.phases["inBody"]
        return token
| 2708 | |
class AfterAfterFramesetPhase(Phase):
    """Phase registered as "afterAfterFrameset".

    Comments go on the document; whitespace and ``<html>`` are delegated to
    "inBody" and ``<noframes>`` to "inHead"; everything else is reported
    as an error and dropped.
    """

    def __init__(self, parser, tree):
        Phase.__init__(self, parser, tree)

        start_dispatch = _utils.MethodDispatcher([
            ("html", self.startTagHtml),
            ("noframes", self.startTagNoFrames),
        ])
        start_dispatch.default = self.startTagOther
        self.startTagHandler = start_dispatch

    def processEOF(self):
        """Do nothing at end of file."""
        return None

    def processComment(self, token):
        """Attach the comment to the document node."""
        document = self.tree.document
        self.tree.insertComment(token, document)

    def processSpaceCharacters(self, token):
        """Let the "inBody" phase handle whitespace."""
        in_body = self.parser.phases["inBody"]
        return in_body.processSpaceCharacters(token)

    def processCharacters(self, token):
        """Report character data; the token is dropped."""
        self.parser.parseError("expected-eof-but-got-char")

    def startTagHtml(self, token):
        """Let the "inBody" phase handle ``<html>``."""
        in_body = self.parser.phases["inBody"]
        return in_body.processStartTag(token)

    def startTagNoFrames(self, token):
        """Let the "inHead" phase handle ``<noframes>``."""
        in_head = self.parser.phases["inHead"]
        return in_head.processStartTag(token)

    def startTagOther(self, token):
        """Report any other start tag; the token is dropped."""
        details = {"name": token["name"]}
        self.parser.parseError("expected-eof-but-got-start-tag", details)

    def processEndTag(self, token):
        """Report any end tag; the token is dropped."""
        details = {"name": token["name"]}
        self.parser.parseError("expected-eof-but-got-end-tag", details)
| 2744 # pylint:enable=unused-argument | |
| 2745 | |
| 2746 return { | |
| 2747 "initial": InitialPhase, | |
| 2748 "beforeHtml": BeforeHtmlPhase, | |
| 2749 "beforeHead": BeforeHeadPhase, | |
| 2750 "inHead": InHeadPhase, | |
| 2751 "inHeadNoscript": InHeadNoscriptPhase, | |
| 2752 "afterHead": AfterHeadPhase, | |
| 2753 "inBody": InBodyPhase, | |
| 2754 "text": TextPhase, | |
| 2755 "inTable": InTablePhase, | |
| 2756 "inTableText": InTableTextPhase, | |
| 2757 "inCaption": InCaptionPhase, | |
| 2758 "inColumnGroup": InColumnGroupPhase, | |
| 2759 "inTableBody": InTableBodyPhase, | |
| 2760 "inRow": InRowPhase, | |
| 2761 "inCell": InCellPhase, | |
| 2762 "inSelect": InSelectPhase, | |
| 2763 "inSelectInTable": InSelectInTablePhase, | |
| 2764 "inForeignContent": InForeignContentPhase, | |
| 2765 "afterBody": AfterBodyPhase, | |
| 2766 "inFrameset": InFramesetPhase, | |
| 2767 "afterFrameset": AfterFramesetPhase, | |
| 2768 "afterAfterBody": AfterAfterBodyPhase, | |
| 2769 "afterAfterFrameset": AfterAfterFramesetPhase, | |
| 2770 # XXX after after frameset | |
| 2771 } | |
| 2772 | |
| 2773 | |
def adjust_attributes(token, replacements):
    """Rename attributes of *token* in place according to *replacements*.

    :arg token: token dict whose ``'data'`` entry maps attribute names to
        values

    :arg replacements: mapping of attribute name to its replacement name

    ``token['data']`` is only rebuilt (as an ``OrderedDict`` that keeps the
    existing attribute order) when at least one attribute name appears in
    *replacements*; otherwise the token is left untouched.
    """
    attributes = token['data']
    if not (viewkeys(attributes) & viewkeys(replacements)):
        return
    renamed = OrderedDict()
    for attr_name, attr_value in attributes.items():
        renamed[replacements.get(attr_name, attr_name)] = attr_value
    token['data'] = renamed
| 2779 | |
| 2780 | |
def impliedTagToken(name, type="EndTag", attributes=None,
                    selfClosing=False):
    """Build a tag token dict that did not come from the tokenizer.

    :arg name: tag name stored under ``"name"``

    :arg type: key into ``tokenTypes`` selecting the token type

    :arg attributes: attribute mapping stored under ``"data"``; a fresh
        empty dict is used when ``None``

    :arg selfClosing: value stored under ``"selfClosing"``

    :returns: dict with ``"type"``, ``"name"``, ``"data"`` and
        ``"selfClosing"`` entries
    """
    token = {
        "type": tokenTypes[type],
        "name": name,
        "data": {} if attributes is None else attributes,
        "selfClosing": selfClosing,
    }
    return token
| 2787 | |
| 2788 | |
class ParseError(Exception):
    """Exception type representing an error in the parsed document."""
